Merge r1555021 through r1556550 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5535@1556552 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze, 11 years ago
parent commit: f3ee35ab28
100 files changed, 2490 additions and 1345 deletions
  1. hadoop-common-project/hadoop-auth/pom.xml (+5 -0)
  2. hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/PseudoAuthenticationHandler.java (+19 -1)
  3. hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestPseudoAuthenticationHandler.java (+1 -1)
  4. hadoop-common-project/hadoop-common/CHANGES.txt (+5 -0)
  5. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java (+43 -1)
  6. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java (+14 -0)
  7. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java (+30 -0)
  8. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java (+3 -1)
  9. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java (+3 -2)
  10. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java (+21 -9)
  11. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java (+1 -0)
  12. hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java (+30 -0)
  13. hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java (+11 -0)
  14. hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java (+24 -3)
  15. hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/mount/MountdBase.java (+17 -0)
  16. hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java (+15 -0)
  17. hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java (+23 -5)
  18. hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/PortmapRequest.java (+5 -4)
  19. hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java (+51 -1)
  20. hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (+156 -132)
  21. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java (+7 -3)
  22. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java (+150 -24)
  23. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java (+2 -2)
  24. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java (+2 -5)
  25. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java (+2 -2)
  26. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java (+3 -2)
  27. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java (+54 -59)
  28. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java (+7 -8)
  29. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java (+54 -49)
  30. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java (+2 -1)
  31. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (+21 -13)
  32. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java (+25 -27)
  33. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java (+84 -72)
  34. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java (+66 -71)
  35. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java (+39 -35)
  36. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java (+3 -3)
  37. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java (+62 -59)
  38. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java (+7 -6)
  39. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java (+20 -21)
  40. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java (+32 -29)
  41. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java (+8 -0)
  42. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java (+13 -16)
  43. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiffList.java (+45 -55)
  44. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java (+72 -64)
  45. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiff.java (+4 -4)
  46. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiffList.java (+2 -2)
  47. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java (+7 -7)
  48. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectorySnapshottable.java (+33 -21)
  49. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java (+25 -21)
  50. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java (+7 -6)
  51. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java (+4 -3)
  52. hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/StorageReport.java (+6 -6)
  53. hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto (+2 -1)
  54. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java (+30 -16)
  55. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java (+5 -13)
  56. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java (+3 -1)
  57. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java (+5 -2)
  58. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java (+3 -2)
  59. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java (+3 -3)
  60. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java (+45 -3)
  61. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStorageReport.java (+113 -0)
  62. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java (+12 -9)
  63. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java (+3 -156)
  64. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java (+52 -13)
  65. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java (+3 -2)
  66. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirectory.java (+2 -1)
  67. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java (+5 -2)
  68. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java (+2 -1)
  69. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSnapshotPathINodes.java (+7 -3)
  70. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java (+8 -0)
  71. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotTestHelper.java (+2 -1)
  72. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java (+3 -5)
  73. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java (+107 -60)
  74. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSetQuotaWithSnapshot.java (+4 -1)
  75. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java (+17 -13)
  76. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotRename.java (+2 -1)
  77. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotReplication.java (+1 -1)
  78. hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java (+93 -124)
  79. hadoop-mapreduce-project/CHANGES.txt (+5 -0)
  80. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java (+2 -1)
  81. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java (+15 -0)
  82. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java (+51 -2)
  83. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java (+3 -2)
  84. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java (+17 -0)
  85. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java (+16 -6)
  86. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java (+2 -0)
  87. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapContextImpl.java (+5 -0)
  88. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReduceContextImpl.java (+5 -0)
  89. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/map/WrappedMapper.java (+5 -0)
  90. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/reduce/WrappedReducer.java (+5 -0)
  91. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java (+11 -0)
  92. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java (+1 -1)
  93. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestOldCombinerGrouping.java (+191 -0)
  94. hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestNewCombinerGrouping.java (+178 -0)
  95. hadoop-yarn-project/CHANGES.txt (+20 -0)
  96. hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml (+0 -9)
  97. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java (+17 -2)
  98. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java (+49 -31)
  99. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto (+8 -0)
  100. hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml (+7 -1)

+ 5 - 0
hadoop-common-project/hadoop-auth/pom.xml

@@ -92,6 +92,11 @@
       <artifactId>hadoop-minikdc</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient</artifactId>
+      <scope>compile</scope>
+    </dependency>
   </dependencies>
 
   <build>

+ 19 - 1
hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/PseudoAuthenticationHandler.java

@@ -16,10 +16,15 @@ package org.apache.hadoop.security.authentication.server;
 import org.apache.hadoop.security.authentication.client.AuthenticationException;
 import org.apache.hadoop.security.authentication.client.PseudoAuthenticator;
 
+import org.apache.http.client.utils.URLEncodedUtils;
+import org.apache.http.NameValuePair;
+
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
 import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.List;
 import java.util.Properties;
 
 /**
@@ -48,6 +53,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
    */
   public static final String ANONYMOUS_ALLOWED = TYPE + ".anonymous.allowed";
 
+  private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
   private boolean acceptAnonymous;
 
   /**
@@ -114,6 +120,18 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
     return true;
   }
 
+  private String getUserName(HttpServletRequest request) {
+    List<NameValuePair> list = URLEncodedUtils.parse(request.getQueryString(), UTF8_CHARSET);
+    if (list != null) {
+      for (NameValuePair nv : list) {
+        if (PseudoAuthenticator.USER_NAME.equals(nv.getName())) {
+          return nv.getValue();
+        }
+      }
+    }
+    return null;
+  }
+
   /**
    * Authenticates an HTTP client request.
    * <p/>
@@ -139,7 +157,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
   public AuthenticationToken authenticate(HttpServletRequest request, HttpServletResponse response)
     throws IOException, AuthenticationException {
     AuthenticationToken token;
-    String userName = request.getParameter(PseudoAuthenticator.USER_NAME);
+    String userName = getUserName(request);
     if (userName == null) {
       if (getAcceptAnonymous()) {
         token = AuthenticationToken.ANONYMOUS;
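
Note on the change above (HADOOP-10193): calling request.getParameter() on a form-encoded POST can make the servlet container parse the request body, consuming getInputStream() before the protected application sees it. The handler now reads the user name from request.getQueryString() only. A minimal standalone sketch of the same parsing step, assuming httpclient 4.x on the classpath; the query string is an example value and "user.name" is the parameter name that PseudoAuthenticator.USER_NAME resolves to.

    import java.nio.charset.Charset;
    import java.util.List;

    import org.apache.http.NameValuePair;
    import org.apache.http.client.utils.URLEncodedUtils;

    public class QueryStringUserNameDemo {
      public static void main(String[] args) {
        // Example query string; a real handler would use request.getQueryString().
        String queryString = "op=LISTSTATUS&user.name=alice";
        List<NameValuePair> pairs =
            URLEncodedUtils.parse(queryString, Charset.forName("UTF-8"));
        String userName = null;
        for (NameValuePair nv : pairs) {
          if ("user.name".equals(nv.getName())) {   // PseudoAuthenticator.USER_NAME
            userName = nv.getValue();
          }
        }
        System.out.println("user = " + userName);   // prints "user = alice"
      }
    }

Because only the query string is inspected, the request body stays untouched, which is what the test below verifies by mocking getQueryString() instead of getParameter().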

+ 1 - 1
hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestPseudoAuthenticationHandler.java

@@ -94,7 +94,7 @@ public class TestPseudoAuthenticationHandler {
 
       HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
       HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
-      Mockito.when(request.getParameter(PseudoAuthenticator.USER_NAME)).thenReturn("user");
+      Mockito.when(request.getQueryString()).thenReturn(PseudoAuthenticator.USER_NAME + "=" + "user");
 
       AuthenticationToken token = handler.authenticate(request, response);
 

+ 5 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -108,6 +108,8 @@ Trunk (Unreleased)
     HADOOP-10141. Create KeyProvider API to separate encryption key storage
     from the applications. (omalley)
 
+    HADOOP-10201. Add listing to KeyProvider API. (Larry McCay via omalley)
+
   BUG FIXES
 
     HADOOP-9451. Fault single-layer config if node group topology is enabled.
@@ -580,6 +582,9 @@ Release 2.3.0 - UNRELEASED
     HADOOP-10090. Jobtracker metrics not updated properly after execution
     of a mapreduce job. (ivanmi)
 
+    HADOOP-10193. hadoop-auth's PseudoAuthenticationHandler can consume getInputStream. 
+    (gchanan via tucu)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 43 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java

@@ -36,8 +36,11 @@ import java.security.KeyStoreException;
 import java.security.NoSuchAlgorithmException;
 import java.security.UnrecoverableKeyException;
 import java.security.cert.CertificateException;
+import java.util.ArrayList;
 import java.util.Date;
+import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -56,6 +59,7 @@ import java.util.Map;
  */
 @InterfaceAudience.Private
 public class JavaKeyStoreProvider extends KeyProvider {
+  private static final String KEY_METADATA = "KeyMetadata";
   public static final String SCHEME_NAME = "jceks";
   public static final String KEYSTORE_PASSWORD_NAME =
       "HADOOP_KEYSTORE_PASSWORD";
@@ -117,6 +121,44 @@ public class JavaKeyStoreProvider extends KeyProvider {
     return new KeyVersion(versionName, key.getEncoded());
   }
 
+  @Override
+  public List<String> getKeys() throws IOException {
+    ArrayList<String> list = new ArrayList<String>();
+    String alias = null;
+    try {
+      Enumeration<String> e = keyStore.aliases();
+      while (e.hasMoreElements()) {
+         alias = e.nextElement();
+         // only include the metadata key names in the list of names
+         if (!alias.contains("@")) {
+             list.add(alias);
+         }
+      }
+    } catch (KeyStoreException e) {
+      throw new IOException("Can't get key " + alias + " from " + path, e);
+    }
+    return list;
+  }
+
+  @Override
+  public List<KeyVersion> getKeyVersions(String name) throws IOException {
+    List<KeyVersion> list = new ArrayList<KeyVersion>();
+    Metadata km = getMetadata(name);
+    if (km != null) {
+      int latestVersion = km.getVersions();
+      KeyVersion v = null;
+      String versionName = null;
+      for (int i = 0; i < latestVersion; i++) {
+        versionName = buildVersionName(name, i);
+        v = getKeyVersion(versionName);
+        if (v != null) {
+          list.add(v);
+        }
+      }
+    }
+    return list;
+  }
+
   @Override
   public Metadata getMetadata(String name) throws IOException {
     if (cache.containsKey(name)) {
@@ -288,7 +330,7 @@ public class JavaKeyStoreProvider extends KeyProvider {
 
     @Override
     public String getFormat() {
-      return "KeyMetadata";
+      return KEY_METADATA;
     }
 
     @Override

+ 14 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java

@@ -254,6 +254,20 @@ public abstract class KeyProvider {
   public abstract KeyVersion getKeyVersion(String versionName
                                             ) throws IOException;
 
+  /**
+   * Get the key names for all keys.
+   * @return the list of key names
+   * @throws IOException
+   */
+  public abstract List<String> getKeys() throws IOException;
+
+  /**
+   * Get the key material for all versions of a specific key name.
+   * @return the list of key material
+   * @throws IOException
+   */
+  public abstract List<KeyVersion> getKeyVersions(String name) throws IOException;
+
   /**
    * Get the current version of the key, which should be used for encrypting new
    * data.
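
These two new abstract methods round out the KeyProvider API introduced by HADOOP-10141: getKeys() enumerates key names and getKeyVersions(name) returns the material for every version of one key (HADOOP-10201). A rough usage sketch, assuming a key provider path has already been configured for the Configuration (e.g. a user:/// or jceks:// provider as implemented in this change); the printed format is illustrative only.

    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.crypto.key.KeyProvider;
    import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion;
    import org.apache.hadoop.crypto.key.KeyProviderFactory;

    public class KeyListingDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumes the key provider path property points at one or more providers.
        for (KeyProvider provider : KeyProviderFactory.getProviders(conf)) {
          for (String name : provider.getKeys()) {                  // key names only
            List<KeyVersion> versions = provider.getKeyVersions(name);
            for (KeyVersion kv : versions) {
              System.out.println(kv.getVersionName() + " : "
                  + kv.getMaterial().length + " bytes of material");
            }
          }
        }
      }
    }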

+ 30 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java

@@ -20,8 +20,10 @@ package org.apache.hadoop.crypto.key;
 
 import java.io.IOException;
 import java.net.URI;
+import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -142,4 +144,32 @@ public class UserProvider extends KeyProvider {
       return null;
     }
   }
+
+  @Override
+  public List<String> getKeys() throws IOException {
+    List<String> list = new ArrayList<String>();
+    List<Text> keys = credentials.getAllSecretKeys();
+    for (Text key : keys) {
+      if (key.find("@") == -1) {
+        list.add(key.toString());
+      }
+    }
+    return list;
+  }
+
+  @Override
+  public List<KeyVersion> getKeyVersions(String name) throws IOException {
+      List<KeyVersion> list = new ArrayList<KeyVersion>();
+      Metadata km = getMetadata(name);
+      if (km != null) {
+        int latestVersion = km.getVersions();
+        for (int i = 0; i < latestVersion; i++) {
+          KeyVersion v = getKeyVersion(buildVersionName(name, i));
+          if (v != null) {
+            list.add(v);
+          }
+        }
+      }
+      return list;
+  }
 }

+ 3 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

@@ -21,7 +21,6 @@ package org.apache.hadoop.fs;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.http.lib.StaticUserWebFilter;
-import org.apache.hadoop.security.authorize.Service;
 
 /** 
  * This class contains constants for configuration keys used
@@ -240,4 +239,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
   /** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */
   public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT =
       10000;
+
+  public static final String HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS =
+    "hadoop.user.group.metrics.percentiles.intervals";
 }

+ 3 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java

@@ -766,8 +766,9 @@ public class ActiveStandbyElector implements StatCallback, StringCallback {
     zkClient = getNewZooKeeper();
     LOG.debug("Created new connection for " + this);
   }
-  
-  void terminateConnection() {
+
+  @InterfaceAudience.Private
+  public void terminateConnection() {
     if (zkClient == null) {
       return;
     }

+ 21 - 9
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java

@@ -29,7 +29,9 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -73,15 +75,6 @@ public class Credentials implements Writable {
     this.addAll(credentials);
   }
   
-  /**
-   * Returns the key bytes for the alias
-   * @param alias the alias for the key
-   * @return key for this alias
-   */
-  public byte[] getSecretKey(Text alias) {
-    return secretKeysMap.get(alias);
-  }
-  
   /**
    * Returns the Token object for the alias
    * @param alias the alias for the Token
@@ -117,6 +110,15 @@ public class Credentials implements Writable {
   public int numberOfTokens() {
     return tokenMap.size();
   }
+
+  /**
+   * Returns the key bytes for the alias
+   * @param alias the alias for the key
+   * @return key for this alias
+   */
+  public byte[] getSecretKey(Text alias) {
+    return secretKeysMap.get(alias);
+  }
   
   /**
    * @return number of keys in the in-memory map
@@ -142,6 +144,16 @@ public class Credentials implements Writable {
     secretKeysMap.remove(alias);
   }
 
+  /**
+   * Return all the secret key entries in the in-memory map
+   */
+  public List<Text> getAllSecretKeys() {
+    List<Text> list = new java.util.ArrayList<Text>();
+    list.addAll(secretKeysMap.keySet());
+
+    return list;
+  }
+
   /**
    * Convenience method for reading a token storage file, and loading the Tokens
    * therein in the passed UGI
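
The new getAllSecretKeys() is what lets UserProvider (above) enumerate stored keys: key names and their version aliases live in the same in-memory map, and callers filter on the "@" separator. A small sketch using only the Credentials API shown in this diff; the aliases and bytes are made up.

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.security.Credentials;

    public class SecretKeyListingDemo {
      public static void main(String[] args) {
        Credentials creds = new Credentials();
        creds.addSecretKey(new Text("key3"), new byte[] {1, 2, 3});     // key-name alias
        creds.addSecretKey(new Text("key3@0"), new byte[] {1, 2, 3});   // version alias
        for (Text alias : creds.getAllSecretKeys()) {
          // Same filter UserProvider.getKeys() applies: no '@' means a key name.
          boolean isKeyName = alias.find("@") == -1;
          System.out.println(alias + (isKeyName ? " (key name)" : " (key version)"));
        }
      }
    }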

+ 1 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java

@@ -138,6 +138,7 @@ public class Groups {
     List<String> groupList = impl.getGroups(user);
     long endMs = Time.monotonicNow();
     long deltaMs = endMs - startMs ;
+    UserGroupInformation.metrics.addGetGroups(deltaMs);
     if (deltaMs > warningDeltaMs) {
       LOG.warn("Potential performance problem: getGroups(user=" + user +") " +
           "took " + deltaMs + " milliseconds.");

+ 30 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.security;
 
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN;
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT;
+import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
 
 import java.io.File;
 import java.io.IOException;
@@ -58,6 +59,8 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
 import org.apache.hadoop.metrics2.lib.MutableRate;
 import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
 import org.apache.hadoop.security.authentication.util.KerberosUtil;
@@ -92,14 +95,27 @@ public class UserGroupInformation {
    */
   @Metrics(about="User and group related metrics", context="ugi")
   static class UgiMetrics {
+    final MetricsRegistry registry = new MetricsRegistry("UgiMetrics");
+
     @Metric("Rate of successful kerberos logins and latency (milliseconds)")
     MutableRate loginSuccess;
     @Metric("Rate of failed kerberos logins and latency (milliseconds)")
     MutableRate loginFailure;
+    @Metric("GetGroups") MutableRate getGroups;
+    MutableQuantiles[] getGroupsQuantiles;
 
     static UgiMetrics create() {
       return DefaultMetricsSystem.instance().register(new UgiMetrics());
     }
+
+    void addGetGroups(long latency) {
+      getGroups.add(latency);
+      if (getGroupsQuantiles != null) {
+        for (MutableQuantiles q : getGroupsQuantiles) {
+          q.add(latency);
+        }
+      }
+    }
   }
   
   /**
@@ -250,6 +266,20 @@ public class UserGroupInformation {
       groups = Groups.getUserToGroupsMappingService(conf);
     }
     UserGroupInformation.conf = conf;
+
+    if (metrics.getGroupsQuantiles == null) {
+      int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS);
+      if (intervals != null && intervals.length > 0) {
+        final int length = intervals.length;
+        MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length];
+        for (int i = 0; i < length; i++) {
+          getGroupsQuantiles[i] = metrics.registry.newQuantiles(
+            "getGroups" + intervals[i] + "s",
+            "Get groups", "ops", "latency", intervals[i]);
+        }
+        metrics.getGroupsQuantiles = getGroupsQuantiles;
+      }
+    }
   }
 
   /**
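
The quantiles above are created lazily in setConfiguration(): only when hadoop.user.group.metrics.percentiles.intervals lists one or more rollover intervals (in seconds) does UgiMetrics register a MutableQuantiles per interval, each fed by addGetGroups(). A hedged configuration sketch; the interval values 60 and 300 are arbitrary examples.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.security.UserGroupInformation;

    public class UgiGroupMetricsDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Publish getGroups latency percentiles over 60s and 300s rolling windows.
        conf.set("hadoop.user.group.metrics.percentiles.intervals", "60,300");
        UserGroupInformation.setConfiguration(conf);

        // Any group lookup now updates both the GetGroups rate and the quantiles.
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        System.out.println("groups resolved: " + ugi.getGroupNames().length);
      }
    }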

+ 11 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java

@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.List;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -160,6 +161,16 @@ public class TestKeyProviderFactory {
         provider.getCurrentKey("key4").getMaterial());
     assertArrayEquals(key3, provider.getCurrentKey("key3").getMaterial());
     assertEquals("key3@0", provider.getCurrentKey("key3").getVersionName());
+
+    List<String> keys = provider.getKeys();
+    assertTrue("Keys should have been returned.", keys.size() == 2);
+    assertTrue("Returned Keys should have included key3.", keys.contains("key3"));
+    assertTrue("Returned Keys should have included key4.", keys.contains("key4"));
+
+    List<KeyVersion> kvl = provider.getKeyVersions("key3");
+    assertTrue("KeyVersions should have been returned for key3.", kvl.size() == 1);
+    assertTrue("KeyVersions should have included key3@0.", kvl.get(0).getVersionName().equals("key3@0"));
+    assertArrayEquals(key3, kvl.get(0).getMaterial());
   }
 
   @Test

+ 24 - 3
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java

@@ -19,7 +19,6 @@ package org.apache.hadoop.security;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.ipc.TestSaslRPC;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.authentication.util.KerberosName;
@@ -40,9 +39,9 @@ import java.util.Collection;
 import java.util.LinkedHashSet;
 import java.util.Set;
 
+import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
 import static org.apache.hadoop.ipc.TestSaslRPC.*;
-import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier;
 import static org.apache.hadoop.test.MetricsAsserts.*;
 import static org.junit.Assert.*;
 import static org.mockito.Mockito.mock;
@@ -55,6 +54,8 @@ public class TestUserGroupInformation {
   final private static String GROUP3_NAME = "group3";
   final private static String[] GROUP_NAMES = 
     new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME};
+  // Rollover interval of percentile metrics (in seconds)
+  private static final int PERCENTILES_INTERVAL = 1;
   private static Configuration conf;
   
   /**
@@ -80,7 +81,8 @@ public class TestUserGroupInformation {
     // doesn't matter what it is, but getGroups needs it set...
     // use HADOOP_HOME environment variable to prevent interfering with logic
     // that finds winutils.exe
-    System.setProperty("hadoop.home.dir", System.getenv("HADOOP_HOME"));
+    String home = System.getenv("HADOOP_HOME");
+    System.setProperty("hadoop.home.dir", (home != null ? home : "."));
     // fake the realm is kerberos is enabled
     System.setProperty("java.security.krb5.kdc", "");
     System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
@@ -150,11 +152,15 @@ public class TestUserGroupInformation {
   /** Test login method */
   @Test (timeout = 30000)
   public void testLogin() throws Exception {
+    conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
+      String.valueOf(PERCENTILES_INTERVAL));
+    UserGroupInformation.setConfiguration(conf);
     // login from unix
     UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
     assertEquals(UserGroupInformation.getCurrentUser(),
                  UserGroupInformation.getLoginUser());
     assertTrue(ugi.getGroupNames().length >= 1);
+    verifyGroupMetrics(1);
 
     // ensure that doAs works correctly
     UserGroupInformation userGroupInfo = 
@@ -728,6 +734,21 @@ public class TestUserGroupInformation {
     }
   }
 
+  private static void verifyGroupMetrics(
+      long groups) throws InterruptedException {
+    MetricsRecordBuilder rb = getMetrics("UgiMetrics");
+    if (groups > 0) {
+      assertCounter("GetGroupsNumOps", groups, rb);
+      double avg = getDoubleGauge("GetGroupsAvgTime", rb);
+      assertTrue(avg >= 0.0);
+
+      // Sleep for an interval+slop to let the percentiles rollover
+      Thread.sleep((PERCENTILES_INTERVAL+1)*1000);
+      // Check that the percentiles were updated
+      assertQuantileGauges("GetGroups1s", rb);
+    }
+  }
+
   /**
    * Test for the case that UserGroupInformation.getCurrentUser()
    * is called when the AccessControlContext has a Subject associated

+ 17 - 0
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/mount/MountdBase.java

@@ -23,6 +23,7 @@ import org.apache.hadoop.oncrpc.RpcProgram;
 import org.apache.hadoop.oncrpc.SimpleTcpServer;
 import org.apache.hadoop.oncrpc.SimpleUdpServer;
 import org.apache.hadoop.portmap.PortmapMapping;
+import org.apache.hadoop.util.ShutdownHookManager;
 
 /**
  * Main class for starting mountd daemon. This daemon implements the NFS
@@ -71,8 +72,24 @@ abstract public class MountdBase {
     startUDPServer();
     startTCPServer();
     if (register) {
+      ShutdownHookManager.get().addShutdownHook(new Unregister(),
+          SHUTDOWN_HOOK_PRIORITY);
       rpcProgram.register(PortmapMapping.TRANSPORT_UDP, udpBoundPort);
       rpcProgram.register(PortmapMapping.TRANSPORT_TCP, tcpBoundPort);
     }
   }
+  
+  /**
+   * Priority of the mountd shutdown hook.
+   */
+  public static final int SHUTDOWN_HOOK_PRIORITY = 10;
+
+  private class Unregister implements Runnable {
+    @Override
+    public synchronized void run() {
+      rpcProgram.unregister(PortmapMapping.TRANSPORT_UDP, udpBoundPort);
+      rpcProgram.unregister(PortmapMapping.TRANSPORT_TCP, tcpBoundPort);
+    }
+  }
+  
 }

+ 15 - 0
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/nfs/nfs3/Nfs3Base.java

@@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.oncrpc.RpcProgram;
 import org.apache.hadoop.oncrpc.SimpleTcpServer;
 import org.apache.hadoop.portmap.PortmapMapping;
+import org.apache.hadoop.util.ShutdownHookManager;
 
 /**
  * Nfs server. Supports NFS v3 using {@link RpcProgram}.
@@ -50,6 +51,8 @@ public abstract class Nfs3Base {
     startTCPServer(); // Start TCP server
     
     if (register) {
+      ShutdownHookManager.get().addShutdownHook(new Unregister(),
+          SHUTDOWN_HOOK_PRIORITY);
       rpcProgram.register(PortmapMapping.TRANSPORT_TCP, nfsBoundPort);
     }
   }
@@ -61,4 +64,16 @@ public abstract class Nfs3Base {
     tcpServer.run();
     nfsBoundPort = tcpServer.getBoundPort();
   }
+  
+  /**
+   * Priority of the nfsd shutdown hook.
+   */
+  public static final int SHUTDOWN_HOOK_PRIORITY = 10;
+
+  private class Unregister implements Runnable {
+    @Override
+    public synchronized void run() {
+      rpcProgram.unregister(PortmapMapping.TRANSPORT_TCP, nfsBoundPort);
+    }
+  }
 }
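
MountdBase and Nfs3Base use the same pattern here: an Unregister Runnable added through ShutdownHookManager, so the portmap entries are removed (via the new RpcProgram.unregister() below, which sends PMAPPROC_UNSET) when the gateway JVM exits. A stripped-down sketch of just the hook registration, using only the ShutdownHookManager API seen above; the hook body only logs, since a real unregister call needs a running RpcProgram.

    import org.apache.hadoop.util.ShutdownHookManager;

    public class UnregisterHookDemo {
      // Mirrors SHUTDOWN_HOOK_PRIORITY in MountdBase/Nfs3Base.
      private static final int SHUTDOWN_HOOK_PRIORITY = 10;

      public static void main(String[] args) {
        ShutdownHookManager.get().addShutdownHook(new Runnable() {
          @Override
          public void run() {
            // In the real daemons this calls rpcProgram.unregister(transport, boundPort)
            // for each registered transport.
            System.out.println("JVM exiting: would unregister NFS/mountd from portmap");
          }
        }, SHUTDOWN_HOOK_PRIORITY);
        System.out.println("shutdown hook registered");
      }
    }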

+ 23 - 5
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/oncrpc/RpcProgram.java

@@ -78,23 +78,41 @@ public abstract class RpcProgram extends SimpleChannelUpstreamHandler {
     for (int vers = lowProgVersion; vers <= highProgVersion; vers++) {
       PortmapMapping mapEntry = new PortmapMapping(progNumber, vers, transport,
           port);
-      register(mapEntry);
+      register(mapEntry, true);
+    }
+  }
+  
+  /**
+   * Unregister this program with the local portmapper.
+   */
+  public void unregister(int transport, int boundPort) {
+    if (boundPort != port) {
+      LOG.info("The bound port is " + boundPort
+          + ", different with configured port " + port);
+      port = boundPort;
+    }
+    // Unregister all the program versions with portmapper for a given transport
+    for (int vers = lowProgVersion; vers <= highProgVersion; vers++) {
+      PortmapMapping mapEntry = new PortmapMapping(progNumber, vers, transport,
+          port);
+      register(mapEntry, false);
     }
   }
   
   /**
    * Register the program with Portmap or Rpcbind
    */
-  protected void register(PortmapMapping mapEntry) {
-    XDR mappingRequest = PortmapRequest.create(mapEntry);
+  protected void register(PortmapMapping mapEntry, boolean set) {
+    XDR mappingRequest = PortmapRequest.create(mapEntry, set);
     SimpleUdpClient registrationClient = new SimpleUdpClient(host, RPCB_PORT,
         mappingRequest);
     try {
       registrationClient.run();
     } catch (IOException e) {
-      LOG.error("Registration failure with " + host + ":" + port
+      String request = set ? "Registration" : "Unregistration";
+      LOG.error(request + " failure with " + host + ":" + port
           + ", portmap entry: " + mapEntry);
-      throw new RuntimeException("Registration failure");
+      throw new RuntimeException(request + " failure");
     }
   }
 

+ 5 - 4
hadoop-common-project/hadoop-nfs/src/main/java/org/apache/hadoop/portmap/PortmapRequest.java

@@ -31,13 +31,14 @@ public class PortmapRequest {
     return PortmapMapping.deserialize(xdr);
   }
 
-  public static XDR create(PortmapMapping mapping) {
+  public static XDR create(PortmapMapping mapping, boolean set) {
     XDR request = new XDR();
+    int procedure = set ? RpcProgramPortmap.PMAPPROC_SET
+        : RpcProgramPortmap.PMAPPROC_UNSET;
     RpcCall call = RpcCall.getInstance(
         RpcUtil.getNewXid(String.valueOf(RpcProgramPortmap.PROGRAM)),
-        RpcProgramPortmap.PROGRAM, RpcProgramPortmap.VERSION,
-        RpcProgramPortmap.PMAPPROC_SET, new CredentialsNone(),
-        new VerifierNone());
+        RpcProgramPortmap.PROGRAM, RpcProgramPortmap.VERSION, procedure,
+        new CredentialsNone(), new VerifierNone());
     call.write(request);
     return mapping.serialize(request);
   }

+ 51 - 1
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java

@@ -19,6 +19,10 @@ package org.apache.hadoop.hdfs.nfs.nfs3;
 
 import java.io.IOException;
 import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeUnit;
 
@@ -29,7 +33,9 @@ import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSInputStream;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.io.MultipleIOException;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.ShutdownHookManager;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Objects;
@@ -94,7 +100,7 @@ class DFSClientCache {
   DFSClientCache(Configuration config) {
     this(config, DEFAULT_DFS_CLIENT_CACHE_SIZE);
   }
-
+  
   DFSClientCache(Configuration config, int clientCache) {
     this.config = config;
     this.clientCache = CacheBuilder.newBuilder()
@@ -107,8 +113,52 @@ class DFSClientCache {
         .expireAfterAccess(DEFAULT_DFS_INPUTSTREAM_CACHE_TTL, TimeUnit.SECONDS)
         .removalListener(inputStreamRemovalListener())
         .build(inputStreamLoader());
+    
+    ShutdownHookManager.get().addShutdownHook(new CacheFinalizer(),
+        SHUTDOWN_HOOK_PRIORITY);
+  }
+
+  /**
+   * Priority of the FileSystem shutdown hook.
+   */
+  public static final int SHUTDOWN_HOOK_PRIORITY = 10;
+  
+  private class CacheFinalizer implements Runnable {
+    @Override
+    public synchronized void run() {
+      try {
+        closeAll(true);
+      } catch (IOException e) {
+        LOG.info("DFSClientCache.closeAll() threw an exception:\n", e);
+      }
+    }
   }
+  
+  /**
+   * Close all DFSClient instances in the Cache.
+   * @param onlyAutomatic only close those that are marked for automatic closing
+   */
+  synchronized void closeAll(boolean onlyAutomatic) throws IOException {
+    List<IOException> exceptions = new ArrayList<IOException>();
 
+    ConcurrentMap<String, DFSClient> map = clientCache.asMap();
+
+    for (Entry<String, DFSClient> item : map.entrySet()) {
+      final DFSClient client = item.getValue();
+      if (client != null) {
+        try {
+          client.close();
+        } catch (IOException ioe) {
+          exceptions.add(ioe);
+        }
+      }
+    }
+
+    if (!exceptions.isEmpty()) {
+      throw MultipleIOException.createIOException(exceptions);
+    }
+  }
+  
   private CacheLoader<String, DFSClient> clientLoader() {
     return new CacheLoader<String, DFSClient>() {
       @Override

+ 156 - 132
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -13,10 +13,6 @@ Trunk (Unreleased)
 
     HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
 
-    HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
-    as a collection of storages (see breakdown of tasks below for features and
-    contributors).
-
   IMPROVEMENTS
 
     HDFS-4665. Move TestNetworkTopologyWithNodeGroup to common.
@@ -246,8 +242,8 @@ Trunk (Unreleased)
     HDFS-5651. Remove dfs.namenode.caching.enabled and improve CRM locking.
     (cmccabe via wang)
 
-    HDFS-5496. Make replication queue initialization asynchronous. (Vinay via
-    jing9)
+    HDFS-5715. Use Snapshot ID to indicate the corresponding Snapshot for a
+    FileDiff/DirectoryDiff. (jing9)
 
   OPTIMIZATIONS
 
@@ -452,12 +448,6 @@ Trunk (Unreleased)
 
     HDFS-5626. dfsadmin -report shows incorrect cache values. (cmccabe)
 
-    HDFS-5406. Send incremental block reports for all storages in a
-    single call. (Arpit Agarwal)
-
-    HDFS-5454. DataNode UUID should be assigned prior to FsDataset
-    initialization. (Arpit Agarwal)
-
     HDFS-5679. TestCacheDirectives should handle the case where native code
     is not available. (wang)
 
@@ -471,130 +461,19 @@ Trunk (Unreleased)
     HDFS-5659. dfsadmin -report doesn't output cache information properly.
     (wang)
 
-  BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
-
-    HDFS-4985. Add storage type to the protocol and expose it in block report
-    and block locations. (Arpit Agarwal)
-
-    HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
-
-    HDFS-5000. DataNode configuration should allow specifying storage type.
-    (Arpit Agarwal)
-
-    HDFS-4987. Namenode changes to track multiple storages per datanode.
-    (szetszwo)
-
-    HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
-    (Junping Du via szetszwo)
-
-    HDFS-5009. Include storage information in the LocatedBlock.  (szetszwo)
-
-    HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
-    firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
-    fix a synchronization problem in DatanodeStorageInfo.  (szetszwo)
-
-    HDFS-5157. Add StorageType to FsVolume.  (Junping Du via szetszwo)
-
-    HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
-    datanodes.  (szetszwo)
-
-    HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
-
-    HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
-
-    HDFS-5222. Move block schedule information from DatanodeDescriptor to
-    DatanodeStorageInfo.  (szetszwo)
-
-    HDFS-4988. Datanode must support all the volumes as individual storages.
-    (Arpit Agarwal)
-
-    HDFS-5377. Heartbeats from Datandode should include one storage report
-    per storage directory. (Arpit Agarwal)
-
-    HDFS-5398. NameNode changes to process storage reports per storage
-    directory. (Arpit Agarwal)
-
-    HDFS-5390. Send one incremental block report per storage directory.
-    (Arpit Agarwal)
-
-    HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
-
-    HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities.  (szetszwo)
-
-    HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
-
-    HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
-    Arpit Agarwal)
-
-    HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
-    Agarwal)
-
-    HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
-
-    HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
-
-    HDFS-5448. Datanode should generate its ID on first registration. (Arpit
-    Agarwal)
-
-    HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
-    Agarwal)
-
-    HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
-
-    HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
-    (Contributed by szetszwo)
-
-    HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
-    by szetszwo)
-
-    HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
-    Agarwal)
-
-    HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
-    (Contributed by szetszwo)
-
-    HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
-    TestNNThroughputBenchmark (Contributed by szetszwo)
-
-    HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
-    Agarwal)
-
-    HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
-    by Junping Du)
-
-    HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
-
-    HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
-
-    HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
-
-    HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
-
-    HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
-
-    HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
-    Agarwal)
-
-    HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
-    (Junping Du via Arpit Agarwal)
- 
-    HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
+    HDFS-5705. TestSecondaryNameNodeUpgrade#testChangeNsIDFails may fail due
+    to ConcurrentModificationException. (Ted Yu via brandonli)
 
-    HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
-    Agarwal)
+    HDFS-5719. FSImage#doRollback() should close prevState before return
+    (Ted Yu via brandonli)
 
-    HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
-    Agarwal)
+    HDFS-5589. Namenode loops caching and uncaching when data should be
+    uncached (awang via cmccabe)
 
-    HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
-    szetszwo)
+    HDFS-5724. modifyCacheDirective logging audit log command wrongly as
+    addCacheDirective (Uma Maheswara Rao G via Colin Patrick McCabe)
 
-    HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
-
-    HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
-    not accurate. (Eric Sirianni via Arpit Agarwal)
-
-    HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
+    HDFS-5726. Fix compilation error in AbstractINodeDiff for JDK7. (jing9)
 
 Release 2.4.0 - UNRELEASED
 
@@ -626,6 +505,10 @@ Release 2.4.0 - UNRELEASED
 
     HDFS-5514. FSNamesystem's fsLock should allow custom implementation (daryn)
 
+    HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
+    as a collection of storages (see breakdown of tasks below for features and
+    contributors).
+
   IMPROVEMENTS
 
     HDFS-5267. Remove volatile from LightWeightHashSet. (Junping Du via llu)
@@ -783,6 +666,11 @@ Release 2.4.0 - UNRELEASED
     HDFS-2933. Improve DataNode Web UI Index Page. (Vivek Ganesan via
     Arpit Agarwal)
 
+    HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper.
+    (Haohui Mai via jing9)
+
+    HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe)
+
   OPTIMIZATIONS
 
     HDFS-5239.  Allow FSNamesystem lock fairness to be configurable (daryn)
@@ -848,6 +736,139 @@ Release 2.4.0 - UNRELEASED
     HDFS-5690. DataNode fails to start in secure mode when dfs.http.policy equals to 
     HTTP_ONLY. (Haohui Mai via jing9)
 
+  BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
+
+    HDFS-4985. Add storage type to the protocol and expose it in block report
+    and block locations. (Arpit Agarwal)
+
+    HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
+
+    HDFS-5000. DataNode configuration should allow specifying storage type.
+    (Arpit Agarwal)
+
+    HDFS-4987. Namenode changes to track multiple storages per datanode.
+    (szetszwo)
+
+    HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
+    (Junping Du via szetszwo)
+
+    HDFS-5009. Include storage information in the LocatedBlock.  (szetszwo)
+
+    HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
+    firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
+    fix a synchronization problem in DatanodeStorageInfo.  (szetszwo)
+
+    HDFS-5157. Add StorageType to FsVolume.  (Junping Du via szetszwo)
+
+    HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
+    datanodes.  (szetszwo)
+
+    HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
+
+    HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
+
+    HDFS-5222. Move block schedule information from DatanodeDescriptor to
+    DatanodeStorageInfo.  (szetszwo)
+
+    HDFS-4988. Datanode must support all the volumes as individual storages.
+    (Arpit Agarwal)
+
+    HDFS-5377. Heartbeats from Datandode should include one storage report
+    per storage directory. (Arpit Agarwal)
+
+    HDFS-5398. NameNode changes to process storage reports per storage
+    directory. (Arpit Agarwal)
+
+    HDFS-5390. Send one incremental block report per storage directory.
+    (Arpit Agarwal)
+
+    HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
+
+    HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities.  (szetszwo)
+
+    HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
+
+    HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
+    Arpit Agarwal)
+
+    HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
+    Agarwal)
+
+    HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
+
+    HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
+
+    HDFS-5448. Datanode should generate its ID on first registration. (Arpit
+    Agarwal)
+
+    HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
+    Agarwal)
+
+    HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
+
+    HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
+    (Contributed by szetszwo)
+
+    HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
+    by szetszwo)
+
+    HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
+    Agarwal)
+
+    HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
+    (Contributed by szetszwo)
+
+    HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
+    TestNNThroughputBenchmark (Contributed by szetszwo)
+
+    HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
+    Agarwal)
+
+    HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
+    by Junping Du)
+
+    HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
+
+    HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
+
+    HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
+
+    HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
+
+    HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
+
+    HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
+    Agarwal)
+
+    HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
+    (Junping Du via Arpit Agarwal)
+ 
+    HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
+
+    HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
+    Agarwal)
+
+    HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
+    Agarwal)
+
+    HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
+    szetszwo)
+
+    HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
+
+    HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
+    not accurate. (Eric Sirianni via Arpit Agarwal)
+
+    HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
+
+    HDFS-5406. Send incremental block reports for all storages in a
+    single call. (Arpit Agarwal)
+
+    HDFS-5454. DataNode UUID should be assigned prior to FsDataset
+    initialization. (Arpit Agarwal)
+
+    HDFS-5667. Include DatanodeStorage in StorageReport. (Arpit Agarwal)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -1042,6 +1063,9 @@ Release 2.3.0 - UNRELEASED
 
     HDFS-5671. Fix socket leak in DFSInputStream#getBlockReader. (JamesLi via umamahesh) 
 
+    HDFS-5649. Unregister NFS and Mount service when NFS gateway is shutting down.
+    (brandonli)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 7 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java

@@ -1559,13 +1559,17 @@ public class PBHelper {
     StorageReportProto.Builder builder = StorageReportProto.newBuilder()
         .setBlockPoolUsed(r.getBlockPoolUsed()).setCapacity(r.getCapacity())
         .setDfsUsed(r.getDfsUsed()).setRemaining(r.getRemaining())
-        .setStorageUuid(r.getStorageID());
+        .setStorageUuid(r.getStorage().getStorageID())
+        .setStorage(convert(r.getStorage()));
     return builder.build();
   }
 
   public static StorageReport convert(StorageReportProto p) {
-    return new StorageReport(p.getStorageUuid(), p.getFailed(),
-        p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
+    return new StorageReport(
+        p.hasStorage() ?
+            convert(p.getStorage()) :
+            new DatanodeStorage(p.getStorageUuid()),
+        p.getFailed(), p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
         p.getBlockPoolUsed());
   }
 

+ 150 - 24
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java

@@ -21,12 +21,14 @@ import static org.apache.hadoop.util.ExitUtil.terminate;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Random;
+import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
@@ -47,6 +49,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.INode;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
 import org.apache.hadoop.hdfs.server.namenode.INodeFile;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.util.ReadOnlyList;
 import org.apache.hadoop.util.GSet;
 import org.apache.hadoop.util.Time;
@@ -76,7 +79,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
   /**
    * Pseudorandom number source
    */
-  private final Random random = new Random();
+  private static final Random random = new Random();
 
   /**
    * The interval at which we scan the namesystem for caching changes.
@@ -310,8 +313,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     FSDirectory fsDir = namesystem.getFSDirectory();
     final long now = new Date().getTime();
     for (CacheDirective directive : cacheManager.getCacheDirectives()) {
-      // Reset the directive's statistics
-      directive.resetStatistics();
       // Skip processing this entry if it has expired
       if (LOG.isTraceEnabled()) {
         LOG.trace("Directive expiry is at " + directive.getExpiryTime());
@@ -339,7 +340,8 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
         }
       } else if (node.isDirectory()) {
         INodeDirectory dir = node.asDirectory();
-        ReadOnlyList<INode> children = dir.getChildrenList(null);
+        ReadOnlyList<INode> children = dir
+            .getChildrenList(Snapshot.CURRENT_STATE_ID);
         for (INode child : children) {
           if (child.isFile()) {
             rescanFile(directive, child.asFile());
@@ -461,7 +463,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
       // there may be a period of time when incomplete blocks remain cached
       // on the DataNodes.
       return "not complete";
-    }  else if (cblock.getReplication() == 0) {
+    } else if (cblock.getReplication() == 0) {
       // Since 0 is not a valid value for a cache directive's replication
       // field, seeing a replication of 0 on a CacheBlock means that it
       // has never been reached by any sweep.
@@ -469,6 +471,9 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     } else if (cblock.getMark() != mark) { 
       // Although the block was needed in the past, we didn't reach it during
       // the current sweep.  Therefore, it doesn't need to be cached any more.
+      // Need to set the replication to 0 so it doesn't flip back to cached
+      // when the mark flips on the next scan
+      cblock.setReplicationAndMark((short)0, mark);
       return "no longer needed by any directives";
     }
     return null;
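
The mark test above is one leg of a mark-and-sweep pass: every rescan flips a global mark and stamps it onto each block that some directive still wants, so a block whose stored mark differs from the current one was not reached and should be uncached. Clearing its replication while stamping the current mark keeps it from looking wanted again when the mark flips on the next scan. A toy sketch of that invariant, with illustrative names rather than the CachedBlock API:

import java.util.Arrays;
import java.util.List;

// Toy model of the mark-and-sweep bookkeeping; names are illustrative.
public class MarkSweepSketch {

    static class Block {
        short replication;        // 0 => not wanted by any directive
        boolean mark;             // sweep that last reached this block
    }

    static boolean currentMark = false;

    // One rescan: flip the mark, stamp the blocks that are still wanted,
    // then clear everything the sweep did not reach.
    static void rescan(List<Block> all, List<Block> wanted) {
        currentMark = !currentMark;
        for (Block b : wanted) {
            b.replication = 3;                // requested cache replication
            b.mark = currentMark;
        }
        for (Block b : all) {
            if (b.replication != 0 && b.mark != currentMark) {
                // Unreached: clear replication AND stamp the current mark so the
                // block stays "not wanted" after the next flip as well.
                b.replication = 0;
                b.mark = currentMark;
            }
        }
    }

    public static void main(String[] args) {
        Block kept = new Block(), dropped = new Block();
        List<Block> all = Arrays.asList(kept, dropped);
        rescan(all, Arrays.asList(kept, dropped));   // both wanted
        rescan(all, Arrays.asList(kept));            // 'dropped' falls out
        rescan(all, Arrays.asList(kept));            // further sweeps: stays out
        System.out.println(kept.replication + " " + dropped.replication);  // 3 0
    }
}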
@@ -595,7 +600,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
    * @param pendingCached    A list of DataNodes that will soon cache the
    *                         block.
    */
-  private void addNewPendingCached(int neededCached,
+  private void addNewPendingCached(final int neededCached,
       CachedBlock cachedBlock, List<DatanodeDescriptor> cached,
       List<DatanodeDescriptor> pendingCached) {
     // To figure out which replicas can be cached, we consult the
@@ -616,35 +621,156 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
       }
       return;
     }
-    List<DatanodeDescriptor> possibilities = new LinkedList<DatanodeDescriptor>();
+    // Filter the list of replicas to only the valid targets
+    List<DatanodeDescriptor> possibilities =
+        new LinkedList<DatanodeDescriptor>();
     int numReplicas = blockInfo.getCapacity();
     Collection<DatanodeDescriptor> corrupt =
         blockManager.getCorruptReplicas(blockInfo);
+    int outOfCapacity = 0;
     for (int i = 0; i < numReplicas; i++) {
       DatanodeDescriptor datanode = blockInfo.getDatanode(i);
-      if ((datanode != null) && 
-          ((!pendingCached.contains(datanode)) &&
-          ((corrupt == null) || (!corrupt.contains(datanode))))) {
-        possibilities.add(datanode);
+      if (datanode == null) {
+        continue;
       }
-    }
-    while (neededCached > 0) {
-      if (possibilities.isEmpty()) {
-        LOG.warn("We need " + neededCached + " more replica(s) than " +
-            "actually exist to provide a cache replication of " +
-            cachedBlock.getReplication() + " for " + cachedBlock);
-        return;
+      if (datanode.isDecommissioned() || datanode.isDecommissionInProgress()) {
+        continue;
       }
-      DatanodeDescriptor datanode =
-          possibilities.remove(random.nextInt(possibilities.size()));
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("AddNewPendingCached: datanode " + datanode + 
-            " will now cache block " + cachedBlock);
+      if (corrupt != null && corrupt.contains(datanode)) {
+        continue;
       }
+      if (pendingCached.contains(datanode) || cached.contains(datanode)) {
+        continue;
+      }
+      long pendingCapacity = datanode.getCacheRemaining();
+      // Subtract pending cached blocks from effective capacity
+      Iterator<CachedBlock> it = datanode.getPendingCached().iterator();
+      while (it.hasNext()) {
+        CachedBlock cBlock = it.next();
+        BlockInfo info =
+            blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
+        if (info != null) {
+          pendingCapacity -= info.getNumBytes();
+        }
+      }
+      // Add pending uncached blocks back to effective capacity
+      it = datanode.getPendingUncached().iterator();
+      while (it.hasNext()) {
+        CachedBlock cBlock = it.next();
+        BlockInfo info =
+            blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
+        if (info != null) {
+          pendingCapacity += info.getNumBytes();
+        }
+      }
+      if (pendingCapacity < blockInfo.getNumBytes()) {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Datanode " + datanode + " is not a valid possibility for"
+              + " block " + blockInfo.getBlockId() + " of size "
+              + blockInfo.getNumBytes() + " bytes, only has "
+              + datanode.getCacheRemaining() + " bytes of cache remaining.");
+        }
+        outOfCapacity++;
+        continue;
+      }
+      possibilities.add(datanode);
+    }
+    List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities,
+        neededCached, blockManager.getDatanodeManager().getStaleInterval());
+    for (DatanodeDescriptor datanode : chosen) {
       pendingCached.add(datanode);
       boolean added = datanode.getPendingCached().add(cachedBlock);
       assert added;
-      neededCached--;
     }
+    // We were unable to satisfy the requested replication factor
+    if (neededCached > chosen.size()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(
+            "Only have " +
+            (cachedBlock.getReplication() - neededCached + chosen.size()) +
+            " of " + cachedBlock.getReplication() + " cached replicas for " +
+            cachedBlock + " (" + outOfCapacity + " nodes have insufficient " +
+            "capacity).");
+      }
+    }
+  }
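
The candidate filter above estimates how much cache a datanode will actually have once its queued work drains: start from the reported cache remaining, subtract the bytes of blocks already pending caching, and add back the bytes of blocks pending uncaching; only nodes whose adjusted figure covers the block stay in the candidate list. A standalone sketch of that arithmetic (plain longs stand in for the HDFS block and datanode types):

import java.util.Arrays;
import java.util.List;

public class EffectiveCacheCapacitySketch {

    // Start from the reported free cache, subtract blocks queued for caching,
    // add back blocks queued for uncaching.
    static long effectiveCacheRemaining(long cacheRemaining,
                                        List<Long> pendingCachedBytes,
                                        List<Long> pendingUncachedBytes) {
        long capacity = cacheRemaining;
        for (long bytes : pendingCachedBytes) {
            capacity -= bytes;            // soon to be consumed
        }
        for (long bytes : pendingUncachedBytes) {
            capacity += bytes;            // soon to be released
        }
        return capacity;
    }

    public static void main(String[] args) {
        long mb = 1024L * 1024;
        long effective = effectiveCacheRemaining(
            512 * mb,                      // reported cache remaining
            Arrays.asList(256 * mb),       // pending cached
            Arrays.asList(64 * mb));       // pending uncached
        // 512 - 256 + 64 = 320 MB, so a 300 MB block is still a valid target.
        System.out.println(effective >= 300 * mb);   // true
    }
}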
+
+  /**
+   * Chooses datanode locations for caching from a list of valid possibilities.
+   * Non-stale nodes are chosen before stale nodes.
+   * 
+   * @param possibilities List of candidate datanodes
+   * @param neededCached Number of replicas needed
+   * @param staleInterval Interval after which a datanode is considered stale
+   * @return A list of chosen datanodes
+   */
+  private static List<DatanodeDescriptor> chooseDatanodesForCaching(
+      final List<DatanodeDescriptor> possibilities, final int neededCached,
+      final long staleInterval) {
+    // Make a copy that we can modify
+    List<DatanodeDescriptor> targets =
+        new ArrayList<DatanodeDescriptor>(possibilities);
+    // Selected targets
+    List<DatanodeDescriptor> chosen = new LinkedList<DatanodeDescriptor>();
+
+    // Filter out stale datanodes
+    List<DatanodeDescriptor> stale = new LinkedList<DatanodeDescriptor>();
+    Iterator<DatanodeDescriptor> it = targets.iterator();
+    while (it.hasNext()) {
+      DatanodeDescriptor d = it.next();
+      if (d.isStale(staleInterval)) {
+        it.remove();
+        stale.add(d);
+      }
+    }
+    // Select targets
+    while (chosen.size() < neededCached) {
+      // Try to use stale nodes if we're out of non-stale nodes, else we're done
+      if (targets.isEmpty()) {
+        if (!stale.isEmpty()) {
+          targets = stale;
+        } else {
+          break;
+        }
+      }
+      // Select a random target
+      DatanodeDescriptor target =
+          chooseRandomDatanodeByRemainingCapacity(targets);
+      chosen.add(target);
+      targets.remove(target);
+    }
+    return chosen;
+  }
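
The selection loop above exhausts non-stale candidates before it touches stale ones, and simply returns fewer targets than requested when both pools run dry, which is what triggers the insufficient-replication debug message in addNewPendingCached. A generic sketch of that two-tier selection, using a plain random pick in place of the weighted lottery below:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;

public class TwoTierSelectionSketch {
    private static final Random RANDOM = new Random();

    // Pick up to 'needed' items at random, exhausting the preferred tier before
    // falling back to the stale tier; may return fewer if both tiers run out.
    static <T> List<T> choose(List<T> preferred, List<T> fallback, int needed) {
        List<T> targets = new ArrayList<T>(preferred);
        List<T> stale = new ArrayList<T>(fallback);
        List<T> chosen = new LinkedList<T>();
        while (chosen.size() < needed) {
            if (targets.isEmpty()) {
                if (stale.isEmpty()) {
                    break;                              // nothing left anywhere
                }
                targets = stale;                        // switch to the stale tier
                stale = new ArrayList<T>();
            }
            chosen.add(targets.remove(RANDOM.nextInt(targets.size())));
        }
        return chosen;
    }

    public static void main(String[] args) {
        List<String> fresh = Arrays.asList("dn1", "dn2");
        List<String> stale = Arrays.asList("dn3");
        // Asking for 3 forces the stale node into the result; asking for 5 returns only 3.
        System.out.println(choose(fresh, stale, 3));          // e.g. [dn2, dn1, dn3]
        System.out.println(choose(fresh, stale, 5).size());   // 3
    }
}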
+
+  /**
+   * Choose a single datanode from the provided list of possible
+   * targets, weighted by the percentage of cache capacity remaining on the node.
+   * 
+   * @return The chosen datanode
+   */
+  private static DatanodeDescriptor chooseRandomDatanodeByRemainingCapacity(
+      final List<DatanodeDescriptor> targets) {
+    // Use a weighted probability to choose the target datanode
+    float total = 0;
+    for (DatanodeDescriptor d : targets) {
+      total += d.getCacheRemainingPercent();
+    }
+    // Give each datanode a portion of keyspace equal to its relative weight
+    // [0, w1) selects d1, [w1, w1+w2) selects d2, etc.
+    TreeMap<Integer, DatanodeDescriptor> lottery =
+        new TreeMap<Integer, DatanodeDescriptor>();
+    int offset = 0;
+    for (DatanodeDescriptor d : targets) {
+      // Since we're using floats, be paranoid about negative values
+      int weight =
+          Math.max(1, (int)((d.getCacheRemainingPercent() / total) * 1000000));
+      offset += weight;
+      lottery.put(offset, d);
+    }
+    // Choose a number from [0, offset), which is the total amount of weight,
+    // to select the winner
+    DatanodeDescriptor winner =
+        lottery.higherEntry(random.nextInt(offset)).getValue();
+    return winner;
   }
 }
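
The lottery builds cumulative weights as TreeMap keys, so drawing r in [0, totalWeight) and taking higherEntry(r) lands on the datanode whose cumulative interval contains r; higherEntry (strictly greater) is what makes the half-open intervals line up. A self-contained sketch of the same technique over plain strings:

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;

public class WeightedLotterySketch {
    private static final Random RANDOM = new Random();

    // Pick one key from 'weights' with probability proportional to its weight.
    static String pick(Map<String, Integer> weights) {
        TreeMap<Integer, String> lottery = new TreeMap<Integer, String>();
        int offset = 0;
        for (Map.Entry<String, Integer> e : weights.entrySet()) {
            offset += e.getValue();          // cumulative weight becomes the key
            lottery.put(offset, e.getKey()); // this entry owns (prevOffset, offset]
        }
        // r is in [0, offset); higherEntry(r) is the least key strictly greater
        // than r, i.e. the cumulative interval that contains r.
        return lottery.higherEntry(RANDOM.nextInt(offset)).getValue();
    }

    public static void main(String[] args) {
        Map<String, Integer> weights = new LinkedHashMap<String, Integer>();
        weights.put("dn1", 70);   // ~70% of draws
        weights.put("dn2", 20);   // ~20%
        weights.put("dn3", 10);   // ~10%
        int dn1 = 0;
        for (int i = 0; i < 10000; i++) {
            if ("dn1".equals(pick(weights))) {
                dn1++;
            }
        }
        System.out.println(dn1);  // roughly 7000
    }
}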

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java

@@ -355,11 +355,11 @@ public class DatanodeDescriptor extends DatanodeInfo {
     setLastUpdate(Time.now());    
     this.volumeFailures = volFailures;
     for (StorageReport report : reports) {
-      DatanodeStorageInfo storage = storageMap.get(report.getStorageID());
+      DatanodeStorageInfo storage = storageMap.get(report.getStorage().getStorageID());
       if (storage == null) {
         // This is seen during cluster initialization when the heartbeat
         // is received before the initial block reports from each storage.
-        storage = updateStorage(new DatanodeStorage(report.getStorageID()));
+        storage = updateStorage(report.getStorage());
       }
       storage.receivedHeartbeat(report);
       totalCapacity += report.getCapacity();

+ 2 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java

@@ -121,7 +121,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
       reports = new StorageReport[volumes.volumes.size()];
       int i = 0;
       for (FsVolumeImpl volume : volumes.volumes) {
-        reports[i++] = new StorageReport(volume.getStorageID(),
+        reports[i++] = new StorageReport(volume.toDatanodeStorage(),
                                          false,
                                          volume.getCapacity(),
                                          volume.getDfsUsed(),
@@ -237,12 +237,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     final List<FsVolumeImpl> volArray = new ArrayList<FsVolumeImpl>(
         storage.getNumStorageDirs());
     for (int idx = 0; idx < storage.getNumStorageDirs(); idx++) {
-      // TODO: getStorageTypeFromLocations() is only a temporary workaround and 
-      // should be replaced with getting storage type from DataStorage (missing 
-      // storage type now) directly.
       Storage.StorageDirectory sd = storage.getStorageDir(idx);
       final File dir = sd.getCurrentDir();
-      final StorageType storageType = getStorageTypeFromLocations(dataLocations, dir);
+      final StorageType storageType = getStorageTypeFromLocations(dataLocations, sd.getRoot());
       volArray.add(new FsVolumeImpl(this, sd.getStorageUuid(), dir, conf,
           storageType));
       LOG.info("Added volume - " + dir + ", StorageType: " + storageType);

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java

@@ -19,10 +19,10 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.HashMap;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Executor;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadFactory;
@@ -54,7 +54,7 @@ class FsVolumeImpl implements FsVolumeSpi {
   private final String storageID;
   private final StorageType storageType;
   private final Map<String, BlockPoolSlice> bpSlices
-      = new HashMap<String, BlockPoolSlice>();
+      = new ConcurrentHashMap<String, BlockPoolSlice>();
   private final File currentDir;    // <StorageDirectory>/current
   private final DF usage;           
   private final long reserved;
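
Swapping the HashMap for a ConcurrentHashMap lets bpSlices be read on hot paths while block pools are added or shut down from other threads, without wrapping every lookup in a lock. A minimal, illustrative sketch of that access pattern (the method names below are stand-ins, not the FsVolumeImpl API):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class BlockPoolMapSketch {
    private final Map<String, String> bpSlices = new ConcurrentHashMap<String, String>();

    void addBlockPool(String bpid, String slice)   { bpSlices.put(bpid, slice); }
    void shutdownBlockPool(String bpid)            { bpSlices.remove(bpid); }
    String getBlockPoolSlice(String bpid)          { return bpSlices.get(bpid); }

    public static void main(String[] args) throws InterruptedException {
        final BlockPoolMapSketch volume = new BlockPoolMapSketch();
        Thread writer = new Thread(new Runnable() {
            public void run() {
                for (int i = 0; i < 1000; i++) {
                    volume.addBlockPool("BP-" + i, "slice-" + i);
                }
            }
        });
        Thread reader = new Thread(new Runnable() {
            public void run() {
                for (int i = 0; i < 1000; i++) {
                    volume.getBlockPoolSlice("BP-" + (i / 2));  // safe while the writer runs
                }
            }
        });
        writer.start(); reader.start();
        writer.join(); reader.join();
        System.out.println(volume.getBlockPoolSlice("BP-0"));   // slice-0
    }
}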

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java

@@ -53,7 +53,6 @@ import org.apache.hadoop.fs.InvalidRequestException;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hdfs.DFSUtil;
-import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.CacheDirective;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
@@ -69,6 +68,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
 import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
@@ -399,7 +399,8 @@ public final class CacheManager {
       requestedBytes = file.computeFileSize();
     } else if (node.isDirectory()) {
       INodeDirectory dir = node.asDirectory();
-      ReadOnlyList<INode> children = dir.getChildrenList(null);
+      ReadOnlyList<INode> children = dir
+          .getChildrenList(Snapshot.CURRENT_STATE_ID);
       requestedFiles = children.size();
       for (INode child : children) {
         if (child.isFile()) {

+ 54 - 59
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

@@ -614,14 +614,14 @@ public class FSDirectory implements Closeable {
     INode srcChild = srcIIP.getLastINode();
     final byte[] srcChildName = srcChild.getLocalNameBytes();
     final boolean isSrcInSnapshot = srcChild.isInLatestSnapshot(
-        srcIIP.getLatestSnapshot());
+        srcIIP.getLatestSnapshotId());
     final boolean srcChildIsReference = srcChild.isReference();
     
     // Record the snapshot on srcChild. After the rename, before any new 
     // snapshot is taken on the dst tree, changes will be recorded in the latest
     // snapshot of the src tree.
     if (isSrcInSnapshot) {
-      srcChild = srcChild.recordModification(srcIIP.getLatestSnapshot());
+      srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
       srcIIP.setLastINode(srcChild);
     }
     
@@ -629,17 +629,16 @@ public class FSDirectory implements Closeable {
     final INodeReference.WithCount withCount;
     Quota.Counts oldSrcCounts = Quota.Counts.newInstance();
     int srcRefDstSnapshot = srcChildIsReference ? srcChild.asReference()
-        .getDstSnapshotId() : Snapshot.INVALID_ID;
+        .getDstSnapshotId() : Snapshot.CURRENT_STATE_ID;
     if (isSrcInSnapshot) {
       final INodeReference.WithName withName = 
           srcIIP.getINode(-2).asDirectory().replaceChild4ReferenceWithName(
-              srcChild, srcIIP.getLatestSnapshot()); 
+              srcChild, srcIIP.getLatestSnapshotId()); 
       withCount = (INodeReference.WithCount) withName.getReferredINode();
       srcChild = withName;
       srcIIP.setLastINode(srcChild);
       // get the counts before rename
-      withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true,
-          Snapshot.INVALID_ID);
+      withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true);
     } else if (srcChildIsReference) {
       // srcChild is reference but srcChild is not in latest snapshot
       withCount = (WithCount) srcChild.asReference().getReferredINode();
@@ -675,10 +674,9 @@ public class FSDirectory implements Closeable {
         toDst = srcChild;
       } else {
         withCount.getReferredINode().setLocalName(dstChildName);
-        Snapshot dstSnapshot = dstIIP.getLatestSnapshot();
+        int dstSnapshotId = dstIIP.getLatestSnapshotId();
         final INodeReference.DstReference ref = new INodeReference.DstReference(
-            dstParent.asDirectory(), withCount,
-            dstSnapshot == null ? Snapshot.INVALID_ID : dstSnapshot.getId());
+            dstParent.asDirectory(), withCount, dstSnapshotId);
         toDst = ref;
       }
       
@@ -690,9 +688,9 @@ public class FSDirectory implements Closeable {
         }
         // update modification time of dst and the parent of src
         final INode srcParent = srcIIP.getINode(-2);
-        srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshot());
+        srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshotId());
         dstParent = dstIIP.getINode(-2); // refresh dstParent
-        dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshot());
+        dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshotId());
         // update moved leases with new filename
         getFSNamesystem().unprotectedChangeLease(src, dst);     
 
@@ -700,7 +698,7 @@ public class FSDirectory implements Closeable {
         if (isSrcInSnapshot) {
           // get the counts after rename
           Quota.Counts newSrcCounts = srcChild.computeQuotaUsage(
-              Quota.Counts.newInstance(), false, Snapshot.INVALID_ID);
+              Quota.Counts.newInstance(), false);
           newSrcCounts.subtract(oldSrcCounts);
           srcParent.addSpaceConsumed(newSrcCounts.get(Quota.NAMESPACE),
               newSrcCounts.get(Quota.DISKSPACE), false);
@@ -732,8 +730,7 @@ public class FSDirectory implements Closeable {
         if (isSrcInSnapshot) {
           // srcParent must have snapshot feature since isSrcInSnapshot is true
           // and src node has been removed from srcParent 
-          srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild,
-              srcIIP.getLatestSnapshot());
+          srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild);
         } else {
           // original srcChild is not in latest snapshot, we only need to add
           // the srcChild back
@@ -836,7 +833,7 @@ public class FSDirectory implements Closeable {
       }
       if (dstInode.isDirectory()) {
         final ReadOnlyList<INode> children = dstInode.asDirectory()
-            .getChildrenList(null);
+            .getChildrenList(Snapshot.CURRENT_STATE_ID);
         if (!children.isEmpty()) {
           error = "rename destination directory is not empty: " + dst;
           NameNode.stateChangeLog.warn(
@@ -867,31 +864,30 @@ public class FSDirectory implements Closeable {
     INode srcChild = srcIIP.getLastINode();
     final byte[] srcChildName = srcChild.getLocalNameBytes();
     final boolean isSrcInSnapshot = srcChild.isInLatestSnapshot(
-        srcIIP.getLatestSnapshot());
+        srcIIP.getLatestSnapshotId());
     final boolean srcChildIsReference = srcChild.isReference();
     
     // Record the snapshot on srcChild. After the rename, before any new 
     // snapshot is taken on the dst tree, changes will be recorded in the latest
     // snapshot of the src tree.
     if (isSrcInSnapshot) {
-      srcChild = srcChild.recordModification(srcIIP.getLatestSnapshot());
+      srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
       srcIIP.setLastINode(srcChild);
     }
     
     // check srcChild for reference
     final INodeReference.WithCount withCount;
     int srcRefDstSnapshot = srcChildIsReference ? srcChild.asReference()
-        .getDstSnapshotId() : Snapshot.INVALID_ID;
+        .getDstSnapshotId() : Snapshot.CURRENT_STATE_ID;
     Quota.Counts oldSrcCounts = Quota.Counts.newInstance();    
     if (isSrcInSnapshot) {
       final INodeReference.WithName withName = srcIIP.getINode(-2).asDirectory()
-          .replaceChild4ReferenceWithName(srcChild, srcIIP.getLatestSnapshot()); 
+          .replaceChild4ReferenceWithName(srcChild, srcIIP.getLatestSnapshotId()); 
       withCount = (INodeReference.WithCount) withName.getReferredINode();
       srcChild = withName;
       srcIIP.setLastINode(srcChild);
       // get the counts before rename
-      withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true,
-          Snapshot.INVALID_ID);
+      withCount.getReferredINode().computeQuotaUsage(oldSrcCounts, true);
     } else if (srcChildIsReference) {
       // srcChild is reference but srcChild is not in latest snapshot
       withCount = (WithCount) srcChild.asReference().getReferredINode();
@@ -935,10 +931,9 @@ public class FSDirectory implements Closeable {
         toDst = srcChild;
       } else {
         withCount.getReferredINode().setLocalName(dstChildName);
-        Snapshot dstSnapshot = dstIIP.getLatestSnapshot();
+        int dstSnapshotId = dstIIP.getLatestSnapshotId();
         final INodeReference.DstReference ref = new INodeReference.DstReference(
-            dstIIP.getINode(-2).asDirectory(), withCount,
-            dstSnapshot == null ? Snapshot.INVALID_ID : dstSnapshot.getId());
+            dstIIP.getINode(-2).asDirectory(), withCount, dstSnapshotId);
         toDst = ref;
       }
 
@@ -952,9 +947,9 @@ public class FSDirectory implements Closeable {
         }
 
         final INode srcParent = srcIIP.getINode(-2);
-        srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshot());
+        srcParent.updateModificationTime(timestamp, srcIIP.getLatestSnapshotId());
         dstParent = dstIIP.getINode(-2);
-        dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshot());
+        dstParent.updateModificationTime(timestamp, dstIIP.getLatestSnapshotId());
         // update moved lease with new filename
         getFSNamesystem().unprotectedChangeLease(src, dst);
 
@@ -964,8 +959,8 @@ public class FSDirectory implements Closeable {
           undoRemoveDst = false;
           BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo();
           List<INode> removedINodes = new ChunkedArrayList<INode>();
-          filesDeleted = removedDst.cleanSubtree(null,
-              dstIIP.getLatestSnapshot(), collectedBlocks, removedINodes, true)
+          filesDeleted = removedDst.cleanSubtree(Snapshot.CURRENT_STATE_ID,
+              dstIIP.getLatestSnapshotId(), collectedBlocks, removedINodes, true)
               .get(Quota.NAMESPACE);
           getFSNamesystem().removePathAndBlocks(src, collectedBlocks,
               removedINodes);
@@ -981,7 +976,7 @@ public class FSDirectory implements Closeable {
         if (isSrcInSnapshot) {
           // get the counts after rename
           Quota.Counts newSrcCounts = srcChild.computeQuotaUsage(
-              Quota.Counts.newInstance(), false, Snapshot.INVALID_ID);
+              Quota.Counts.newInstance(), false);
           newSrcCounts.subtract(oldSrcCounts);
           srcParent.addSpaceConsumed(newSrcCounts.get(Quota.NAMESPACE),
               newSrcCounts.get(Quota.DISKSPACE), false);
@@ -1012,8 +1007,7 @@ public class FSDirectory implements Closeable {
         }
         
         if (srcParent.isWithSnapshot()) {
-          srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild,
-              srcIIP.getLatestSnapshot());
+          srcParent.undoRename4ScrParent(oldSrcChild.asReference(), srcChild);
         } else {
           // srcParent is not an INodeDirectoryWithSnapshot, we only need to add
           // the srcChild back
@@ -1024,7 +1018,7 @@ public class FSDirectory implements Closeable {
         // Rename failed - restore dst
         if (dstParent.isDirectory() && dstParent.asDirectory().isWithSnapshot()) {
           dstParent.asDirectory().undoRename4DstParent(removedDst,
-              dstIIP.getLatestSnapshot());
+              dstIIP.getLatestSnapshotId());
         } else {
           addLastINodeNoQuotaCheck(dstIIP, removedDst);
         }
@@ -1088,7 +1082,7 @@ public class FSDirectory implements Closeable {
       updateCount(iip, 0, dsDelta, true);
     }
 
-    file = file.setFileReplication(replication, iip.getLatestSnapshot(),
+    file = file.setFileReplication(replication, iip.getLatestSnapshotId(),
         inodeMap);
     
     final short newBR = file.getBlockReplication(); 
@@ -1155,7 +1149,7 @@ public class FSDirectory implements Closeable {
     if (inode == null) {
       throw new FileNotFoundException("File does not exist: " + src);
     }
-    inode.setPermission(permissions, inodesInPath.getLatestSnapshot());
+    inode.setPermission(permissions, inodesInPath.getLatestSnapshotId());
   }
 
   void setOwner(String src, String username, String groupname)
@@ -1180,10 +1174,10 @@ public class FSDirectory implements Closeable {
       throw new FileNotFoundException("File does not exist: " + src);
     }
     if (username != null) {
-      inode = inode.setUser(username, inodesInPath.getLatestSnapshot());
+      inode = inode.setUser(username, inodesInPath.getLatestSnapshotId());
     }
     if (groupname != null) {
-      inode.setGroup(groupname, inodesInPath.getLatestSnapshot());
+      inode.setGroup(groupname, inodesInPath.getLatestSnapshotId());
     }
   }
 
@@ -1225,12 +1219,12 @@ public class FSDirectory implements Closeable {
     final INode[] trgINodes = trgIIP.getINodes();
     final INodeFile trgInode = trgIIP.getLastINode().asFile();
     INodeDirectory trgParent = trgINodes[trgINodes.length-2].asDirectory();
-    final Snapshot trgLatestSnapshot = trgIIP.getLatestSnapshot();
+    final int trgLatestSnapshot = trgIIP.getLatestSnapshotId();
     
     final INodeFile [] allSrcInodes = new INodeFile[srcs.length];
     for(int i = 0; i < srcs.length; i++) {
       final INodesInPath iip = getINodesInPath4Write(srcs[i]);
-      final Snapshot latest = iip.getLatestSnapshot();
+      final int latest = iip.getLatestSnapshotId();
       final INode inode = iip.getLastINode();
 
       // check if the file in the latest snapshot
@@ -1354,7 +1348,7 @@ public class FSDirectory implements Closeable {
         //not found or not a directory
         return false;
       }
-      final Snapshot s = inodesInPath.getPathSnapshot();
+      final int s = inodesInPath.getPathSnapshotId();
       return !inode.asDirectory().getChildrenList(s).isEmpty();
     } finally {
       readUnlock();
@@ -1408,7 +1402,7 @@ public class FSDirectory implements Closeable {
     }
 
     // record modification
-    final Snapshot latestSnapshot = iip.getLatestSnapshot();
+    final int latestSnapshot = iip.getLatestSnapshotId();
     targetNode = targetNode.recordModification(latestSnapshot);
     iip.setLastINode(targetNode);
 
@@ -1429,8 +1423,8 @@ public class FSDirectory implements Closeable {
     if (!targetNode.isInLatestSnapshot(latestSnapshot)) {
       targetNode.destroyAndCollectBlocks(collectedBlocks, removedINodes);
     } else {
-      Quota.Counts counts = targetNode.cleanSubtree(null, latestSnapshot,
-          collectedBlocks, removedINodes, true);
+      Quota.Counts counts = targetNode.cleanSubtree(Snapshot.CURRENT_STATE_ID,
+          latestSnapshot, collectedBlocks, removedINodes, true);
       parent.addSpaceConsumed(-counts.get(Quota.NAMESPACE),
           -counts.get(Quota.DISKSPACE), true);
       removed = counts.get(Quota.NAMESPACE);
@@ -1467,7 +1461,7 @@ public class FSDirectory implements Closeable {
           }
         }
       } 
-      for (INode child : targetDir.getChildrenList(null)) {
+      for (INode child : targetDir.getChildrenList(Snapshot.CURRENT_STATE_ID)) {
         checkSnapshot(child, snapshottableDirs);
       }
     }
@@ -1491,7 +1485,7 @@ public class FSDirectory implements Closeable {
         return getSnapshotsListing(srcs, startAfter);
       }
       final INodesInPath inodesInPath = rootDir.getLastINodeInPath(srcs, true);
-      final Snapshot snapshot = inodesInPath.getPathSnapshot();
+      final int snapshot = inodesInPath.getPathSnapshotId();
       final INode targetNode = inodesInPath.getINode(0);
       if (targetNode == null)
         return null;
@@ -1543,7 +1537,8 @@ public class FSDirectory implements Closeable {
     final HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing];
     for (int i = 0; i < numOfListing; i++) {
       Root sRoot = snapshots.get(i + skipSize).getRoot();
-      listing[i] = createFileStatus(sRoot.getLocalNameBytes(), sRoot, null);
+      listing[i] = createFileStatus(sRoot.getLocalNameBytes(), sRoot,
+          Snapshot.CURRENT_STATE_ID);
     }
     return new DirectoryListing(
         listing, snapshots.size() - skipSize - numOfListing);
@@ -1566,7 +1561,7 @@ public class FSDirectory implements Closeable {
       final INodesInPath inodesInPath = rootDir.getLastINodeInPath(srcs, resolveLink);
       final INode i = inodesInPath.getINode(0);
       return i == null? null: createFileStatus(HdfsFileStatus.EMPTY_NAME, i,
-          inodesInPath.getPathSnapshot());
+          inodesInPath.getPathSnapshotId());
     } finally {
       readUnlock();
     }
@@ -2129,7 +2124,7 @@ public class FSDirectory implements Closeable {
     }
 
     final INodeDirectory parent = pathComponents[pos-1].asDirectory();
-    final int count = parent.getChildrenList(null).size();
+    final int count = parent.getChildrenList(Snapshot.CURRENT_STATE_ID).size();
     if (count >= maxDirItems) {
       final MaxDirectoryItemsExceededException e
           = new MaxDirectoryItemsExceededException(maxDirItems, count);
@@ -2193,7 +2188,7 @@ public class FSDirectory implements Closeable {
     final INodeDirectory parent = inodes[pos-1].asDirectory();
     boolean added = false;
     try {
-      added = parent.addChild(child, true, iip.getLatestSnapshot());
+      added = parent.addChild(child, true, iip.getLatestSnapshotId());
     } catch (QuotaExceededException e) {
       updateCountNoQuotaCheck(iip, pos,
           -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE));
@@ -2228,7 +2223,7 @@ public class FSDirectory implements Closeable {
    */
   private long removeLastINode(final INodesInPath iip)
       throws QuotaExceededException {
-    final Snapshot latestSnapshot = iip.getLatestSnapshot();
+    final int latestSnapshot = iip.getLatestSnapshotId();
     final INode last = iip.getLastINode();
     final INodeDirectory parent = iip.getINode(-2).asDirectory();
     if (!parent.removeChild(last, latestSnapshot)) {
@@ -2382,7 +2377,7 @@ public class FSDirectory implements Closeable {
         return null;
       }
 
-      final Snapshot latest = iip.getLatestSnapshot();
+      final int latest = iip.getLatestSnapshotId();
       dirNode = dirNode.recordModification(latest);
       dirNode.setQuota(nsQuota, dsQuota);
       return dirNode;
@@ -2425,11 +2420,11 @@ public class FSDirectory implements Closeable {
    * Sets the access time on the file/directory. Logs it in the transaction log.
    */
   void setTimes(String src, INode inode, long mtime, long atime, boolean force,
-      Snapshot latest) throws QuotaExceededException {
+      int latestSnapshotId) throws QuotaExceededException {
     boolean status = false;
     writeLock();
     try {
-      status = unprotectedSetTimes(inode, mtime, atime, force, latest);
+      status = unprotectedSetTimes(inode, mtime, atime, force, latestSnapshotId);
     } finally {
       writeUnlock();
     }
@@ -2443,11 +2438,11 @@ public class FSDirectory implements Closeable {
     assert hasWriteLock();
     final INodesInPath i = getLastINodeInPath(src); 
     return unprotectedSetTimes(i.getLastINode(), mtime, atime, force,
-        i.getLatestSnapshot());
+        i.getLatestSnapshotId());
   }
 
   private boolean unprotectedSetTimes(INode inode, long mtime,
-      long atime, boolean force, Snapshot latest) throws QuotaExceededException {
+      long atime, boolean force, int latest) throws QuotaExceededException {
     assert hasWriteLock();
     boolean status = false;
     if (mtime != -1) {
@@ -2455,7 +2450,7 @@ public class FSDirectory implements Closeable {
       status = true;
     }
     if (atime != -1) {
-      long inodeTime = inode.getAccessTime(null);
+      long inodeTime = inode.getAccessTime();
 
       // if the last access time update was within the last precision interval, then
       // no need to store access time
@@ -2495,7 +2490,7 @@ public class FSDirectory implements Closeable {
    * @throws IOException if any error occurs
    */
   private HdfsFileStatus createFileStatus(byte[] path, INode node,
-      boolean needLocation, Snapshot snapshot) throws IOException {
+      boolean needLocation, int snapshot) throws IOException {
     if (needLocation) {
       return createLocatedFileStatus(path, node, snapshot);
     } else {
@@ -2506,7 +2501,7 @@ public class FSDirectory implements Closeable {
    * Create FileStatus by file INode 
    */
    HdfsFileStatus createFileStatus(byte[] path, INode node,
-       Snapshot snapshot) {
+       int snapshot) {
      long size = 0;     // length is zero for directories
      short replication = 0;
      long blocksize = 0;
@@ -2539,7 +2534,7 @@ public class FSDirectory implements Closeable {
    * Create FileStatus with location info by file INode
    */
   private HdfsLocatedFileStatus createLocatedFileStatus(byte[] path,
-      INode node, Snapshot snapshot) throws IOException {
+      INode node, int snapshot) throws IOException {
     assert hasReadLock();
     long size = 0; // length is zero for directories
     short replication = 0;
@@ -2551,7 +2546,7 @@ public class FSDirectory implements Closeable {
       replication = fileNode.getFileReplication(snapshot);
       blocksize = fileNode.getPreferredBlockSize();
 
-      final boolean inSnapshot = snapshot != null; 
+      final boolean inSnapshot = snapshot != Snapshot.CURRENT_STATE_ID; 
       final boolean isUc = inSnapshot ? false : fileNode.isUnderConstruction();
       final long fileSize = !inSnapshot && isUc ? 
           fileNode.computeFileSizeNotIncludingLastUcBlock() : size;

+ 7 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java

@@ -24,14 +24,12 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.Arrays;
 import java.util.EnumMap;
-import java.util.EnumSet;
 import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.CacheFlag;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -79,6 +77,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp;
 import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
@@ -327,7 +326,7 @@ public class FSEditLogLoader {
         // add the op into retry cache if necessary
         if (toAddRetryCache) {
           HdfsFileStatus stat = fsNamesys.dir.createFileStatus(
-              HdfsFileStatus.EMPTY_NAME, newFile, null);
+              HdfsFileStatus.EMPTY_NAME, newFile, Snapshot.CURRENT_STATE_ID);
           fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId,
               addCloseOp.rpcCallId, stat);
         }
@@ -340,7 +339,7 @@ public class FSEditLogLoader {
           }
           LocatedBlock lb = fsNamesys.prepareFileForWrite(addCloseOp.path,
               oldFile, addCloseOp.clientName, addCloseOp.clientMachine, null,
-              false, iip.getLatestSnapshot(), false);
+              false, iip.getLatestSnapshotId(), false);
           newFile = INodeFile.valueOf(fsDir.getINode(addCloseOp.path),
               addCloseOp.path, true);
           
@@ -356,8 +355,8 @@ public class FSEditLogLoader {
       // update the block list.
       
       // Update the salient file attributes.
-      newFile.setAccessTime(addCloseOp.atime, null);
-      newFile.setModificationTime(addCloseOp.mtime, null);
+      newFile.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
+      newFile.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
       updateBlocks(fsDir, addCloseOp, newFile);
       break;
     }
@@ -375,8 +374,8 @@ public class FSEditLogLoader {
       final INodeFile file = INodeFile.valueOf(iip.getINode(0), addCloseOp.path);
 
       // Update the salient file attributes.
-      file.setAccessTime(addCloseOp.atime, null);
-      file.setModificationTime(addCloseOp.mtime, null);
+      file.setAccessTime(addCloseOp.atime, Snapshot.CURRENT_STATE_ID);
+      file.setModificationTime(addCloseOp.mtime, Snapshot.CURRENT_STATE_ID);
       updateBlocks(fsDir, addCloseOp, file);
 
       // Now close the file

+ 54 - 49
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -53,6 +53,7 @@ import org.apache.hadoop.hdfs.server.common.Util;
 import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
 import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
 import org.apache.hadoop.hdfs.server.protocol.CheckpointCommand;
@@ -405,60 +406,64 @@ public class FSImage implements Closeable {
     // Directories that don't have previous state do not rollback
     boolean canRollback = false;
     FSImage prevState = new FSImage(conf);
-    prevState.getStorage().layoutVersion = HdfsConstants.LAYOUT_VERSION;
-    for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
-      StorageDirectory sd = it.next();
-      File prevDir = sd.getPreviousDir();
-      if (!prevDir.exists()) {  // use current directory then
-        LOG.info("Storage directory " + sd.getRoot()
-                 + " does not contain previous fs state.");
-        // read and verify consistency with other directories
-        storage.readProperties(sd);
-        continue;
-      }
+    try {
+      prevState.getStorage().layoutVersion = HdfsConstants.LAYOUT_VERSION;
+      for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+        StorageDirectory sd = it.next();
+        File prevDir = sd.getPreviousDir();
+        if (!prevDir.exists()) {  // use current directory then
+          LOG.info("Storage directory " + sd.getRoot()
+            + " does not contain previous fs state.");
+          // read and verify consistency with other directories
+          storage.readProperties(sd);
+          continue;
+        }
 
-      // read and verify consistency of the prev dir
-      prevState.getStorage().readPreviousVersionProperties(sd);
+        // read and verify consistency of the prev dir
+        prevState.getStorage().readPreviousVersionProperties(sd);
 
-      if (prevState.getLayoutVersion() != HdfsConstants.LAYOUT_VERSION) {
-        throw new IOException(
-          "Cannot rollback to storage version " +
-          prevState.getLayoutVersion() +
-          " using this version of the NameNode, which uses storage version " +
-          HdfsConstants.LAYOUT_VERSION + ". " +
-          "Please use the previous version of HDFS to perform the rollback.");
+        if (prevState.getLayoutVersion() != HdfsConstants.LAYOUT_VERSION) {
+          throw new IOException(
+            "Cannot rollback to storage version " +
+                prevState.getLayoutVersion() +
+                " using this version of the NameNode, which uses storage version " +
+                HdfsConstants.LAYOUT_VERSION + ". " +
+              "Please use the previous version of HDFS to perform the rollback.");
+        }
+        canRollback = true;
       }
-      canRollback = true;
-    }
-    if (!canRollback)
-      throw new IOException("Cannot rollback. None of the storage "
-                            + "directories contain previous fs state.");
+      if (!canRollback)
+        throw new IOException("Cannot rollback. None of the storage "
+            + "directories contain previous fs state.");
 
-    // Now that we know all directories are going to be consistent
-    // Do rollback for each directory containing previous state
-    for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
-      StorageDirectory sd = it.next();
-      File prevDir = sd.getPreviousDir();
-      if (!prevDir.exists())
-        continue;
+      // Now that we know all directories are going to be consistent
+      // Do rollback for each directory containing previous state
+      for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+        StorageDirectory sd = it.next();
+        File prevDir = sd.getPreviousDir();
+        if (!prevDir.exists())
+          continue;
+
+        LOG.info("Rolling back storage directory " + sd.getRoot()
+          + ".\n   new LV = " + prevState.getStorage().getLayoutVersion()
+          + "; new CTime = " + prevState.getStorage().getCTime());
+        File tmpDir = sd.getRemovedTmp();
+        assert !tmpDir.exists() : "removed.tmp directory must not exist.";
+        // rename current to tmp
+        File curDir = sd.getCurrentDir();
+        assert curDir.exists() : "Current directory must exist.";
+        NNStorage.rename(curDir, tmpDir);
+        // rename previous to current
+        NNStorage.rename(prevDir, curDir);
 
-      LOG.info("Rolling back storage directory " + sd.getRoot()
-               + ".\n   new LV = " + prevState.getStorage().getLayoutVersion()
-               + "; new CTime = " + prevState.getStorage().getCTime());
-      File tmpDir = sd.getRemovedTmp();
-      assert !tmpDir.exists() : "removed.tmp directory must not exist.";
-      // rename current to tmp
-      File curDir = sd.getCurrentDir();
-      assert curDir.exists() : "Current directory must exist.";
-      NNStorage.rename(curDir, tmpDir);
-      // rename previous to current
-      NNStorage.rename(prevDir, curDir);
-
-      // delete tmp dir
-      NNStorage.deleteDir(tmpDir);
-      LOG.info("Rollback of " + sd.getRoot()+ " is complete.");
+        // delete tmp dir
+        NNStorage.deleteDir(tmpDir);
+        LOG.info("Rollback of " + sd.getRoot()+ " is complete.");
+      }
+      isUpgradeFinalized = true;
+    } finally {
+      prevState.close();
     }
-    isUpgradeFinalized = true;
   }
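
The rollback walk above first verifies every storage directory can roll back, then swaps each one in three steps: rename current to removed.tmp, rename previous to current, and delete the tmp copy; the new try/finally additionally guarantees the temporary FSImage used for inspection is closed. A sketch of that per-directory swap using plain java.io.File (the directory names follow the convention visible in the hunk; the helpers are illustrative):

import java.io.File;
import java.io.IOException;

public class RollbackSwapSketch {

    // Swap 'previous' back into place: current -> removed.tmp, previous -> current,
    // then drop the tmp copy. Mirrors the per-directory steps described above.
    static void rollbackOneDir(File storageRoot) throws IOException {
        File current = new File(storageRoot, "current");
        File previous = new File(storageRoot, "previous");
        File removedTmp = new File(storageRoot, "removed.tmp");

        if (!previous.exists()) {
            return;                                  // nothing to roll back here
        }
        if (removedTmp.exists()) {
            throw new IOException("removed.tmp must not exist: " + removedTmp);
        }
        rename(current, removedTmp);                 // step 1: park the new state
        rename(previous, current);                   // step 2: restore the old state
        deleteRecursively(removedTmp);               // step 3: discard the parked copy
    }

    static void rename(File from, File to) throws IOException {
        if (!from.renameTo(to)) {
            throw new IOException("Could not rename " + from + " to " + to);
        }
    }

    static void deleteRecursively(File f) throws IOException {
        File[] children = f.listFiles();
        if (children != null) {
            for (File c : children) {
                deleteRecursively(c);
            }
        }
        if (!f.delete()) {
            throw new IOException("Could not delete " + f);
        }
    }

    public static void main(String[] args) throws IOException {
        File root = new File(System.getProperty("java.io.tmpdir"), "rollback-sketch");
        new File(root, "current").mkdirs();
        new File(root, "previous").mkdirs();
        rollbackOneDir(root);
        System.out.println(new File(root, "current").exists());      // true (was 'previous')
        System.out.println(new File(root, "removed.tmp").exists());  // false
    }
}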
 
   private void doFinalize(StorageDirectory sd) throws IOException {
@@ -766,7 +771,7 @@ public class FSImage implements Closeable {
 
     dir.computeQuotaUsage4CurrentDirectory(counts);
     
-    for (INode child : dir.getChildrenList(null)) {
+    for (INode child : dir.getChildrenList(Snapshot.CURRENT_STATE_ID)) {
       if (child.isDirectory()) {
         updateCountForQuotaRecursively(child.asDirectory(), counts);
       } else {

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java

@@ -1111,7 +1111,8 @@ public class FSImageFormat {
         return;
       }
       
-      final ReadOnlyList<INode> children = current.getChildrenList(null);
+      final ReadOnlyList<INode> children = current
+          .getChildrenList(Snapshot.CURRENT_STATE_ID);
       int dirNum = 0;
       List<INodeDirectory> snapshotDirs = null;
       DirectoryWithSnapshotFeature sf = current.getDirectoryWithSnapshotFeature();

+ 21 - 13
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -176,7 +176,15 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
-import org.apache.hadoop.hdfs.server.blockmanagement.*;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
+import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException;
 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
@@ -1631,11 +1639,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
             if (isReadOp) {
               continue;
             }
-            dir.setTimes(src, inode, -1, now, false, iip.getLatestSnapshot());
+            dir.setTimes(src, inode, -1, now, false, iip.getLatestSnapshotId());
           }
         }
         final long fileSize = iip.isSnapshot() ?
-            inode.computeFileSize(iip.getPathSnapshot())
+            inode.computeFileSize(iip.getPathSnapshotId())
             : inode.computeFileSizeNotIncludingLastUcBlock();
         boolean isUc = inode.isUnderConstruction();
         if (iip.isSnapshot()) {
@@ -1883,7 +1891,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       final INodesInPath iip = dir.getINodesInPath4Write(src);
       final INode inode = iip.getLastINode();
       if (inode != null) {
-        dir.setTimes(src, inode, mtime, atime, true, iip.getLatestSnapshot());
+        dir.setTimes(src, inode, mtime, atime, true, iip.getLatestSnapshotId());
         resultingStat = getAuditFileInfo(src, false);
       } else {
         throw new FileNotFoundException("File/Directory " + src + " does not exist.");
@@ -2279,7 +2287,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       final DatanodeDescriptor clientNode = 
           blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
       return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode,
-          true, iip.getLatestSnapshot(), logRetryCache);
+          true, iip.getLatestSnapshotId(), logRetryCache);
     } catch (IOException ie) {
       NameNode.stateChangeLog.warn("DIR* NameSystem.append: " +ie.getMessage());
       throw ie;
@@ -2304,7 +2312,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
    */
   LocatedBlock prepareFileForWrite(String src, INodeFile file,
       String leaseHolder, String clientMachine, DatanodeDescriptor clientNode,
-      boolean writeToEditLog, Snapshot latestSnapshot, boolean logRetryCache)
+      boolean writeToEditLog, int latestSnapshot, boolean logRetryCache)
       throws IOException {
     file = file.recordModification(latestSnapshot);
     final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine,
@@ -2939,7 +2947,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     }
 
     finalizeINodeFileUnderConstruction(src, pendingFile,
-        iip.getLatestSnapshot());
+        iip.getLatestSnapshotId());
     return true;
   }
 
@@ -3648,7 +3656,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     // then reap lease immediately and close the file.
     if(nrCompleteBlocks == nrBlocks) {
       finalizeINodeFileUnderConstruction(src, pendingFile,
-          iip.getLatestSnapshot());
+          iip.getLatestSnapshotId());
       NameNode.stateChangeLog.warn("BLOCK*"
         + " internalReleaseLease: All existing blocks are COMPLETE,"
         + " lease removed, file closed.");
@@ -3697,7 +3705,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       if(penultimateBlockMinReplication &&
           blockManager.checkMinReplication(lastBlock)) {
         finalizeINodeFileUnderConstruction(src, pendingFile,
-            iip.getLatestSnapshot());
+            iip.getLatestSnapshotId());
         NameNode.stateChangeLog.warn("BLOCK*"
           + " internalReleaseLease: Committed blocks are minimally replicated,"
           + " lease removed, file closed.");
@@ -3728,7 +3736,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         // We can remove this block and close the file.
         pendingFile.removeLastBlock(lastBlock);
         finalizeINodeFileUnderConstruction(src, pendingFile,
-            iip.getLatestSnapshot());
+            iip.getLatestSnapshotId());
         NameNode.stateChangeLog.warn("BLOCK* internalReleaseLease: "
             + "Removed empty last block and closed file.");
         return true;
@@ -3789,7 +3797,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   }
 
   private void finalizeINodeFileUnderConstruction(String src,
-      INodeFile pendingFile, Snapshot latestSnapshot) throws IOException,
+      INodeFile pendingFile, int latestSnapshot) throws IOException,
       UnresolvedLinkException {
     assert hasWriteLock();
     FileUnderConstructionFeature uc = pendingFile.getFileUnderConstructionFeature();
@@ -3989,7 +3997,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 
     //remove lease, close file
     finalizeINodeFileUnderConstruction(src, pendingFile,
-        Snapshot.findLatestSnapshot(pendingFile, null));
+        Snapshot.findLatestSnapshot(pendingFile, Snapshot.CURRENT_STATE_ID));
 
     return src;
   }
@@ -7114,7 +7122,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         getEditLog().logSync();
       }
       if (isAuditEnabled() && isExternalInvocation()) {
-        logAuditEvent(success, "addCacheDirective", null, null, null);
+        logAuditEvent(success, "modifyCacheDirective", null, null, null);
       }
       RetryCache.setState(cacheEntry, success);
     }

+ 25 - 27
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java

@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
-import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
@@ -29,7 +28,6 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.security.AccessControlException;
 import org.apache.hadoop.security.UserGroupInformation;
 
@@ -142,54 +140,54 @@ class FSPermissionChecker {
     // check if (parentAccess != null) && file exists, then check sb
     // If resolveLink, the check is performed on the link target.
     final INodesInPath inodesInPath = root.getINodesInPath(path, resolveLink);
-    final Snapshot snapshot = inodesInPath.getPathSnapshot();
+    final int snapshotId = inodesInPath.getPathSnapshotId();
     final INode[] inodes = inodesInPath.getINodes();
     int ancestorIndex = inodes.length - 2;
     for(; ancestorIndex >= 0 && inodes[ancestorIndex] == null;
         ancestorIndex--);
-    checkTraverse(inodes, ancestorIndex, snapshot);
+    checkTraverse(inodes, ancestorIndex, snapshotId);
 
     final INode last = inodes[inodes.length - 1];
     if (parentAccess != null && parentAccess.implies(FsAction.WRITE)
         && inodes.length > 1 && last != null) {
-      checkStickyBit(inodes[inodes.length - 2], last, snapshot);
+      checkStickyBit(inodes[inodes.length - 2], last, snapshotId);
     }
     if (ancestorAccess != null && inodes.length > 1) {
-      check(inodes, ancestorIndex, snapshot, ancestorAccess);
+      check(inodes, ancestorIndex, snapshotId, ancestorAccess);
     }
     if (parentAccess != null && inodes.length > 1) {
-      check(inodes, inodes.length - 2, snapshot, parentAccess);
+      check(inodes, inodes.length - 2, snapshotId, parentAccess);
     }
     if (access != null) {
-      check(last, snapshot, access);
+      check(last, snapshotId, access);
     }
     if (subAccess != null) {
-      checkSubAccess(last, snapshot, subAccess);
+      checkSubAccess(last, snapshotId, subAccess);
     }
     if (doCheckOwner) {
-      checkOwner(last, snapshot);
+      checkOwner(last, snapshotId);
     }
   }
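
The checks above run in a fixed order over the resolved path: EXECUTE on every ancestor, the sticky-bit rule on parent and target, then the requested ancestor, parent, target, subtree, and owner checks. Each of them ultimately reduces to the user/group/other evaluation in check(); a condensed, self-contained sketch of that evaluation with plain stand-in types (not the INode/FsPermission API):

import java.util.HashSet;
import java.util.Set;

public class PermissionCheckSketch {

    // 3-bit action masks, same encoding as POSIX rwx.
    static final int READ = 4, WRITE = 2, EXECUTE = 1;

    // One inode's ownership and mode bits (illustrative stand-in for INode).
    static class Node {
        final String owner, group;
        final int mode;                       // e.g. 0750
        Node(String owner, String group, int mode) {
            this.owner = owner; this.group = group; this.mode = mode;
        }
    }

    // Pick the user, group, or other class and test the requested action bits.
    static boolean permitted(Node n, String user, Set<String> groups, int access) {
        final int classBits;
        if (user.equals(n.owner)) {
            classBits = (n.mode >> 6) & 7;    // user class
        } else if (groups.contains(n.group)) {
            classBits = (n.mode >> 3) & 7;    // group class
        } else {
            classBits = n.mode & 7;           // other class
        }
        return (classBits & access) == access;
    }

    public static void main(String[] args) {
        Node dir = new Node("hdfs", "hadoop", 0750);
        Set<String> groups = new HashSet<String>();
        groups.add("hadoop");
        System.out.println(permitted(dir, "hdfs", groups, READ | WRITE));  // true  (owner: rwx)
        System.out.println(permitted(dir, "alice", groups, WRITE));        // false (group: r-x)
    }
}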
 
   /** Guarded by {@link FSNamesystem#readLock()} */
-  private void checkOwner(INode inode, Snapshot snapshot
+  private void checkOwner(INode inode, int snapshotId
       ) throws AccessControlException {
-    if (inode != null && user.equals(inode.getUserName(snapshot))) {
+    if (inode != null && user.equals(inode.getUserName(snapshotId))) {
       return;
     }
     throw new AccessControlException("Permission denied");
   }
 
   /** Guarded by {@link FSNamesystem#readLock()} */
-  private void checkTraverse(INode[] inodes, int last, Snapshot snapshot
+  private void checkTraverse(INode[] inodes, int last, int snapshotId
       ) throws AccessControlException {
     for(int j = 0; j <= last; j++) {
-      check(inodes[j], snapshot, FsAction.EXECUTE);
+      check(inodes[j], snapshotId, FsAction.EXECUTE);
     }
   }
 
   /** Guarded by {@link FSNamesystem#readLock()} */
-  private void checkSubAccess(INode inode, Snapshot snapshot, FsAction access
+  private void checkSubAccess(INode inode, int snapshotId, FsAction access
       ) throws AccessControlException {
     if (inode == null || !inode.isDirectory()) {
       return;
@@ -198,9 +196,9 @@ class FSPermissionChecker {
     Stack<INodeDirectory> directories = new Stack<INodeDirectory>();
     for(directories.push(inode.asDirectory()); !directories.isEmpty(); ) {
       INodeDirectory d = directories.pop();
-      check(d, snapshot, access);
+      check(d, snapshotId, access);
 
-      for(INode child : d.getChildrenList(snapshot)) {
+      for(INode child : d.getChildrenList(snapshotId)) {
         if (child.isDirectory()) {
           directories.push(child.asDirectory());
         }
@@ -209,23 +207,23 @@ class FSPermissionChecker {
   }
 
   /** Guarded by {@link FSNamesystem#readLock()} */
-  private void check(INode[] inodes, int i, Snapshot snapshot, FsAction access
+  private void check(INode[] inodes, int i, int snapshotId, FsAction access
       ) throws AccessControlException {
-    check(i >= 0? inodes[i]: null, snapshot, access);
+    check(i >= 0? inodes[i]: null, snapshotId, access);
   }
 
   /** Guarded by {@link FSNamesystem#readLock()} */
-  private void check(INode inode, Snapshot snapshot, FsAction access
+  private void check(INode inode, int snapshotId, FsAction access
       ) throws AccessControlException {
     if (inode == null) {
       return;
     }
-    FsPermission mode = inode.getFsPermission(snapshot);
+    FsPermission mode = inode.getFsPermission(snapshotId);
 
-    if (user.equals(inode.getUserName(snapshot))) { //user class
+    if (user.equals(inode.getUserName(snapshotId))) { //user class
       if (mode.getUserAction().implies(access)) { return; }
     }
-    else if (groups.contains(inode.getGroupName(snapshot))) { //group class
+    else if (groups.contains(inode.getGroupName(snapshotId))) { //group class
       if (mode.getGroupAction().implies(access)) { return; }
     }
     else { //other class
@@ -236,19 +234,19 @@ class FSPermissionChecker {
   }
 
   /** Guarded by {@link FSNamesystem#readLock()} */
-  private void checkStickyBit(INode parent, INode inode, Snapshot snapshot
+  private void checkStickyBit(INode parent, INode inode, int snapshotId
       ) throws AccessControlException {
-    if(!parent.getFsPermission(snapshot).getStickyBit()) {
+    if(!parent.getFsPermission(snapshotId).getStickyBit()) {
       return;
     }
 
     // If this user is the directory owner, return
-    if(parent.getUserName(snapshot).equals(user)) {
+    if(parent.getUserName(snapshotId).equals(user)) {
       return;
     }
 
     // if this user is the file owner, return
-    if(inode.getUserName(snapshot).equals(user)) {
+    if(inode.getUserName(snapshotId).equals(user)) {
       return;
     }
 

+ 84 - 72
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java

@@ -70,98 +70,101 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   }
 
   /** Get the {@link PermissionStatus} */
-  abstract PermissionStatus getPermissionStatus(Snapshot snapshot);
+  abstract PermissionStatus getPermissionStatus(int snapshotId);
 
   /** The same as getPermissionStatus(null). */
   final PermissionStatus getPermissionStatus() {
-    return getPermissionStatus(null);
+    return getPermissionStatus(Snapshot.CURRENT_STATE_ID);
   }
 
   /**
-   * @param snapshot
-   *          if it is not null, get the result from the given snapshot;
-   *          otherwise, get the result from the current inode.
+   * @param snapshotId
+   *          if it is not {@link Snapshot#CURRENT_STATE_ID}, get the result
+   *          from the given snapshot; otherwise, get the result from the
+   *          current inode.
    * @return user name
    */
-  abstract String getUserName(Snapshot snapshot);
+  abstract String getUserName(int snapshotId);
 
-  /** The same as getUserName(null). */
+  /** The same as getUserName(Snapshot.CURRENT_STATE_ID). */
   @Override
   public final String getUserName() {
-    return getUserName(null);
+    return getUserName(Snapshot.CURRENT_STATE_ID);
   }
 
   /** Set user */
   abstract void setUser(String user);
 
   /** Set user */
-  final INode setUser(String user, Snapshot latest)
+  final INode setUser(String user, int latestSnapshotId)
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latest);
+    final INode nodeToUpdate = recordModification(latestSnapshotId);
     nodeToUpdate.setUser(user);
     return nodeToUpdate;
   }
   /**
-   * @param snapshot
-   *          if it is not null, get the result from the given snapshot;
-   *          otherwise, get the result from the current inode.
+   * @param snapshotId
+   *          if it is not {@link Snapshot#CURRENT_STATE_ID}, get the result
+   *          from the given snapshot; otherwise, get the result from the
+   *          current inode.
    * @return group name
    */
-  abstract String getGroupName(Snapshot snapshot);
+  abstract String getGroupName(int snapshotId);
 
-  /** The same as getGroupName(null). */
+  /** The same as getGroupName(Snapshot.CURRENT_STATE_ID). */
   @Override
   public final String getGroupName() {
-    return getGroupName(null);
+    return getGroupName(Snapshot.CURRENT_STATE_ID);
   }
 
   /** Set group */
   abstract void setGroup(String group);
 
   /** Set group */
-  final INode setGroup(String group, Snapshot latest)
+  final INode setGroup(String group, int latestSnapshotId)
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latest);
+    final INode nodeToUpdate = recordModification(latestSnapshotId);
     nodeToUpdate.setGroup(group);
     return nodeToUpdate;
   }
 
   /**
-   * @param snapshot
-   *          if it is not null, get the result from the given snapshot;
-   *          otherwise, get the result from the current inode.
+   * @param snapshotId
+   *          if it is not {@link Snapshot#CURRENT_STATE_ID}, get the result
+   *          from the given snapshot; otherwise, get the result from the
+   *          current inode.
    * @return permission.
    */
-  abstract FsPermission getFsPermission(Snapshot snapshot);
+  abstract FsPermission getFsPermission(int snapshotId);
   
-  /** The same as getFsPermission(null). */
+  /** The same as getFsPermission(Snapshot.CURRENT_STATE_ID). */
   @Override
   public final FsPermission getFsPermission() {
-    return getFsPermission(null);
+    return getFsPermission(Snapshot.CURRENT_STATE_ID);
   }
 
   /** Set the {@link FsPermission} of this {@link INode} */
   abstract void setPermission(FsPermission permission);
 
   /** Set the {@link FsPermission} of this {@link INode} */
-  INode setPermission(FsPermission permission, Snapshot latest) 
+  INode setPermission(FsPermission permission, int latestSnapshotId) 
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latest);
+    final INode nodeToUpdate = recordModification(latestSnapshotId);
     nodeToUpdate.setPermission(permission);
     return nodeToUpdate;
   }
 
   /**
-   * @return if the given snapshot is null, return this;
-   *     otherwise return the corresponding snapshot inode.
+   * @return if the given snapshot id is {@link Snapshot#CURRENT_STATE_ID},
+   *         return this; otherwise return the corresponding snapshot inode.
    */
-  public INodeAttributes getSnapshotINode(final Snapshot snapshot) {
+  public INodeAttributes getSnapshotINode(final int snapshotId) {
     return this;
   }
 
   /** Is this inode in the latest snapshot? */
-  public final boolean isInLatestSnapshot(final Snapshot latest) {
-    if (latest == null) {
+  public final boolean isInLatestSnapshot(final int latestSnapshotId) {
+    if (latestSnapshotId == Snapshot.CURRENT_STATE_ID) {
       return false;
     }
     // if parent is a reference node, parent must be a renamed node. We can 
@@ -173,10 +176,11 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
     if (parentDir == null) { // root
       return true;
     }
-    if (!parentDir.isInLatestSnapshot(latest)) {
+    if (!parentDir.isInLatestSnapshot(latestSnapshotId)) {
       return false;
     }
-    final INode child = parentDir.getChild(getLocalNameBytes(), latest);
+    final INode child = parentDir.getChild(getLocalNameBytes(),
+        latestSnapshotId);
     if (this == child) {
       return true;
     }
@@ -203,21 +207,22 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
    * operation, or the snapshot belonging to the DST tree.
    * 
    * @param latestInDst
-   *          the latest snapshot in the DST tree above the reference node
+   *          id of the latest snapshot in the DST tree above the reference node
    * @return True: the modification should be recorded in the snapshot that
    *         belongs to the SRC tree. False: the modification should be
    *         recorded in the snapshot that belongs to the DST tree.
    */
-  public final boolean shouldRecordInSrcSnapshot(final Snapshot latestInDst) {
+  public final boolean shouldRecordInSrcSnapshot(final int latestInDst) {
     Preconditions.checkState(!isReference());
 
-    if (latestInDst == null) {
+    if (latestInDst == Snapshot.CURRENT_STATE_ID) {
       return true;
     }
     INodeReference withCount = getParentReference();
     if (withCount != null) {
       int dstSnapshotId = withCount.getParentReference().getDstSnapshotId();
-      if (dstSnapshotId >= latestInDst.getId()) {
+      if (dstSnapshotId != Snapshot.CURRENT_STATE_ID
+          && dstSnapshotId >= latestInDst) {
         return true;
       }
     }
@@ -228,13 +233,14 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
    * This inode is being modified.  The previous version of the inode needs to
    * be recorded in the latest snapshot.
    *
-   * @param latest the latest snapshot that has been taken.
-   *        Note that it is null if no snapshots have been taken.
+   * @param latestSnapshotId The id of the latest snapshot that has been taken.
+   *                         Note that it is {@link Snapshot#CURRENT_STATE_ID} 
+   *                         if no snapshots have been taken.
    * @return The current inode, which usually is the same object of this inode.
    *         However, in some cases, this inode may be replaced with a new inode
    *         for maintaining snapshots. The current inode is then the new inode.
    */
-  abstract INode recordModification(final Snapshot latest)
+  abstract INode recordModification(final int latestSnapshotId)
       throws QuotaExceededException;
 
   /** Check whether it's a reference. */
@@ -330,12 +336,13 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
    * snapshot in its diff list. Recursively clean its children.
    * </pre>
    * 
-   * @param snapshot
-   *          The snapshot to delete. Null means to delete the current
+   * @param snapshotId
+   *          The id of the snapshot to delete. 
+   *          {@link Snapshot#CURRENT_STATE_ID} means to delete the current
    *          file/directory.
-   * @param prior
-   *          The latest snapshot before the to-be-deleted snapshot. When
-   *          deleting a current inode, this parameter captures the latest
+   * @param priorSnapshotId
+   *          The id of the latest snapshot before the to-be-deleted snapshot.
+   *          When deleting a current inode, this parameter captures the latest
    *          snapshot.
    * @param collectedBlocks
    *          blocks collected from the descents for further block
@@ -345,8 +352,8 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
    *          inodeMap
    * @return quota usage delta when deleting a snapshot
    */
-  public abstract Quota.Counts cleanSubtree(final Snapshot snapshot,
-      Snapshot prior, BlocksMapUpdateInfo collectedBlocks,
+  public abstract Quota.Counts cleanSubtree(final int snapshotId,
+      int priorSnapshotId, BlocksMapUpdateInfo collectedBlocks,
       List<INode> removedINodes, boolean countDiffChange)
       throws QuotaExceededException;
   
@@ -460,9 +467,10 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
    * @param counts The subtree counts for returning.
    * @param useCache Whether to use cached quota usage. Note that 
    *                 {@link WithName} node never uses cache for its subtree.
-   * @param lastSnapshotId {@link Snapshot#INVALID_ID} indicates the computation
-   *                       is in the current tree. Otherwise the id indicates
-   *                       the computation range for a {@link WithName} node.
+   * @param lastSnapshotId {@link Snapshot#CURRENT_STATE_ID} indicates the 
+   *                       computation is in the current tree. Otherwise the id
+   *                       indicates the computation range for a 
+   *                       {@link WithName} node.
    * @return The same objects as the counts parameter.
    */
   public abstract Quota.Counts computeQuotaUsage(Quota.Counts counts,
@@ -470,7 +478,7 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
 
   public final Quota.Counts computeQuotaUsage(Quota.Counts counts,
       boolean useCache) {
-    return computeQuotaUsage(counts, useCache, Snapshot.INVALID_ID);
+    return computeQuotaUsage(counts, useCache, Snapshot.CURRENT_STATE_ID);
   }
   
   /**
@@ -558,21 +566,22 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   }
 
   /**
-   * @param snapshot
-   *          if it is not null, get the result from the given snapshot;
-   *          otherwise, get the result from the current inode.
+   * @param snapshotId
+   *          if it is not {@link Snapshot#CURRENT_STATE_ID}, get the result
+   *          from the given snapshot; otherwise, get the result from the
+   *          current inode.
    * @return modification time.
    */
-  abstract long getModificationTime(Snapshot snapshot);
+  abstract long getModificationTime(int snapshotId);
 
-  /** The same as getModificationTime(null). */
+  /** The same as getModificationTime(Snapshot.CURRENT_STATE_ID). */
   @Override
   public final long getModificationTime() {
-    return getModificationTime(null);
+    return getModificationTime(Snapshot.CURRENT_STATE_ID);
   }
 
   /** Update modification time if it is larger than the current value. */
-  public abstract INode updateModificationTime(long mtime, Snapshot latest) 
+  public abstract INode updateModificationTime(long mtime, int latestSnapshotId) 
       throws QuotaExceededException;
 
   /** Set the last modification time of inode. */
@@ -580,24 +589,25 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
 
   /** Set the last modification time of inode. */
   public final INode setModificationTime(long modificationTime,
-      Snapshot latest) throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latest);
+      int latestSnapshotId) throws QuotaExceededException {
+    final INode nodeToUpdate = recordModification(latestSnapshotId);
     nodeToUpdate.setModificationTime(modificationTime);
     return nodeToUpdate;
   }
 
   /**
-   * @param snapshot
-   *          if it is not null, get the result from the given snapshot;
-   *          otherwise, get the result from the current inode.
+   * @param snapshotId
+   *          if it is not {@link Snapshot#CURRENT_STATE_ID}, get the result
+   *          from the given snapshot; otherwise, get the result from the
+   *          current inode.
    * @return access time
    */
-  abstract long getAccessTime(Snapshot snapshot);
+  abstract long getAccessTime(int snapshotId);
 
-  /** The same as getAccessTime(null). */
+  /** The same as getAccessTime(Snapshot.CURRENT_STATE_ID). */
   @Override
   public final long getAccessTime() {
-    return getAccessTime(null);
+    return getAccessTime(Snapshot.CURRENT_STATE_ID);
   }
 
   /**
@@ -608,9 +618,9 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   /**
    * Set last access time of inode.
    */
-  public final INode setAccessTime(long accessTime, Snapshot latest)
+  public final INode setAccessTime(long accessTime, int latestSnapshotId)
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latest);
+    final INode nodeToUpdate = recordModification(latestSnapshotId);
     nodeToUpdate.setAccessTime(accessTime);
     return nodeToUpdate;
   }
@@ -679,13 +689,15 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   @VisibleForTesting
   public final StringBuffer dumpTreeRecursively() {
     final StringWriter out = new StringWriter(); 
-    dumpTreeRecursively(new PrintWriter(out, true), new StringBuilder(), null);
+    dumpTreeRecursively(new PrintWriter(out, true), new StringBuilder(),
+        Snapshot.CURRENT_STATE_ID);
     return out.getBuffer();
   }
 
   @VisibleForTesting
   public final void dumpTreeRecursively(PrintStream out) {
-    dumpTreeRecursively(new PrintWriter(out, true), new StringBuilder(), null);
+    dumpTreeRecursively(new PrintWriter(out, true), new StringBuilder(),
+        Snapshot.CURRENT_STATE_ID);
   }
 
   /**
@@ -694,7 +706,7 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
    */
   @VisibleForTesting
   public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
-      Snapshot snapshot) {
+      int snapshotId) {
     out.print(prefix);
     out.print(" ");
     final String name = getLocalName();
@@ -703,7 +715,7 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
     out.print(getObjectString());
     out.print("), ");
     out.print(getParentString());
-    out.print(", " + getPermissionStatus(snapshot));
+    out.print(", " + getPermissionStatus(snapshotId));
   }
   
   /**
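
Note: two patterns repeat through INode.java above. Each attribute getter gains an int-id overload and the no-arg convenience forwards Snapshot.CURRENT_STATE_ID, and ordered comparisons such as the one in shouldRecordInSrcSnapshot now exclude the sentinel explicitly before applying >=, presumably because the sentinel is itself a reserved id that would otherwise compare like an ordinary one. A compact sketch of both patterns, with hypothetical names and a stand-in sentinel value:

    // Sketch only; IdOverloadSketch is not INode, and the constant value is a
    // placeholder for whatever Snapshot.CURRENT_STATE_ID actually is.
    public abstract class IdOverloadSketch {
      static final int CURRENT_STATE_ID = Integer.MAX_VALUE - 1; // stand-in value

      abstract long getModificationTime(int snapshotId);

      // The no-arg convenience that used to call getModificationTime(null).
      public final long getModificationTime() {
        return getModificationTime(CURRENT_STATE_ID);
      }

      // Guarded comparison: rule out the sentinel before ordering ids.
      static boolean recordInSrc(int dstSnapshotId, int latestInDst) {
        return dstSnapshotId != CURRENT_STATE_ID && dstSnapshotId >= latestInDst;
      }

      public static void main(String[] args) {
        System.out.println(recordInSrc(7, 5));                // true
        System.out.println(recordInSrc(CURRENT_STATE_ID, 5)); // false: sentinel ignored
      }
    }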

+ 66 - 71
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java

@@ -204,9 +204,9 @@ public class INodeDirectory extends INodeWithAdditionalFields
   }
   
   @Override
-  public INodeDirectoryAttributes getSnapshotINode(Snapshot snapshot) {
+  public INodeDirectoryAttributes getSnapshotINode(int snapshotId) {
     DirectoryWithSnapshotFeature sf = getDirectoryWithSnapshotFeature();
-    return sf == null ? this : sf.getDiffs().getSnapshotINode(snapshot, this);
+    return sf == null ? this : sf.getDiffs().getSnapshotINode(snapshotId, this);
   }
   
   @Override
@@ -217,12 +217,13 @@ public class INodeDirectory extends INodeWithAdditionalFields
 
   /** Replace itself with an {@link INodeDirectorySnapshottable}. */
   public INodeDirectorySnapshottable replaceSelf4INodeDirectorySnapshottable(
-      Snapshot latest, final INodeMap inodeMap) throws QuotaExceededException {
+      int latestSnapshotId, final INodeMap inodeMap)
+      throws QuotaExceededException {
     Preconditions.checkState(!(this instanceof INodeDirectorySnapshottable),
         "this is already an INodeDirectorySnapshottable, this=%s", this);
     final INodeDirectorySnapshottable s = new INodeDirectorySnapshottable(this);
     replaceSelf(s, inodeMap).getDirectoryWithSnapshotFeature().getDiffs()
-        .saveSelf2Snapshot(latest, s, this);
+        .saveSelf2Snapshot(latestSnapshotId, s, this);
     return s;
   }
 
@@ -289,8 +290,8 @@ public class INodeDirectory extends INodeWithAdditionalFields
   }
 
   INodeReference.WithName replaceChild4ReferenceWithName(INode oldChild,
-      Snapshot latest) {
-    Preconditions.checkArgument(latest != null);
+      int latestSnapshotId) {
+    Preconditions.checkArgument(latestSnapshotId != Snapshot.CURRENT_STATE_ID);
     if (oldChild instanceof INodeReference.WithName) {
       return (INodeReference.WithName)oldChild;
     }
@@ -304,22 +305,23 @@ public class INodeDirectory extends INodeWithAdditionalFields
       withCount = new INodeReference.WithCount(null, oldChild);
     }
     final INodeReference.WithName ref = new INodeReference.WithName(this,
-        withCount, oldChild.getLocalNameBytes(), latest.getId());
+        withCount, oldChild.getLocalNameBytes(), latestSnapshotId);
     replaceChild(oldChild, ref, null);
     return ref;
   }
 
   @Override
-  public INodeDirectory recordModification(Snapshot latest) 
+  public INodeDirectory recordModification(int latestSnapshotId) 
       throws QuotaExceededException {
-    if (isInLatestSnapshot(latest) && !shouldRecordInSrcSnapshot(latest)) {
+    if (isInLatestSnapshot(latestSnapshotId)
+        && !shouldRecordInSrcSnapshot(latestSnapshotId)) {
       // add snapshot feature if necessary
       DirectoryWithSnapshotFeature sf = getDirectoryWithSnapshotFeature();
       if (sf == null) {
         sf = addSnapshotFeature(null);
       }
       // record self in the diff list if necessary
-      sf.getDiffs().saveSelf2Snapshot(latest, this, null);
+      sf.getDiffs().saveSelf2Snapshot(latestSnapshotId, this, null);
     }
     return this;
   }
@@ -329,9 +331,9 @@ public class INodeDirectory extends INodeWithAdditionalFields
    * 
    * @return the child inode, which may be replaced.
    */
-  public INode saveChild2Snapshot(final INode child, final Snapshot latest,
+  public INode saveChild2Snapshot(final INode child, final int latestSnapshotId,
       final INode snapshotCopy) throws QuotaExceededException {
-    if (latest == null) {
+    if (latestSnapshotId == Snapshot.CURRENT_STATE_ID) {
       return child;
     }
     
@@ -340,42 +342,45 @@ public class INodeDirectory extends INodeWithAdditionalFields
     if (sf == null) {
       sf = this.addSnapshotFeature(null);
     }
-    return sf.saveChild2Snapshot(this, child, latest, snapshotCopy);
+    return sf.saveChild2Snapshot(this, child, latestSnapshotId, snapshotCopy);
   }
 
   /**
    * @param name the name of the child
-   * @param snapshot
-   *          if it is not null, get the result from the given snapshot;
-   *          otherwise, get the result from the current directory.
+   * @param snapshotId
+   *          if it is not {@link Snapshot#CURRENT_STATE_ID}, get the result
+   *          from the corresponding snapshot; otherwise, get the result from
+   *          the current directory.
    * @return the child inode.
    */
-  public INode getChild(byte[] name, Snapshot snapshot) {
+  public INode getChild(byte[] name, int snapshotId) {
     DirectoryWithSnapshotFeature sf;
-    if (snapshot == null || (sf = getDirectoryWithSnapshotFeature()) == null) {
+    if (snapshotId == Snapshot.CURRENT_STATE_ID || 
+        (sf = getDirectoryWithSnapshotFeature()) == null) {
       ReadOnlyList<INode> c = getCurrentChildrenList();
       final int i = ReadOnlyList.Util.binarySearch(c, name);
       return i < 0 ? null : c.get(i);
     }
     
-    return sf.getChild(this, name, snapshot);
+    return sf.getChild(this, name, snapshotId);
   }
   
   /**
-   * @param snapshot
-   *          if it is not null, get the result from the given snapshot;
-   *          otherwise, get the result from the current directory.
+   * @param snapshotId
+   *          if it is not {@link Snapshot#CURRENT_STATE_ID}, get the result
+   *          from the corresponding snapshot; otherwise, get the result from
+   *          the current directory.
    * @return the current children list if the specified snapshot is null;
    *         otherwise, return the children list corresponding to the snapshot.
    *         Note that the returned list is never null.
    */
-  public ReadOnlyList<INode> getChildrenList(final Snapshot snapshot) {
+  public ReadOnlyList<INode> getChildrenList(final int snapshotId) {
     DirectoryWithSnapshotFeature sf;
-    if (snapshot == null
+    if (snapshotId == Snapshot.CURRENT_STATE_ID
         || (sf = this.getDirectoryWithSnapshotFeature()) == null) {
       return getCurrentChildrenList();
     }
-    return sf.getChildrenList(this, snapshot);
+    return sf.getChildrenList(this, snapshotId);
   }
   
   private ReadOnlyList<INode> getCurrentChildrenList() {
@@ -450,15 +455,15 @@ public class INodeDirectory extends INodeWithAdditionalFields
   /**
    * Remove the specified child from this directory.
    */
-  public boolean removeChild(INode child, Snapshot latest)
+  public boolean removeChild(INode child, int latestSnapshotId)
       throws QuotaExceededException {
-    if (isInLatestSnapshot(latest)) {
+    if (isInLatestSnapshot(latestSnapshotId)) {
       // create snapshot feature if necessary
       DirectoryWithSnapshotFeature sf = this.getDirectoryWithSnapshotFeature();
       if (sf == null) {
         sf = this.addSnapshotFeature(null);
       }
-      return sf.removeChild(this, child, latest);
+      return sf.removeChild(this, child, latestSnapshotId);
     }
     return removeChild(child);
   }
@@ -493,24 +498,24 @@ public class INodeDirectory extends INodeWithAdditionalFields
    *         otherwise, return true;
    */
   public boolean addChild(INode node, final boolean setModTime,
-      final Snapshot latest) throws QuotaExceededException {
+      final int latestSnapshotId) throws QuotaExceededException {
     final int low = searchChildren(node.getLocalNameBytes());
     if (low >= 0) {
       return false;
     }
 
-    if (isInLatestSnapshot(latest)) {
+    if (isInLatestSnapshot(latestSnapshotId)) {
       // create snapshot feature if necessary
       DirectoryWithSnapshotFeature sf = this.getDirectoryWithSnapshotFeature();
       if (sf == null) {
         sf = this.addSnapshotFeature(null);
       }
-      return sf.addChild(this, node, setModTime, latest);
+      return sf.addChild(this, node, setModTime, latestSnapshotId);
     }
     addChild(node, low);
     if (setModTime) {
       // update modification time of the parent directory
-      updateModificationTime(node.getModificationTime(), latest);
+      updateModificationTime(node.getModificationTime(), latestSnapshotId);
     }
     return true;
   }
@@ -548,10 +553,9 @@ public class INodeDirectory extends INodeWithAdditionalFields
     // we are computing the quota usage for a specific snapshot here, i.e., the
     // computation only includes files/directories that exist at the time of the
     // given snapshot
-    if (sf != null && lastSnapshotId != Snapshot.INVALID_ID
+    if (sf != null && lastSnapshotId != Snapshot.CURRENT_STATE_ID
         && !(useCache && isQuotaSet())) {
-      Snapshot lastSnapshot = sf.getDiffs().getSnapshotById(lastSnapshotId);
-      ReadOnlyList<INode> childrenList = getChildrenList(lastSnapshot);
+      ReadOnlyList<INode> childrenList = getChildrenList(lastSnapshotId);
       for (INode child : childrenList) {
         child.computeQuotaUsage(counts, useCache, lastSnapshotId);
       }
@@ -607,7 +611,7 @@ public class INodeDirectory extends INodeWithAdditionalFields
 
   ContentSummaryComputationContext computeDirectoryContentSummary(
       ContentSummaryComputationContext summary) {
-    ReadOnlyList<INode> childrenList = getChildrenList(null);
+    ReadOnlyList<INode> childrenList = getChildrenList(Snapshot.CURRENT_STATE_ID);
     // Explicit traversing is done to enable repositioning after relinquishing
     // and reacquiring locks.
     for (int i = 0;  i < childrenList.size(); i++) {
@@ -629,7 +633,7 @@ public class INodeDirectory extends INodeWithAdditionalFields
         break;
       }
       // Obtain the children list again since it may have been modified.
-      childrenList = getChildrenList(null);
+      childrenList = getChildrenList(Snapshot.CURRENT_STATE_ID);
       // Reposition in case the children list is changed. Decrement by 1
       // since it will be incremented when loops.
       i = nextChild(childrenList, childName) - 1;
@@ -668,21 +672,16 @@ public class INodeDirectory extends INodeWithAdditionalFields
    *          The reference node to be removed/replaced
    * @param newChild
    *          The node to be added back
-   * @param latestSnapshot
-   *          The latest snapshot. Note this may not be the last snapshot in the
-   *          diff list, since the src tree of the current rename operation
-   *          may be the dst tree of a previous rename.
    * @throws QuotaExceededException should not throw this exception
    */
   public void undoRename4ScrParent(final INodeReference oldChild,
-      final INode newChild, Snapshot latestSnapshot)
-      throws QuotaExceededException {
+      final INode newChild) throws QuotaExceededException {
     DirectoryWithSnapshotFeature sf = getDirectoryWithSnapshotFeature();
     Preconditions.checkState(sf != null,
         "Directory does not have snapshot feature");
     sf.getDiffs().removeChild(ListType.DELETED, oldChild);
     sf.getDiffs().replaceChild(ListType.CREATED, oldChild, newChild);
-    addChild(newChild, true, null);
+    addChild(newChild, true, Snapshot.CURRENT_STATE_ID);
   }
   
   /**
@@ -691,16 +690,14 @@ public class INodeDirectory extends INodeWithAdditionalFields
    * and delete possible record in the deleted list.  
    */
   public void undoRename4DstParent(final INode deletedChild,
-      Snapshot latestSnapshot) throws QuotaExceededException {
+      int latestSnapshotId) throws QuotaExceededException {
     DirectoryWithSnapshotFeature sf = getDirectoryWithSnapshotFeature();
     Preconditions.checkState(sf != null,
         "Directory does not have snapshot feature");
     boolean removeDeletedChild = sf.getDiffs().removeChild(ListType.DELETED,
         deletedChild);
-    // pass null for inodeMap since the parent node will not get replaced when
-    // undoing rename
-    final boolean added = addChild(deletedChild, true, removeDeletedChild ? null
-        : latestSnapshot);
+    int sid = removeDeletedChild ? Snapshot.CURRENT_STATE_ID : latestSnapshotId;
+    final boolean added = addChild(deletedChild, true, sid);
     // update quota usage if adding is successfully and the old child has not
     // been stored in deleted list before
     if (added && !removeDeletedChild) {
@@ -722,8 +719,8 @@ public class INodeDirectory extends INodeWithAdditionalFields
   }
 
   /** Call cleanSubtree(..) recursively down the subtree. */
-  public Quota.Counts cleanSubtreeRecursively(final Snapshot snapshot,
-      Snapshot prior, final BlocksMapUpdateInfo collectedBlocks,
+  public Quota.Counts cleanSubtreeRecursively(final int snapshot,
+      int prior, final BlocksMapUpdateInfo collectedBlocks,
       final List<INode> removedINodes, final Map<INode, INode> excludedNodes, 
       final boolean countDiffChange) throws QuotaExceededException {
     Quota.Counts counts = Quota.Counts.newInstance();
@@ -732,9 +729,10 @@ public class INodeDirectory extends INodeWithAdditionalFields
     // to its latest previous snapshot. (besides, we also need to consider nodes
     // created after prior but before snapshot. this will be done in 
     // DirectoryWithSnapshotFeature)
-    Snapshot s = snapshot != null && prior != null ? prior : snapshot;
+    int s = snapshot != Snapshot.CURRENT_STATE_ID
+        && prior != Snapshot.NO_SNAPSHOT_ID ? prior : snapshot;
     for (INode child : getChildrenList(s)) {
-      if (snapshot != null && excludedNodes != null
+      if (snapshot != Snapshot.CURRENT_STATE_ID && excludedNodes != null
           && excludedNodes.containsKey(child)) {
         continue;
       } else {
@@ -753,7 +751,7 @@ public class INodeDirectory extends INodeWithAdditionalFields
     if (sf != null) {
       sf.clear(this, collectedBlocks, removedINodes);
     }
-    for (INode child : getChildrenList(null)) {
+    for (INode child : getChildrenList(Snapshot.CURRENT_STATE_ID)) {
       child.destroyAndCollectBlocks(collectedBlocks, removedINodes);
     }
     clear();
@@ -761,18 +759,19 @@ public class INodeDirectory extends INodeWithAdditionalFields
   }
   
   @Override
-  public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
+  public Quota.Counts cleanSubtree(final int snapshotId, int priorSnapshotId,
       final BlocksMapUpdateInfo collectedBlocks,
       final List<INode> removedINodes, final boolean countDiffChange)
       throws QuotaExceededException {
     DirectoryWithSnapshotFeature sf = getDirectoryWithSnapshotFeature();
     // there is snapshot data
     if (sf != null) {
-      return sf.cleanDirectory(this, snapshot, prior, collectedBlocks,
-          removedINodes, countDiffChange);
+      return sf.cleanDirectory(this, snapshotId, priorSnapshotId,
+          collectedBlocks, removedINodes, countDiffChange);
     }
     // there is no snapshot data
-    if (prior == null && snapshot == null) {
+    if (priorSnapshotId == Snapshot.NO_SNAPSHOT_ID
+        && snapshotId == Snapshot.CURRENT_STATE_ID) {
       // destroy the whole subtree and collect blocks that should be deleted
       Quota.Counts counts = Quota.Counts.newInstance();
       this.computeQuotaUsage(counts, true);
@@ -780,7 +779,7 @@ public class INodeDirectory extends INodeWithAdditionalFields
       return counts; 
     } else {
       // process recursively down the subtree
-      Quota.Counts counts = cleanSubtreeRecursively(snapshot, prior,
+      Quota.Counts counts = cleanSubtreeRecursively(snapshotId, priorSnapshotId,
           collectedBlocks, removedINodes, null, countDiffChange);
       if (isQuotaSet()) {
         getDirectoryWithQuotaFeature().addSpaceConsumed2Cache(
@@ -816,7 +815,7 @@ public class INodeDirectory extends INodeWithAdditionalFields
   @VisibleForTesting
   @Override
   public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
-      final Snapshot snapshot) {
+      final int snapshot) {
     super.dumpTreeRecursively(out, prefix, snapshot);
     out.print(", childrenSize=" + getChildrenList(snapshot).size());
     final DirectoryWithQuotaFeature q = getDirectoryWithQuotaFeature();
@@ -824,7 +823,7 @@ public class INodeDirectory extends INodeWithAdditionalFields
       out.print(", " + q);
     }
     if (this instanceof Snapshot.Root) {
-      out.print(", snapshotId=" + snapshot.getId());
+      out.print(", snapshotId=" + snapshot);
     }
     out.println();
 
@@ -869,7 +868,7 @@ public class INodeDirectory extends INodeWithAdditionalFields
       for(final Iterator<SnapshotAndINode> i = subs.iterator(); i.hasNext();) {
         final SnapshotAndINode pair = i.next();
         prefix.append(i.hasNext()? DUMPTREE_EXCEPT_LAST_ITEM: DUMPTREE_LAST_ITEM);
-        pair.inode.dumpTreeRecursively(out, prefix, pair.snapshot);
+        pair.inode.dumpTreeRecursively(out, prefix, pair.snapshotId);
         prefix.setLength(prefix.length() - 2);
       }
     }
@@ -877,20 +876,16 @@ public class INodeDirectory extends INodeWithAdditionalFields
 
   /** A pair of Snapshot and INode objects. */
   protected static class SnapshotAndINode {
-    public final Snapshot snapshot;
+    public final int snapshotId;
     public final INode inode;
 
-    public SnapshotAndINode(Snapshot snapshot, INode inode) {
-      this.snapshot = snapshot;
+    public SnapshotAndINode(int snapshot, INode inode) {
+      this.snapshotId = snapshot;
       this.inode = inode;
     }
-
-    public SnapshotAndINode(Snapshot snapshot) {
-      this(snapshot, snapshot.getRoot());
-    }
   }
 
-  public final int getChildrenNum(final Snapshot snapshot) {
-    return getChildrenList(snapshot).size();
+  public final int getChildrenNum(final int snapshotId) {
+    return getChildrenList(snapshotId).size();
   }
 }
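
Note: in cleanSubtreeRecursively the children list to traverse is now chosen by comparing both ids against their sentinels: when an actual snapshot is being deleted and a prior snapshot exists, the walk uses the prior snapshot's view, otherwise it uses the to-be-deleted view itself. A small sketch of just that selection, assuming two distinct sentinel constants (placeholder values here):

    // Hypothetical sketch of the id-selection logic; not the INodeDirectory code.
    public class CleanSubtreeSelectSketch {
      static final int CURRENT_STATE_ID = Integer.MAX_VALUE - 1; // "live tree"
      static final int NO_SNAPSHOT_ID   = Integer.MAX_VALUE;     // "no prior snapshot"

      /** Which snapshot view should the recursive clean traverse? */
      static int traversalId(int snapshotId, int priorId) {
        boolean deletingASnapshot = snapshotId != CURRENT_STATE_ID;
        boolean hasPrior          = priorId != NO_SNAPSHOT_ID;
        return (deletingASnapshot && hasPrior) ? priorId : snapshotId;
      }

      public static void main(String[] args) {
        System.out.println(traversalId(8, 5));                // 5: walk the prior view
        System.out.println(traversalId(8, NO_SNAPSHOT_ID));   // 8: no prior, walk the snapshot
        System.out.println(traversalId(CURRENT_STATE_ID, 5)); // sentinel: live delete walks current children
      }
    }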

+ 39 - 35
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java

@@ -17,6 +17,9 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID;
+import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.NO_SNAPSHOT_ID;
+
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.PrintWriter;
@@ -282,26 +285,27 @@ public class INodeFile extends INodeWithAdditionalFields
   }
 
   @Override
-  public INodeFileAttributes getSnapshotINode(final Snapshot snapshot) {
+  public INodeFileAttributes getSnapshotINode(final int snapshotId) {
     FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
     if (sf != null) {
-      return sf.getDiffs().getSnapshotINode(snapshot, this);
+      return sf.getDiffs().getSnapshotINode(snapshotId, this);
     } else {
       return this;
     }
   }
 
   @Override
-  public INodeFile recordModification(final Snapshot latest) 
+  public INodeFile recordModification(final int latestSnapshotId) 
       throws QuotaExceededException {
-    if (isInLatestSnapshot(latest) && !shouldRecordInSrcSnapshot(latest)) {
+    if (isInLatestSnapshot(latestSnapshotId)
+        && !shouldRecordInSrcSnapshot(latestSnapshotId)) {
       // the file is in snapshot, create a snapshot feature if it does not have
       FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
       if (sf == null) {
         sf = addSnapshotFeature(null);
       }
       // record self in the diff list if necessary
-      sf.getDiffs().saveSelf2Snapshot(latest, this, null);
+      sf.getDiffs().saveSelf2Snapshot(latestSnapshotId, this, null);
     }
     return this;
   }
@@ -317,23 +321,22 @@ public class INodeFile extends INodeWithAdditionalFields
   /* End of Snapshot Feature */
 
   /** @return the replication factor of the file. */
-  public final short getFileReplication(Snapshot snapshot) {
-    if (snapshot != null) {
+  public final short getFileReplication(int snapshot) {
+    if (snapshot != CURRENT_STATE_ID) {
       return getSnapshotINode(snapshot).getFileReplication();
     }
-
     return HeaderFormat.getReplication(header);
   }
 
   /** The same as getFileReplication(null). */
   @Override // INodeFileAttributes
   public final short getFileReplication() {
-    return getFileReplication(null);
+    return getFileReplication(CURRENT_STATE_ID);
   }
 
   @Override // BlockCollection
   public short getBlockReplication() {
-    short max = getFileReplication(null);
+    short max = getFileReplication(CURRENT_STATE_ID);
     FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
     if (sf != null) {
       short maxInSnapshot = sf.getMaxBlockRepInDiffs();
@@ -351,9 +354,10 @@ public class INodeFile extends INodeWithAdditionalFields
   }
 
   /** Set the replication factor of this file. */
-  public final INodeFile setFileReplication(short replication, Snapshot latest,
-      final INodeMap inodeMap) throws QuotaExceededException {
-    final INodeFile nodeToUpdate = recordModification(latest);
+  public final INodeFile setFileReplication(short replication,
+      int latestSnapshotId, final INodeMap inodeMap)
+      throws QuotaExceededException {
+    final INodeFile nodeToUpdate = recordModification(latestSnapshotId);
     nodeToUpdate.setFileReplication(replication);
     return nodeToUpdate;
   }
@@ -431,22 +435,22 @@ public class INodeFile extends INodeWithAdditionalFields
   }
 
   @Override
-  public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
+  public Quota.Counts cleanSubtree(final int snapshot, int priorSnapshotId,
       final BlocksMapUpdateInfo collectedBlocks,
       final List<INode> removedINodes, final boolean countDiffChange)
       throws QuotaExceededException {
     FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
     if (sf != null) {
-      return sf.cleanFile(this, snapshot, prior, collectedBlocks,
+      return sf.cleanFile(this, snapshot, priorSnapshotId, collectedBlocks,
           removedINodes, countDiffChange);
     }
     Quota.Counts counts = Quota.Counts.newInstance();
-    if (snapshot == null && prior == null) {
+    if (snapshot == CURRENT_STATE_ID && priorSnapshotId == NO_SNAPSHOT_ID) {
       // this only happens when deleting the current file and the file is not
       // in any snapshot
       computeQuotaUsage(counts, false);
       destroyAndCollectBlocks(collectedBlocks, removedINodes);
-    } else if (snapshot == null && prior != null) {
+    } else if (snapshot == CURRENT_STATE_ID && priorSnapshotId != NO_SNAPSHOT_ID) {
       // when deleting the current file and the file is in snapshot, we should
       // clean the 0-sized block if the file is UC
       FileUnderConstructionFeature uc = getFileUnderConstructionFeature();
@@ -490,17 +494,18 @@ public class INodeFile extends INodeWithAdditionalFields
     FileWithSnapshotFeature sf = getFileWithSnapshotFeature();
     if (sf != null) {
       FileDiffList fileDiffList = sf.getDiffs();
-      Snapshot last = fileDiffList.getLastSnapshot();
+      int last = fileDiffList.getLastSnapshotId();
       List<FileDiff> diffs = fileDiffList.asList();
 
-      if (lastSnapshotId == Snapshot.INVALID_ID || last == null) {
+      if (lastSnapshotId == Snapshot.CURRENT_STATE_ID
+          || last == Snapshot.CURRENT_STATE_ID) {
         nsDelta += diffs.size();
         dsDelta = diskspaceConsumed();
-      } else if (last.getId() < lastSnapshotId) {
+      } else if (last < lastSnapshotId) {
         dsDelta = computeFileSize(true, false) * getFileReplication();
       } else {      
-        Snapshot s = fileDiffList.getSnapshotById(lastSnapshotId);
-        dsDelta = diskspaceConsumed(s);
+        int sid = fileDiffList.getSnapshotById(lastSnapshotId);
+        dsDelta = diskspaceConsumed(sid);
       }
     } else {
       dsDelta = diskspaceConsumed();
@@ -511,7 +516,7 @@ public class INodeFile extends INodeWithAdditionalFields
   }
 
   @Override
-  public final ContentSummaryComputationContext  computeContentSummary(
+  public final ContentSummaryComputationContext computeContentSummary(
       final ContentSummaryComputationContext summary) {
     computeContentSummary4Snapshot(summary.getCounts());
     computeContentSummary4Current(summary.getCounts());
@@ -550,23 +555,21 @@ public class INodeFile extends INodeWithAdditionalFields
 
   /** The same as computeFileSize(null). */
   public final long computeFileSize() {
-    return computeFileSize(null);
+    return computeFileSize(CURRENT_STATE_ID);
   }
 
   /**
    * Compute file size of the current file if the given snapshot is null;
    * otherwise, get the file size from the given snapshot.
    */
-  public final long computeFileSize(Snapshot snapshot) {
+  public final long computeFileSize(int snapshotId) {
     FileWithSnapshotFeature sf = this.getFileWithSnapshotFeature();
-    if (snapshot != null && sf != null) {
-      final FileDiff d = sf.getDiffs().getDiff(
-          snapshot);
+    if (snapshotId != CURRENT_STATE_ID && sf != null) {
+      final FileDiff d = sf.getDiffs().getDiffById(snapshotId);
       if (d != null) {
         return d.getFileSize();
       }
     }
-
     return computeFileSize(true, false);
   }
 
@@ -617,9 +620,10 @@ public class INodeFile extends INodeWithAdditionalFields
     return computeFileSize(true, true) * getBlockReplication();
   }
 
-  public final long diskspaceConsumed(Snapshot lastSnapshot) {
-    if (lastSnapshot != null) {
-      return computeFileSize(lastSnapshot) * getFileReplication(lastSnapshot);
+  public final long diskspaceConsumed(int lastSnapshotId) {
+    if (lastSnapshotId != CURRENT_STATE_ID) {
+      return computeFileSize(lastSnapshotId)
+          * getFileReplication(lastSnapshotId);
     } else {
       return diskspaceConsumed();
     }
@@ -648,9 +652,9 @@ public class INodeFile extends INodeWithAdditionalFields
   @VisibleForTesting
   @Override
   public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
-      final Snapshot snapshot) {
-    super.dumpTreeRecursively(out, prefix, snapshot);
-    out.print(", fileSize=" + computeFileSize(snapshot));
+      final int snapshotId) {
+    super.dumpTreeRecursively(out, prefix, snapshotId);
+    out.print(", fileSize=" + computeFileSize(snapshotId));
     // only compare the first block
     out.print(", blocks=");
     out.print(blocks == null || blocks.length == 0? null: blocks[0]);
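
Note: computeFileSize(int) and diskspaceConsumed(int) above now look the length up in the per-snapshot diff list by id and fall back to the live length when the id is the current-state sentinel or no diff is recorded. A compact sketch under the assumption that the diff list can be approximated by a map from snapshot id to recorded size; all names here are hypothetical:

    import java.util.Map;

    // Hypothetical sketch; FileDiffList is approximated by snapshotLengths.
    public class FileSizeSketch {
      static final int CURRENT_STATE_ID = Integer.MAX_VALUE - 1; // stand-in sentinel

      final long liveLength;
      final short replication;
      final Map<Integer, Long> snapshotLengths; // snapshotId -> recorded file size

      FileSizeSketch(long liveLength, short replication, Map<Integer, Long> snapshotLengths) {
        this.liveLength = liveLength;
        this.replication = replication;
        this.snapshotLengths = snapshotLengths;
      }

      long computeFileSize(int snapshotId) {
        if (snapshotId != CURRENT_STATE_ID) {
          Long recorded = snapshotLengths.get(snapshotId);
          if (recorded != null) {
            return recorded;            // size frozen in that snapshot's diff
          }
        }
        return liveLength;              // fall back to the current file
      }

      long diskspaceConsumed(int snapshotId) {
        return computeFileSize(snapshotId) * replication;
      }

      public static void main(String[] args) {
        FileSizeSketch f = new FileSizeSketch(4096, (short) 3, Map.of(7, 1024L));
        System.out.println(f.computeFileSize(7));                  // 1024
        System.out.println(f.diskspaceConsumed(CURRENT_STATE_ID)); // 12288
      }
    }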

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java

@@ -23,7 +23,6 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
 import org.apache.hadoop.hdfs.server.namenode.Quota.Counts;
-import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.util.GSet;
 import org.apache.hadoop.util.LightWeightGSet;
 
@@ -89,7 +88,8 @@ public class INodeMap {
         "", "", new FsPermission((short) 0)), 0, 0) {
       
       @Override
-      INode recordModification(Snapshot latest) throws QuotaExceededException {
+      INode recordModification(int latestSnapshotId)
+          throws QuotaExceededException {
         return null;
       }
       
@@ -112,7 +112,7 @@ public class INodeMap {
       }
       
       @Override
-      public Counts cleanSubtree(Snapshot snapshot, Snapshot prior,
+      public Counts cleanSubtree(int snapshotId, int priorSnapshotId,
           BlocksMapUpdateInfo collectedBlocks, List<INode> removedINodes,
           boolean countDiffChange) throws QuotaExceededException {
         return null;

+ 62 - 59
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java

@@ -91,7 +91,7 @@ public abstract class INodeReference extends INode {
    * method to identify the snapshot which is the latest snapshot before the
    * reference node's creation. 
    */
-  static Snapshot getPriorSnapshot(INodeReference ref) {
+  static int getPriorSnapshot(INodeReference ref) {
     WithCount wc = (WithCount) ref.getReferredINode();
     WithName wn = null;
     if (ref instanceof DstReference) {
@@ -111,7 +111,7 @@ public abstract class INodeReference extends INode {
         }
       }
     }
-    return null;
+    return Snapshot.NO_SNAPSHOT_ID;
   }
   
   private INode referred;
@@ -185,13 +185,13 @@ public abstract class INodeReference extends INode {
   }
   
   @Override
-  public final PermissionStatus getPermissionStatus(Snapshot snapshot) {
-    return referred.getPermissionStatus(snapshot);
+  public final PermissionStatus getPermissionStatus(int snapshotId) {
+    return referred.getPermissionStatus(snapshotId);
   }
   
   @Override
-  public final String getUserName(Snapshot snapshot) {
-    return referred.getUserName(snapshot);
+  public final String getUserName(int snapshotId) {
+    return referred.getUserName(snapshotId);
   }
   
   @Override
@@ -200,8 +200,8 @@ public abstract class INodeReference extends INode {
   }
   
   @Override
-  public final String getGroupName(Snapshot snapshot) {
-    return referred.getGroupName(snapshot);
+  public final String getGroupName(int snapshotId) {
+    return referred.getGroupName(snapshotId);
   }
   
   @Override
@@ -210,8 +210,8 @@ public abstract class INodeReference extends INode {
   }
   
   @Override
-  public final FsPermission getFsPermission(Snapshot snapshot) {
-    return referred.getFsPermission(snapshot);
+  public final FsPermission getFsPermission(int snapshotId) {
+    return referred.getFsPermission(snapshotId);
   }
   @Override
   public final short getFsPermissionShort() {
@@ -229,14 +229,14 @@ public abstract class INodeReference extends INode {
   }
 
   @Override
-  public final long getModificationTime(Snapshot snapshot) {
-    return referred.getModificationTime(snapshot);
+  public final long getModificationTime(int snapshotId) {
+    return referred.getModificationTime(snapshotId);
   }
   
   @Override
-  public final INode updateModificationTime(long mtime, Snapshot latest) 
+  public final INode updateModificationTime(long mtime, int latestSnapshotId) 
       throws QuotaExceededException {
-    return referred.updateModificationTime(mtime, latest);
+    return referred.updateModificationTime(mtime, latestSnapshotId);
   }
   
   @Override
@@ -245,8 +245,8 @@ public abstract class INodeReference extends INode {
   }
   
   @Override
-  public final long getAccessTime(Snapshot snapshot) {
-    return referred.getAccessTime(snapshot);
+  public final long getAccessTime(int snapshotId) {
+    return referred.getAccessTime(snapshotId);
   }
   
   @Override
@@ -255,15 +255,15 @@ public abstract class INodeReference extends INode {
   }
 
   @Override
-  final INode recordModification(Snapshot latest)
+  final INode recordModification(int latestSnapshotId)
       throws QuotaExceededException {
-    referred.recordModification(latest);
+    referred.recordModification(latestSnapshotId);
     // reference is never replaced 
     return this;
   }
 
   @Override // used by WithCount
-  public Quota.Counts cleanSubtree(Snapshot snapshot, Snapshot prior,
+  public Quota.Counts cleanSubtree(int snapshot, int prior,
       BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes,
       final boolean countDiffChange) throws QuotaExceededException {
     return referred.cleanSubtree(snapshot, prior, collectedBlocks,
@@ -291,8 +291,8 @@ public abstract class INodeReference extends INode {
   }
   
   @Override
-  public final INodeAttributes getSnapshotINode(Snapshot snapshot) {
-    return referred.getSnapshotINode(snapshot);
+  public final INodeAttributes getSnapshotINode(int snapshotId) {
+    return referred.getSnapshotINode(snapshotId);
   }
 
   @Override
@@ -308,7 +308,7 @@ public abstract class INodeReference extends INode {
 
   @Override
   public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
-      final Snapshot snapshot) {
+      final int snapshot) {
     super.dumpTreeRecursively(out, prefix, snapshot);
     if (this instanceof DstReference) {
       out.print(", dstSnapshotId=" + ((DstReference) this).dstSnapshotId);
@@ -327,7 +327,7 @@ public abstract class INodeReference extends INode {
   }
   
   public int getDstSnapshotId() {
-    return Snapshot.INVALID_ID;
+    return Snapshot.CURRENT_STATE_ID;
   }
   
   /** An anonymous reference with reference count. */
@@ -457,34 +457,35 @@ public abstract class INodeReference extends INode {
       // node happened before the rename of its ancestor. This should be 
       // impossible since for WithName node we only count its children at the 
       // time of the rename. 
-      Preconditions.checkState(this.lastSnapshotId >= lastSnapshotId);
+      Preconditions.checkState(lastSnapshotId == Snapshot.CURRENT_STATE_ID
+          || this.lastSnapshotId >= lastSnapshotId);
       final INode referred = this.getReferredINode().asReference()
           .getReferredINode();
       // We will continue the quota usage computation using the same snapshot id
       // as time line (if the given snapshot id is valid). Also, we cannot use 
       // cache for the referred node since its cached quota may have already 
       // been updated by changes in the current tree.
-      int id = lastSnapshotId > Snapshot.INVALID_ID ? 
+      int id = lastSnapshotId != Snapshot.CURRENT_STATE_ID ? 
           lastSnapshotId : this.lastSnapshotId;
       return referred.computeQuotaUsage(counts, false, id);
     }
     
     @Override
-    public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
+    public Quota.Counts cleanSubtree(final int snapshot, int prior,
         final BlocksMapUpdateInfo collectedBlocks,
         final List<INode> removedINodes, final boolean countDiffChange)
         throws QuotaExceededException {
       // since WithName node resides in deleted list acting as a snapshot copy,
       // the parameter snapshot must be non-null
-      Preconditions.checkArgument(snapshot != null);
-      // if prior is null, we need to check snapshot belonging to the previous
-      // WithName instance
-      if (prior == null) {
+      Preconditions.checkArgument(snapshot != Snapshot.CURRENT_STATE_ID);
+      // if prior is NO_SNAPSHOT_ID, we need to check snapshot belonging to the
+      // previous WithName instance
+      if (prior == Snapshot.NO_SNAPSHOT_ID) {
         prior = getPriorSnapshot(this);
       }
       
-      if (prior != null
-          && Snapshot.ID_COMPARATOR.compare(snapshot, prior) <= 0) {
+      if (prior != Snapshot.NO_SNAPSHOT_ID
+          && Snapshot.ID_INTEGER_COMPARATOR.compare(snapshot, prior) <= 0) {
         return Quota.Counts.newInstance();
       }
 
@@ -496,7 +497,7 @@ public abstract class INodeReference extends INode {
             -counts.get(Quota.DISKSPACE), true);
       }
       
-      if (snapshot.getId() < lastSnapshotId) {
+      if (snapshot < lastSnapshotId) {
         // for a WithName node, when we compute its quota usage, we only count
         // in all the nodes existing at the time of the corresponding rename op.
         // Thus if we are deleting a snapshot before/at the snapshot associated 
@@ -509,16 +510,16 @@ public abstract class INodeReference extends INode {
     @Override
     public void destroyAndCollectBlocks(BlocksMapUpdateInfo collectedBlocks,
         final List<INode> removedINodes) {
-      Snapshot snapshot = getSelfSnapshot();
+      int snapshot = getSelfSnapshot();
       if (removeReference(this) <= 0) {
         getReferredINode().destroyAndCollectBlocks(collectedBlocks,
             removedINodes);
       } else {
-        Snapshot prior = getPriorSnapshot(this);
+        int prior = getPriorSnapshot(this);
         INode referred = getReferredINode().asReference().getReferredINode();
         
-        if (snapshot != null) {
-          if (prior != null && snapshot.getId() <= prior.getId()) {
+        if (snapshot != Snapshot.NO_SNAPSHOT_ID) {
+          if (prior != Snapshot.NO_SNAPSHOT_ID && snapshot <= prior) {
             // the snapshot to be deleted has been deleted while traversing 
             // the src tree of the previous rename operation. This usually 
             // happens when rename's src and dst are under the same 
@@ -545,9 +546,9 @@ public abstract class INodeReference extends INode {
       }
     }
     
-    private Snapshot getSelfSnapshot() {
+    private int getSelfSnapshot() {
       INode referred = getReferredINode().asReference().getReferredINode();
-      Snapshot snapshot = null;
+      int snapshot = Snapshot.NO_SNAPSHOT_ID;
       if (referred.isFile() && referred.asFile().isWithSnapshot()) {
         snapshot = referred.asFile().getDiffs().getPrior(lastSnapshotId);
       } else if (referred.isDirectory()) {
@@ -569,7 +570,7 @@ public abstract class INodeReference extends INode {
      * latest snapshot. Otherwise changes will be recorded to the snapshot
      * belonging to the src of the rename.
      * 
-     * {@link Snapshot#INVALID_ID} means no dstSnapshot (e.g., src of the
+     * {@link Snapshot#NO_SNAPSHOT_ID} means no dstSnapshot (e.g., src of the
      * first-time rename).
      */
     private final int dstSnapshotId;
@@ -587,25 +588,27 @@ public abstract class INodeReference extends INode {
     }
     
     @Override
-    public Quota.Counts cleanSubtree(Snapshot snapshot, Snapshot prior,
+    public Quota.Counts cleanSubtree(int snapshot, int prior,
         BlocksMapUpdateInfo collectedBlocks, List<INode> removedINodes,
         final boolean countDiffChange) throws QuotaExceededException {
-      if (snapshot == null && prior == null) {
+      if (snapshot == Snapshot.CURRENT_STATE_ID
+          && prior == Snapshot.NO_SNAPSHOT_ID) {
         Quota.Counts counts = Quota.Counts.newInstance();
         this.computeQuotaUsage(counts, true);
         destroyAndCollectBlocks(collectedBlocks, removedINodes);
         return counts;
       } else {
-        // if prior is null, we need to check snapshot belonging to the previous
-        // WithName instance
-        if (prior == null) {
+        // if prior is NO_SNAPSHOT_ID, we need to check snapshot belonging to 
+        // the previous WithName instance
+        if (prior == Snapshot.NO_SNAPSHOT_ID) {
           prior = getPriorSnapshot(this);
         }
-        // if prior is not null, and prior is not before the to-be-deleted 
-        // snapshot, we can quit here and leave the snapshot deletion work to 
-        // the src tree of rename
-        if (snapshot != null && prior != null
-            && Snapshot.ID_COMPARATOR.compare(snapshot, prior) <= 0) {
+        // if prior is not NO_SNAPSHOT_ID, and prior is not before the
+        // to-be-deleted snapshot, we can quit here and leave the snapshot
+        // deletion work to the src tree of rename
+        if (snapshot != Snapshot.CURRENT_STATE_ID
+            && prior != Snapshot.NO_SNAPSHOT_ID
+            && Snapshot.ID_INTEGER_COMPARATOR.compare(snapshot, prior) <= 0) {
           return Quota.Counts.newInstance();
         }
         return getReferredINode().cleanSubtree(snapshot, prior,
@@ -632,12 +635,12 @@ public abstract class INodeReference extends INode {
       } else {
         // we will clean everything, including files, directories, and 
         // snapshots, that were created after this prior snapshot
-        Snapshot prior = getPriorSnapshot(this);
+        int prior = getPriorSnapshot(this);
         // prior must be non-null, otherwise we do not have any previous 
         // WithName nodes, and the reference number will be 0.
-        Preconditions.checkState(prior != null);
+        Preconditions.checkState(prior != Snapshot.NO_SNAPSHOT_ID);
         // identify the snapshot created after prior
-        Snapshot snapshot = getSelfSnapshot(prior);
+        int snapshot = getSelfSnapshot(prior);
         
         INode referred = getReferredINode().asReference().getReferredINode();
         if (referred.isFile()) {
@@ -671,23 +674,23 @@ public abstract class INodeReference extends INode {
       }
     }
     
-    private Snapshot getSelfSnapshot(final Snapshot prior) {
+    private int getSelfSnapshot(final int prior) {
       WithCount wc = (WithCount) getReferredINode().asReference();
       INode referred = wc.getReferredINode();
-      Snapshot lastSnapshot = null;
+      int lastSnapshot = Snapshot.CURRENT_STATE_ID;
       if (referred.isFile() && referred.asFile().isWithSnapshot()) {
-        lastSnapshot = referred.asFile().getDiffs().getLastSnapshot();
+        lastSnapshot = referred.asFile().getDiffs().getLastSnapshotId();
       } else if (referred.isDirectory()) {
         DirectoryWithSnapshotFeature sf = referred.asDirectory()
             .getDirectoryWithSnapshotFeature();
         if (sf != null) {
-          lastSnapshot = sf.getLastSnapshot();
+          lastSnapshot = sf.getLastSnapshotId();
         }
       }
-      if (lastSnapshot != null && !lastSnapshot.equals(prior)) {
+      if (lastSnapshot != Snapshot.CURRENT_STATE_ID && lastSnapshot != prior) {
         return lastSnapshot;
       } else {
-        return null;
+        return Snapshot.CURRENT_STATE_ID;
       }
     }
   }
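
Note: in the reference nodes the old object comparisons (null checks and Snapshot.ID_COMPARATOR) become plain int comparisons: prior is first resolved when it carries the "no snapshot" sentinel, and the clean-up is skipped when the snapshot being deleted is not newer than prior, leaving that work to the rename's source tree. A sketch of that gate; the method name, the resolver stub, and the sentinel values are assumptions for illustration only:

    // Hypothetical gate sketch; resolvedPrior stands in for getPriorSnapshot(this).
    public class CleanGateSketch {
      static final int CURRENT_STATE_ID = Integer.MAX_VALUE - 1;
      static final int NO_SNAPSHOT_ID   = Integer.MAX_VALUE;

      /** @return true if this reference should do the clean-up itself. */
      static boolean shouldCleanHere(int snapshot, int prior, int resolvedPrior) {
        if (prior == NO_SNAPSHOT_ID) {
          prior = resolvedPrior;        // resolve the prior snapshot lazily
        }
        // Nothing to do here when the deleted snapshot is not newer than prior.
        return !(snapshot != CURRENT_STATE_ID
            && prior != NO_SNAPSHOT_ID
            && Integer.compare(snapshot, prior) <= 0);
      }

      public static void main(String[] args) {
        System.out.println(shouldCleanHere(3, 5, 5)); // false: 3 <= 5, src tree handles it
        System.out.println(shouldCleanHere(9, 5, 5)); // true:  9 is newer than prior 5
      }
    }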

+ 7 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java

@@ -45,10 +45,10 @@ public class INodeSymlink extends INodeWithAdditionalFields {
   }
 
   @Override
-  INode recordModification(Snapshot latest) throws QuotaExceededException {
-    if (isInLatestSnapshot(latest)) {
+  INode recordModification(int latestSnapshotId) throws QuotaExceededException {
+    if (isInLatestSnapshot(latestSnapshotId)) {
       INodeDirectory parent = getParent();
-      parent.saveChild2Snapshot(this, latest, new INodeSymlink(this));
+      parent.saveChild2Snapshot(this, latestSnapshotId, new INodeSymlink(this));
     }
     return this;
   }
@@ -74,10 +74,11 @@ public class INodeSymlink extends INodeWithAdditionalFields {
   }
   
   @Override
-  public Quota.Counts cleanSubtree(final Snapshot snapshot, Snapshot prior,
+  public Quota.Counts cleanSubtree(final int snapshotId, int priorSnapshotId,
       final BlocksMapUpdateInfo collectedBlocks,
       final List<INode> removedINodes, final boolean countDiffChange) {
-    if (snapshot == null && prior == null) {
+    if (snapshotId == Snapshot.CURRENT_STATE_ID
+        && priorSnapshotId == Snapshot.NO_SNAPSHOT_ID) {
       destroyAndCollectBlocks(collectedBlocks, removedINodes);
     }
     return Quota.Counts.newInstance(1, 0);
@@ -105,7 +106,7 @@ public class INodeSymlink extends INodeWithAdditionalFields {
 
   @Override
   public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
-      final Snapshot snapshot) {
+      final int snapshot) {
     super.dumpTreeRecursively(out, prefix, snapshot);
     out.println();
   }

+ 20 - 21
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeWithAdditionalFields.java

@@ -154,9 +154,9 @@ public abstract class INodeWithAdditionalFields extends INode
   }
 
   @Override
-  final PermissionStatus getPermissionStatus(Snapshot snapshot) {
-    return new PermissionStatus(getUserName(snapshot), getGroupName(snapshot),
-        getFsPermission(snapshot));
+  final PermissionStatus getPermissionStatus(int snapshotId) {
+    return new PermissionStatus(getUserName(snapshotId), getGroupName(snapshotId),
+        getFsPermission(snapshotId));
   }
 
   private final void updatePermissionStatus(PermissionStatusFormat f, long n) {
@@ -164,9 +164,9 @@ public abstract class INodeWithAdditionalFields extends INode
   }
 
   @Override
-  final String getUserName(Snapshot snapshot) {
-    if (snapshot != null) {
-      return getSnapshotINode(snapshot).getUserName();
+  final String getUserName(int snapshotId) {
+    if (snapshotId != Snapshot.CURRENT_STATE_ID) {
+      return getSnapshotINode(snapshotId).getUserName();
     }
 
     int n = (int)PermissionStatusFormat.USER.retrieve(permission);
@@ -180,9 +180,9 @@ public abstract class INodeWithAdditionalFields extends INode
   }
 
   @Override
-  final String getGroupName(Snapshot snapshot) {
-    if (snapshot != null) {
-      return getSnapshotINode(snapshot).getGroupName();
+  final String getGroupName(int snapshotId) {
+    if (snapshotId != Snapshot.CURRENT_STATE_ID) {
+      return getSnapshotINode(snapshotId).getGroupName();
     }
 
     int n = (int)PermissionStatusFormat.GROUP.retrieve(permission);
@@ -196,9 +196,9 @@ public abstract class INodeWithAdditionalFields extends INode
   }
 
   @Override
-  final FsPermission getFsPermission(Snapshot snapshot) {
-    if (snapshot != null) {
-      return getSnapshotINode(snapshot).getFsPermission();
+  final FsPermission getFsPermission(int snapshotId) {
+    if (snapshotId != Snapshot.CURRENT_STATE_ID) {
+      return getSnapshotINode(snapshotId).getFsPermission();
     }
 
     return new FsPermission(getFsPermissionShort());
@@ -220,9 +220,9 @@ public abstract class INodeWithAdditionalFields extends INode
   }
 
   @Override
-  final long getModificationTime(Snapshot snapshot) {
-    if (snapshot != null) {
-      return getSnapshotINode(snapshot).getModificationTime();
+  final long getModificationTime(int snapshotId) {
+    if (snapshotId != Snapshot.CURRENT_STATE_ID) {
+      return getSnapshotINode(snapshotId).getModificationTime();
     }
 
     return this.modificationTime;
@@ -231,13 +231,13 @@ public abstract class INodeWithAdditionalFields extends INode
 
   /** Update modification time if it is larger than the current value. */
   @Override
-  public final INode updateModificationTime(long mtime, Snapshot latest) 
+  public final INode updateModificationTime(long mtime, int latestSnapshotId) 
       throws QuotaExceededException {
     Preconditions.checkState(isDirectory());
     if (mtime <= modificationTime) {
       return this;
     }
-    return setModificationTime(mtime, latest);
+    return setModificationTime(mtime, latestSnapshotId);
   }
 
   final void cloneModificationTime(INodeWithAdditionalFields that) {
@@ -250,11 +250,10 @@ public abstract class INodeWithAdditionalFields extends INode
   }
 
   @Override
-  final long getAccessTime(Snapshot snapshot) {
-    if (snapshot != null) {
-      return getSnapshotINode(snapshot).getAccessTime();
+  final long getAccessTime(int snapshotId) {
+    if (snapshotId != Snapshot.CURRENT_STATE_ID) {
+      return getSnapshotINode(snapshotId).getAccessTime();
     }
-
     return accessTime;
   }
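
The five accessors changed above (user, group, permission, modification time, access time) all share one shape: a concrete snapshot id answers from the saved snapshot copy, while CURRENT_STATE_ID answers from the live field. A self-contained sketch of that shape; the class, the map, and the values are invented for the example and are not Hadoop code.

// Hypothetical illustration of the accessor pattern above.
import java.util.HashMap;
import java.util.Map;

class SnapshotAwareAttrs {
  static final int CURRENT_STATE_ID = Integer.MAX_VALUE - 1;

  private long accessTime = 1000L;                                   // live value
  private final Map<Integer, Long> snapshotCopies = new HashMap<>(); // snapshot id -> saved value

  long getAccessTime(int snapshotId) {
    if (snapshotId != CURRENT_STATE_ID) {
      // a snapshot id was given: answer from the copy saved for that snapshot
      return snapshotCopies.get(snapshotId);
    }
    return accessTime;                                               // current state
  }

  public static void main(String[] args) {
    SnapshotAwareAttrs a = new SnapshotAwareAttrs();
    a.snapshotCopies.put(7, 500L);
    System.out.println(a.getAccessTime(7));                // 500 (from snapshot 7)
    System.out.println(a.getAccessTime(CURRENT_STATE_ID)); // 1000 (live value)
  }
}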
 

+ 32 - 29
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java

@@ -135,8 +135,8 @@ public class INodesInPath {
       if (!isRef && isDir && dir.isWithSnapshot()) {
         //if the path is a non-snapshot path, update the latest snapshot.
         if (!existing.isSnapshot()) {
-          existing.updateLatestSnapshot(dir.getDirectoryWithSnapshotFeature()
-              .getLastSnapshot());
+          existing.updateLatestSnapshotId(dir.getDirectoryWithSnapshotFeature()
+              .getLastSnapshotId());
         }
       } else if (isRef && isDir && !lastComp) {
         // If the curNode is a reference node, need to check its dstSnapshot:
@@ -151,16 +151,17 @@ public class INodesInPath {
         // recordModification method.
         if (!existing.isSnapshot()) {
           int dstSnapshotId = curNode.asReference().getDstSnapshotId();
-          Snapshot latest = existing.getLatestSnapshot();
-          if (latest == null ||  // no snapshot in dst tree of rename
-              dstSnapshotId >= latest.getId()) { // the above scenario 
-            Snapshot lastSnapshot = null;
+          int latest = existing.getLatestSnapshotId();
+          if (latest == Snapshot.CURRENT_STATE_ID || // no snapshot in dst tree of rename
+              (dstSnapshotId != Snapshot.CURRENT_STATE_ID && 
+                dstSnapshotId >= latest)) { // the above scenario 
+            int lastSnapshot = Snapshot.CURRENT_STATE_ID;
             DirectoryWithSnapshotFeature sf = null;
             if (curNode.isDirectory() && 
                 (sf = curNode.asDirectory().getDirectoryWithSnapshotFeature()) != null) {
-              lastSnapshot = sf.getLastSnapshot();
+              lastSnapshot = sf.getLastSnapshotId();
             }
-            existing.setSnapshot(lastSnapshot);
+            existing.setSnapshotId(lastSnapshot);
           }
         }
       }
@@ -206,14 +207,14 @@ public class INodesInPath {
           curNode = null;
         } else {
           curNode = s.getRoot();
-          existing.setSnapshot(s);
+          existing.setSnapshotId(s.getId());
         }
         if (index >= -1) {
           existing.snapshotRootIndex = existing.numNonNull;
         }
       } else {
         // normal case, and also for resolving file/dir under snapshot root
-        curNode = dir.getChild(childName, existing.getPathSnapshot());
+        curNode = dir.getChild(childName, existing.getPathSnapshotId());
       }
       count++;
       index++;
@@ -245,11 +246,12 @@ public class INodesInPath {
    */
   private int snapshotRootIndex;
   /**
-   * For snapshot paths, it is the reference to the snapshot; or null if the
-   * snapshot does not exist. For non-snapshot paths, it is the reference to
-   * the latest snapshot found in the path; or null if no snapshot is found.
+   * For snapshot paths, it is the id of the snapshot; or 
+   * {@link Snapshot#CURRENT_STATE_ID} if the snapshot does not exist. For 
+   * non-snapshot paths, it is the id of the latest snapshot found in the path;
+   * or {@link Snapshot#CURRENT_STATE_ID} if no snapshot is found.
    */
-  private Snapshot snapshot = null; 
+  private int snapshotId = Snapshot.CURRENT_STATE_ID; 
 
   private INodesInPath(byte[][] path, int number) {
     this.path = path;
@@ -262,29 +264,30 @@ public class INodesInPath {
   }
 
   /**
-   * For non-snapshot paths, return the latest snapshot found in the path.
-   * For snapshot paths, return null.
+   * For non-snapshot paths, return the latest snapshot id found in the path.
    */
-  public Snapshot getLatestSnapshot() {
-    return isSnapshot? null: snapshot;
+  public int getLatestSnapshotId() {
+    Preconditions.checkState(!isSnapshot);
+    return snapshotId;
   }
   
   /**
-   * For snapshot paths, return the snapshot specified in the path.
-   * For non-snapshot paths, return null.
+   * For snapshot paths, return the id of the snapshot specified in the path.
+   * For non-snapshot paths, return {@link Snapshot#CURRENT_STATE_ID}.
    */
-  public Snapshot getPathSnapshot() {
-    return isSnapshot? snapshot: null;
+  public int getPathSnapshotId() {
+    return isSnapshot ? snapshotId : Snapshot.CURRENT_STATE_ID;
   }
 
-  private void setSnapshot(Snapshot s) {
-    snapshot = s;
+  private void setSnapshotId(int sid) {
+    snapshotId = sid;
   }
   
-  private void updateLatestSnapshot(Snapshot s) {
-    if (snapshot == null
-        || (s != null && Snapshot.ID_COMPARATOR.compare(snapshot, s) < 0)) {
-      snapshot = s;
+  private void updateLatestSnapshotId(int sid) {
+    if (snapshotId == Snapshot.CURRENT_STATE_ID
+        || (sid != Snapshot.CURRENT_STATE_ID && Snapshot.ID_INTEGER_COMPARATOR
+            .compare(snapshotId, sid) < 0)) {
+      snapshotId = sid;
     }
   }
 
@@ -386,7 +389,7 @@ public class INodesInPath {
      .append("\n  capacity   = ").append(capacity)
      .append("\n  isSnapshot        = ").append(isSnapshot)
      .append("\n  snapshotRootIndex = ").append(snapshotRootIndex)
-     .append("\n  snapshot          = ").append(snapshot);
+     .append("\n  snapshotId        = ").append(snapshotId);
     return b.toString();
   }
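
A small self-contained sketch of what updateLatestSnapshotId above does: keep the largest real snapshot id seen while resolving the path, treating CURRENT_STATE_ID as "nothing recorded yet". The constant value is copied from this commit; the rest is illustrative only.

// Illustration only, not part of this diff.
class LatestSnapshotTracker {
  static final int CURRENT_STATE_ID = Integer.MAX_VALUE - 1;

  private int snapshotId = CURRENT_STATE_ID;  // nothing recorded yet

  void updateLatestSnapshotId(int sid) {
    // take sid if nothing was recorded yet, or if sid is a real id newer
    // than the one recorded so far
    if (snapshotId == CURRENT_STATE_ID
        || (sid != CURRENT_STATE_ID && snapshotId < sid)) {
      snapshotId = sid;
    }
  }

  public static void main(String[] args) {
    LatestSnapshotTracker t = new LatestSnapshotTracker();
    t.updateLatestSnapshotId(4);
    t.updateLatestSnapshotId(CURRENT_STATE_ID); // ignored: not a real snapshot
    t.updateLatestSnapshotId(9);
    System.out.println(t.snapshotId);           // 9
  }
}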
 

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -480,6 +480,14 @@ public class NameNode implements NameNodeStatusMXBean {
    * @param conf the configuration
    */
   protected void initialize(Configuration conf) throws IOException {
+    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
+      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
+      if (intervals != null) {
+        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
+          intervals);
+      }
+    }
+
     UserGroupInformation.setConfiguration(conf);
     loginAsNameNodeUser(conf);
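
The initialize() addition above only mirrors one configuration key into another when the target key is unset, so the user/group metrics pick up the same percentile intervals as the DFS metrics. A minimal sketch of that fallback using org.apache.hadoop.conf.Configuration; the literal key strings here are written out only for the example and are assumptions, the real code uses the corresponding constants.

import org.apache.hadoop.conf.Configuration;

public class MetricsIntervalFallbackSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // pretend only the DFS-side key was configured
    conf.set("dfs.metrics.percentiles.intervals", "60,300");

    String ugiKey = "hadoop.user.group.metrics.percentiles.intervals"; // assumed key name
    String dfsKey = "dfs.metrics.percentiles.intervals";               // assumed key name
    if (conf.get(ugiKey) == null) {
      String intervals = conf.get(dfsKey);
      if (intervals != null) {
        conf.set(ugiKey, intervals);
      }
    }
    System.out.println(conf.get(ugiKey)); // 60,300
  }
}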
 

+ 13 - 16
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiff.java

@@ -22,8 +22,8 @@ import java.io.IOException;
 import java.util.List;
 
 import org.apache.hadoop.hdfs.server.namenode.INode;
-import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
+import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
 import org.apache.hadoop.hdfs.server.namenode.Quota;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
 
@@ -52,8 +52,8 @@ abstract class AbstractINodeDiff<N extends INode,
                                  D extends AbstractINodeDiff<N, A, D>>
     implements Comparable<Integer> {
 
-  /** The snapshot will be obtained after this diff is applied. */
-  Snapshot snapshot;
+  /** The id of the corresponding snapshot. */
+  private int snapshotId;
   /** The snapshot inode data.  It is null when there is no change. */
   A snapshotINode;
   /**
@@ -64,10 +64,8 @@ abstract class AbstractINodeDiff<N extends INode,
    */
   private D posteriorDiff;
 
-  AbstractINodeDiff(Snapshot snapshot, A snapshotINode, D posteriorDiff) {
-    Preconditions.checkNotNull(snapshot, "snapshot is null");
-
-    this.snapshot = snapshot;
+  AbstractINodeDiff(int snapshotId, A snapshotINode, D posteriorDiff) {
+    this.snapshotId = snapshotId;
     this.snapshotINode = snapshotINode;
     this.posteriorDiff = posteriorDiff;
   }
@@ -75,16 +73,16 @@ abstract class AbstractINodeDiff<N extends INode,
   /** Compare diffs with snapshot ID. */
   @Override
   public final int compareTo(final Integer that) {
-    return Snapshot.ID_INTEGER_COMPARATOR.compare(this.snapshot.getId(), that);
+    return Snapshot.ID_INTEGER_COMPARATOR.compare(this.snapshotId, that);
   }
 
   /** @return the snapshot object of this diff. */
-  public final Snapshot getSnapshot() {
-    return snapshot;
+  public final int getSnapshotId() {
+    return snapshotId;
   }
   
-  final void setSnapshot(Snapshot snapshot) {
-    this.snapshot = snapshot;
+  final void setSnapshotId(int snapshot) {
+    this.snapshotId = snapshot;
   }
 
   /** @return the posterior diff. */
@@ -132,13 +130,12 @@ abstract class AbstractINodeDiff<N extends INode,
 
   @Override
   public String toString() {
-    return getClass().getSimpleName() + ": " + snapshot + " (post="
-        + (posteriorDiff == null? null: posteriorDiff.snapshot) + ")";
+    return getClass().getSimpleName() + ": " + this.getSnapshotId() + " (post="
+        + (posteriorDiff == null? null: posteriorDiff.getSnapshotId()) + ")";
   }
 
   void writeSnapshot(DataOutput out) throws IOException {
-    // Assume the snapshot is recorded before, write id only.
-    out.writeInt(snapshot.getId());
+    out.writeInt(snapshotId);
   }
   
   abstract void write(DataOutput out, ReferenceMap referenceMap

+ 45 - 55
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/AbstractINodeDiffList.java

@@ -53,7 +53,7 @@ abstract class AbstractINodeDiffList<N extends INode,
   }
 
   /** @return an {@link AbstractINodeDiff}. */
-  abstract D createDiff(Snapshot snapshot, N currentINode);
+  abstract D createDiff(int snapshotId, N currentINode);
 
   /** @return a snapshot copy of the current inode. */  
   abstract A createSnapshotCopy(N currentINode);
@@ -63,25 +63,25 @@ abstract class AbstractINodeDiffList<N extends INode,
    * outside. If the diff to remove is not the first one in the diff list, we 
    * need to combine the diff with its previous one.
    * 
-   * @param snapshot The snapshot to be deleted
-   * @param prior The snapshot taken before the to-be-deleted snapshot
+   * @param snapshot The id of the snapshot to be deleted
+   * @param prior The id of the snapshot taken before the to-be-deleted snapshot
    * @param collectedBlocks Used to collect information for blocksMap update
    * @return delta in namespace. 
    */
-  public final Quota.Counts deleteSnapshotDiff(final Snapshot snapshot,
-      Snapshot prior, final N currentINode,
+  public final Quota.Counts deleteSnapshotDiff(final int snapshot,
+      final int prior, final N currentINode,
       final BlocksMapUpdateInfo collectedBlocks,
       final List<INode> removedINodes, boolean countDiffChange) 
       throws QuotaExceededException {
-    int snapshotIndex = Collections.binarySearch(diffs, snapshot.getId());
+    int snapshotIndex = Collections.binarySearch(diffs, snapshot);
     
     Quota.Counts counts = Quota.Counts.newInstance();
     D removed = null;
     if (snapshotIndex == 0) {
-      if (prior != null) {
+      if (prior != Snapshot.NO_SNAPSHOT_ID) { // there is still a snapshot before
         // set the snapshot to latestBefore
-        diffs.get(snapshotIndex).setSnapshot(prior);
-      } else {
+        diffs.get(snapshotIndex).setSnapshotId(prior);
+      } else { // there is no snapshot before
         removed = diffs.remove(0);
         if (countDiffChange) {
           counts.add(Quota.NAMESPACE, 1);
@@ -96,8 +96,8 @@ abstract class AbstractINodeDiffList<N extends INode,
       }
     } else if (snapshotIndex > 0) {
       final AbstractINodeDiff<N, A, D> previous = diffs.get(snapshotIndex - 1);
-      if (!previous.getSnapshot().equals(prior)) {
-        diffs.get(snapshotIndex).setSnapshot(prior);
+      if (previous.getSnapshotId() != prior) {
+        diffs.get(snapshotIndex).setSnapshotId(prior);
       } else {
         // combine the to-be-removed diff with its previous diff
         removed = diffs.remove(snapshotIndex);
@@ -120,10 +120,10 @@ abstract class AbstractINodeDiffList<N extends INode,
   }
 
   /** Add an {@link AbstractINodeDiff} for the given snapshot. */
-  final D addDiff(Snapshot latest, N currentINode)
+  final D addDiff(int latestSnapshotId, N currentINode)
       throws QuotaExceededException {
     currentINode.addSpaceConsumed(1, 0, true);
-    return addLast(createDiff(latest, currentINode));
+    return addLast(createDiff(latestSnapshotId, currentINode));
   }
 
   /** Append the diff at the end of the list. */
@@ -149,10 +149,10 @@ abstract class AbstractINodeDiffList<N extends INode,
     return n == 0? null: diffs.get(n - 1);
   }
 
-  /** @return the last snapshot. */
-  public final Snapshot getLastSnapshot() {
+  /** @return the id of the last snapshot. */
+  public final int getLastSnapshotId() {
     final AbstractINodeDiff<N, A, D> last = getLast();
-    return last == null? null: last.getSnapshot();
+    return last == null ? Snapshot.CURRENT_STATE_ID : last.getSnapshotId();
   }
   
   /**
@@ -161,60 +161,49 @@ abstract class AbstractINodeDiffList<N extends INode,
    *                 snapshot id.
    * @param exclusive True means the returned snapshot's id must be < the given
    *                  id, otherwise <=.
-   * @return The latest snapshot before the given snapshot.
+   * @return The id of the latest snapshot before the given snapshot.
    */
-  private final Snapshot getPrior(int anchorId, boolean exclusive) {
-    if (anchorId == Snapshot.INVALID_ID) {
-      return getLastSnapshot();
+  private final int getPrior(int anchorId, boolean exclusive) {
+    if (anchorId == Snapshot.CURRENT_STATE_ID) {
+      return getLastSnapshotId();
     }
     final int i = Collections.binarySearch(diffs, anchorId);
     if (exclusive) { // must be the one before
       if (i == -1 || i == 0) {
-        return null;
+        return Snapshot.NO_SNAPSHOT_ID;
       } else {
         int priorIndex = i > 0 ? i - 1 : -i - 2;
-        return diffs.get(priorIndex).getSnapshot();
+        return diffs.get(priorIndex).getSnapshotId();
       }
     } else { // the one, or the one before if not existing
       if (i >= 0) {
-        return diffs.get(i).getSnapshot();
+        return diffs.get(i).getSnapshotId();
       } else if (i < -1) {
-        return diffs.get(-i - 2).getSnapshot();
+        return diffs.get(-i - 2).getSnapshotId();
       } else { // i == -1
-        return null;
+        return Snapshot.NO_SNAPSHOT_ID;
       }
     }
   }
   
-  public final Snapshot getPrior(int snapshotId) {
+  public final int getPrior(int snapshotId) {
     return getPrior(snapshotId, false);
   }
   
   /**
    * Update the prior snapshot.
    */
-  final Snapshot updatePrior(Snapshot snapshot, Snapshot prior) {
-    int id = snapshot == null ? Snapshot.INVALID_ID : snapshot.getId();
-    Snapshot s = getPrior(id, true);
-    if (s != null && 
-        (prior == null || Snapshot.ID_COMPARATOR.compare(s, prior) > 0)) {
-      return s;
+  final int updatePrior(int snapshot, int prior) {
+    int p = getPrior(snapshot, true);
+    if (p != Snapshot.CURRENT_STATE_ID
+        && Snapshot.ID_INTEGER_COMPARATOR.compare(p, prior) > 0) {
+      return p;
     }
     return prior;
   }
-
-  /**
-   * @return the diff corresponding to the given snapshot.
-   *         When the diff is null, it means that the current state and
-   *         the corresponding snapshot state are the same. 
-   */
-  public final D getDiff(Snapshot snapshot) {
-    return getDiffById(snapshot == null ? 
-        Snapshot.INVALID_ID : snapshot.getId());
-  }
   
-  private final D getDiffById(final int snapshotId) {
-    if (snapshotId == Snapshot.INVALID_ID) {
+  public final D getDiffById(final int snapshotId) {
+    if (snapshotId == Snapshot.CURRENT_STATE_ID) {
       return null;
     }
     final int i = Collections.binarySearch(diffs, snapshotId);
@@ -234,9 +223,9 @@ abstract class AbstractINodeDiffList<N extends INode,
    * Search for the snapshot whose id is 1) no less than the given id, 
    * and 2) most close to the given id.
    */
-  public final Snapshot getSnapshotById(final int snapshotId) {
+  public final int getSnapshotById(final int snapshotId) {
     D diff = getDiffById(snapshotId);
-    return diff == null ? null : diff.getSnapshot();
+    return diff == null ? Snapshot.CURRENT_STATE_ID : diff.getSnapshotId();
   }
   
   /**
@@ -271,8 +260,8 @@ abstract class AbstractINodeDiffList<N extends INode,
    *         Note that the current inode is returned if there is no change
    *         between the given snapshot and the current state. 
    */
-  public A getSnapshotINode(final Snapshot snapshot, final A currentINode) {
-    final D diff = getDiff(snapshot);
+  public A getSnapshotINode(final int snapshotId, final A currentINode) {
+    final D diff = getDiffById(snapshotId);
     final A inode = diff == null? null: diff.getSnapshotINode();
     return inode == null? currentINode: inode;
   }
@@ -281,15 +270,16 @@ abstract class AbstractINodeDiffList<N extends INode,
    * Check if the latest snapshot diff exists.  If not, add it.
    * @return the latest snapshot diff, which is never null.
    */
-  final D checkAndAddLatestSnapshotDiff(Snapshot latest, N currentINode)
+  final D checkAndAddLatestSnapshotDiff(int latestSnapshotId, N currentINode)
       throws QuotaExceededException {
     final D last = getLast();
     if (last != null
-        && Snapshot.ID_COMPARATOR.compare(last.getSnapshot(), latest) >= 0) {
+        && Snapshot.ID_INTEGER_COMPARATOR.compare(last.getSnapshotId(),
+            latestSnapshotId) >= 0) {
       return last;
     } else {
       try {
-        return addDiff(latest, currentINode);
+        return addDiff(latestSnapshotId, currentINode);
       } catch(NSQuotaExceededException e) {
         e.setMessagePrefix("Failed to record modification for snapshot");
         throw e;
@@ -298,10 +288,10 @@ abstract class AbstractINodeDiffList<N extends INode,
   }
 
   /** Save the snapshot copy to the latest snapshot. */
-  public void saveSelf2Snapshot(Snapshot latest, N currentINode, A snapshotCopy)
-      throws QuotaExceededException {
-    if (latest != null) {
-      D diff = checkAndAddLatestSnapshotDiff(latest, currentINode);
+  public void saveSelf2Snapshot(int latestSnapshotId, N currentINode,
+      A snapshotCopy) throws QuotaExceededException {
+    if (latestSnapshotId != Snapshot.CURRENT_STATE_ID) {
+      D diff = checkAndAddLatestSnapshotDiff(latestSnapshotId, currentINode);
       if (diff.snapshotINode == null) {
         if (snapshotCopy == null) {
           snapshotCopy = createSnapshotCopy(currentINode);
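
getPrior() above leans on Collections.binarySearch over the id-sorted diff list, decoding a negative return value as -(insertionPoint) - 1. A standalone sketch of the exclusive lookup, using a plain Integer list instead of the diff list; only the decoding logic mirrors the hunk above, everything else is invented.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

class PriorLookup {
  static final int NO_SNAPSHOT_ID = -1;

  // Return the id in 'ids' that is strictly smaller than anchorId, or
  // NO_SNAPSHOT_ID if there is none. 'ids' must be sorted ascending,
  // matching the ordering of the diff list above.
  static int priorExclusive(List<Integer> ids, int anchorId) {
    int i = Collections.binarySearch(ids, anchorId);
    if (i == -1 || i == 0) {
      return NO_SNAPSHOT_ID;                   // nothing before the anchor
    }
    int priorIndex = i > 0 ? i - 1 : -i - 2;   // hit: one before; miss: element before the insertion point
    return ids.get(priorIndex);
  }

  public static void main(String[] args) {
    List<Integer> ids = Arrays.asList(2, 5, 9);
    System.out.println(priorExclusive(ids, 5)); // 2  (exact hit, take the one before)
    System.out.println(priorExclusive(ids, 7)); // 5  (miss, take the element before the insertion point)
    System.out.println(priorExclusive(ids, 2)); // -1 (nothing before)
  }
}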

+ 72 - 64
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java

@@ -225,30 +225,36 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
     private final int childrenSize;
     /** The children list diff. */
     private final ChildrenDiff diff;
+    private boolean isSnapshotRoot = false;
+    
+    private DirectoryDiff(int snapshotId, INodeDirectory dir) {
+      super(snapshotId, null, null);
 
-    private DirectoryDiff(Snapshot snapshot, INodeDirectory dir) {
-      super(snapshot, null, null);
-
-      this.childrenSize = dir.getChildrenList(null).size();
+      this.childrenSize = dir.getChildrenList(Snapshot.CURRENT_STATE_ID).size();
       this.diff = new ChildrenDiff();
     }
 
     /** Constructor used by FSImage loading */
-    DirectoryDiff(Snapshot snapshot, INodeDirectoryAttributes snapshotINode,
-        DirectoryDiff posteriorDiff, int childrenSize,
-        List<INode> createdList, List<INode> deletedList) {
-      super(snapshot, snapshotINode, posteriorDiff);
+    DirectoryDiff(int snapshotId, INodeDirectoryAttributes snapshotINode,
+        DirectoryDiff posteriorDiff, int childrenSize, List<INode> createdList,
+        List<INode> deletedList, boolean isSnapshotRoot) {
+      super(snapshotId, snapshotINode, posteriorDiff);
       this.childrenSize = childrenSize;
       this.diff = new ChildrenDiff(createdList, deletedList);
+      this.isSnapshotRoot = isSnapshotRoot;
     }
 
     ChildrenDiff getChildrenDiff() {
       return diff;
     }
-
-    /** Is the inode the root of the snapshot? */
+    
+    void setSnapshotRoot(INodeDirectoryAttributes root) {
+      this.snapshotINode = root;
+      this.isSnapshotRoot = true;
+    }
+    
     boolean isSnapshotRoot() {
-      return snapshotINode == snapshot.getRoot();
+      return isSnapshotRoot;
     }
 
     @Override
@@ -287,7 +293,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
               combined.combinePosterior(d.diff, null);
             }
             children = combined.apply2Current(ReadOnlyList.Util.asList(
-                currentDir.getChildrenList(null)));
+                currentDir.getChildrenList(Snapshot.CURRENT_STATE_ID)));
           }
           return children;
         }
@@ -327,7 +333,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
           return null;
         } else if (d.getPosterior() == null) {
           // no more posterior diff, get from current inode.
-          return currentDir.getChild(name, null);
+          return currentDir.getChild(name, Snapshot.CURRENT_STATE_ID);
         }
       }
     }
@@ -342,11 +348,9 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       writeSnapshot(out);
       out.writeInt(childrenSize);
 
-      // write snapshotINode
-      if (isSnapshotRoot()) {
-        out.writeBoolean(true);
-      } else {
-        out.writeBoolean(false);
+      // Write snapshotINode
+      out.writeBoolean(isSnapshotRoot);
+      if (!isSnapshotRoot) {
         if (snapshotINode != null) {
           out.writeBoolean(true);
           FSImageSerialization.writeINodeDirectoryAttributes(snapshotINode, out);
@@ -373,7 +377,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       extends AbstractINodeDiffList<INodeDirectory, INodeDirectoryAttributes, DirectoryDiff> {
 
     @Override
-    DirectoryDiff createDiff(Snapshot snapshot, INodeDirectory currentDir) {
+    DirectoryDiff createDiff(int snapshot, INodeDirectory currentDir) {
       return new DirectoryDiff(snapshot, currentDir);
     }
 
@@ -424,12 +428,13 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
   /**
    * Destroy a subtree under a DstReference node.
    */
-  public static void destroyDstSubtree(INode inode, final Snapshot snapshot,
-      final Snapshot prior, final BlocksMapUpdateInfo collectedBlocks,
+  public static void destroyDstSubtree(INode inode, final int snapshot,
+      final int prior, final BlocksMapUpdateInfo collectedBlocks,
       final List<INode> removedINodes) throws QuotaExceededException {
-    Preconditions.checkArgument(prior != null);
+    Preconditions.checkArgument(prior != Snapshot.NO_SNAPSHOT_ID);
     if (inode.isReference()) {
-      if (inode instanceof INodeReference.WithName && snapshot != null) {
+      if (inode instanceof INodeReference.WithName
+          && snapshot != Snapshot.CURRENT_STATE_ID) {
         // this inode has been renamed before the deletion of the DstReference
         // subtree
         inode.cleanSubtree(snapshot, prior, collectedBlocks, removedINodes,
@@ -447,18 +452,18 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       DirectoryWithSnapshotFeature sf = dir.getDirectoryWithSnapshotFeature();
       if (sf != null) {
         DirectoryDiffList diffList = sf.getDiffs();
-        DirectoryDiff priorDiff = diffList.getDiff(prior);
-        if (priorDiff != null && priorDiff.getSnapshot().equals(prior)) {
+        DirectoryDiff priorDiff = diffList.getDiffById(prior);
+        if (priorDiff != null && priorDiff.getSnapshotId() == prior) {
           List<INode> dList = priorDiff.diff.getList(ListType.DELETED);
           excludedNodes = cloneDiffList(dList);
         }
         
-        if (snapshot != null) {
+        if (snapshot != Snapshot.CURRENT_STATE_ID) {
           diffList.deleteSnapshotDiff(snapshot, prior, dir, collectedBlocks,
               removedINodes, true);
         }
-        priorDiff = diffList.getDiff(prior);
-        if (priorDiff != null && priorDiff.getSnapshot().equals(prior)) {
+        priorDiff = diffList.getDiffById(prior);
+        if (priorDiff != null && priorDiff.getSnapshotId() == prior) {
           priorDiff.diff.destroyCreatedList(dir, collectedBlocks,
               removedINodes);
         }
@@ -478,14 +483,14 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
    * deleted list of prior.
    * @param inode The inode to clean.
    * @param post The post snapshot.
-   * @param prior The prior snapshot.
+   * @param prior The id of the prior snapshot.
    * @param collectedBlocks Used to collect blocks for later deletion.
    * @return Quota usage update.
    */
   private static Quota.Counts cleanDeletedINode(INode inode,
-      final Snapshot post, final Snapshot prior,
+      final int post, final int prior,
       final BlocksMapUpdateInfo collectedBlocks,
-      final List<INode> removedINodes, final boolean countDiffChange) 
+      final List<INode> removedINodes, final boolean countDiffChange)
       throws QuotaExceededException {
     Quota.Counts counts = Quota.Counts.newInstance();
     Deque<INode> queue = new ArrayDeque<INode>();
@@ -494,7 +499,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       INode topNode = queue.pollFirst();
       if (topNode instanceof INodeReference.WithName) {
         INodeReference.WithName wn = (INodeReference.WithName) topNode;
-        if (wn.getLastSnapshotId() >= post.getId()) {
+        if (wn.getLastSnapshotId() >= post) {
           wn.cleanSubtree(post, prior, collectedBlocks, removedINodes,
               countDiffChange);
         }
@@ -511,8 +516,8 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
         if (sf != null) {
           // delete files/dirs created after prior. Note that these
           // files/dirs, along with inode, were deleted right after post.
-          DirectoryDiff priorDiff = sf.getDiffs().getDiff(prior);
-          if (priorDiff != null && priorDiff.getSnapshot().equals(prior)) {
+          DirectoryDiff priorDiff = sf.getDiffs().getDiffById(prior);
+          if (priorDiff != null && priorDiff.getSnapshotId() == prior) {
             priorChildrenDiff = priorDiff.getChildrenDiff();
             counts.add(priorChildrenDiff.destroyCreatedList(dir,
                 collectedBlocks, removedINodes));
@@ -540,8 +545,8 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
   }
 
   /** @return the last snapshot. */
-  public Snapshot getLastSnapshot() {
-    return diffs.getLastSnapshot();
+  public int getLastSnapshotId() {
+    return diffs.getLastSnapshotId();
   }
 
   /** @return the snapshot diff list. */
@@ -565,11 +570,13 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
    * to make sure that parent is in the given snapshot "latest".
    */
   public boolean addChild(INodeDirectory parent, INode inode,
-      boolean setModTime, Snapshot latest) throws QuotaExceededException {
-    ChildrenDiff diff = diffs.checkAndAddLatestSnapshotDiff(latest, parent).diff;
+      boolean setModTime, int latestSnapshotId) throws QuotaExceededException {
+    ChildrenDiff diff = diffs.checkAndAddLatestSnapshotDiff(latestSnapshotId,
+        parent).diff;
     int undoInfo = diff.create(inode);
 
-    final boolean added = parent.addChild(inode, setModTime, null);
+    final boolean added = parent.addChild(inode, setModTime,
+        Snapshot.CURRENT_STATE_ID);
     if (!added) {
       diff.undoCreate(inode, undoInfo);
     }
@@ -581,7 +588,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
    * needs to make sure that parent is in the given snapshot "latest".
    */
   public boolean removeChild(INodeDirectory parent, INode child,
-      Snapshot latest) throws QuotaExceededException {
+      int latestSnapshotId) throws QuotaExceededException {
     // For a directory that is not a renamed node, if isInLatestSnapshot returns
     // false, the directory is not in the latest snapshot, thus we do not need
     // to record the removed child in any snapshot.
@@ -593,7 +600,8 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
     // directory node cannot be in any snapshot (not in current tree, nor in
     // previous src tree). Thus we do not need to record the removed child in
     // any snapshot.
-    ChildrenDiff diff = diffs.checkAndAddLatestSnapshotDiff(latest, parent).diff;
+    ChildrenDiff diff = diffs.checkAndAddLatestSnapshotDiff(latestSnapshotId,
+        parent).diff;
     UndoInfo<INode> undoInfo = diff.delete(child);
 
     final boolean removed = parent.removeChild(child);
@@ -611,29 +619,29 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
    *         for the snapshot and return it. 
    */
   public ReadOnlyList<INode> getChildrenList(INodeDirectory currentINode,
-      final Snapshot snapshot) {
-    final DirectoryDiff diff = diffs.getDiff(snapshot);
+      final int snapshotId) {
+    final DirectoryDiff diff = diffs.getDiffById(snapshotId);
     return diff != null ? diff.getChildrenList(currentINode) : currentINode
-        .getChildrenList(null);
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
   }
   
   public INode getChild(INodeDirectory currentINode, byte[] name,
-      Snapshot snapshot) {
-    final DirectoryDiff diff = diffs.getDiff(snapshot);
+      int snapshotId) {
+    final DirectoryDiff diff = diffs.getDiffById(snapshotId);
     return diff != null ? diff.getChild(name, true, currentINode)
-        : currentINode.getChild(name, null);
+        : currentINode.getChild(name, Snapshot.CURRENT_STATE_ID);
   }
   
   /** Used to record the modification of a symlink node */
   public INode saveChild2Snapshot(INodeDirectory currentINode,
-      final INode child, final Snapshot latest, final INode snapshotCopy)
+      final INode child, final int latestSnapshotId, final INode snapshotCopy)
       throws QuotaExceededException {
     Preconditions.checkArgument(!child.isDirectory(),
         "child is a directory, child=%s", child);
-    Preconditions.checkArgument(latest != null);
+    Preconditions.checkArgument(latestSnapshotId != Snapshot.CURRENT_STATE_ID);
     
-    final DirectoryDiff diff = diffs.checkAndAddLatestSnapshotDiff(latest,
-        currentINode);
+    final DirectoryDiff diff = diffs.checkAndAddLatestSnapshotDiff(
+        latestSnapshotId, currentINode);
     if (diff.getChild(child.getLocalNameBytes(), false, currentINode) != null) {
       // it was already saved in the latest snapshot earlier.  
       return child;
@@ -656,7 +664,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
   public Quota.Counts computeQuotaUsage4CurrentDirectory(Quota.Counts counts) {
     for(DirectoryDiff d : diffs) {
       for(INode deleted : d.getChildrenDiff().getList(ListType.DELETED)) {
-        deleted.computeQuotaUsage(counts, false, Snapshot.INVALID_ID);
+        deleted.computeQuotaUsage(counts, false, Snapshot.CURRENT_STATE_ID);
       }
     }
     counts.add(Quota.NAMESPACE, diffs.asList().size());
@@ -744,14 +752,14 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
   }
 
   public Quota.Counts cleanDirectory(final INodeDirectory currentINode,
-      final Snapshot snapshot, Snapshot prior,
+      final int snapshot, int prior,
       final BlocksMapUpdateInfo collectedBlocks,
       final List<INode> removedINodes, final boolean countDiffChange)
       throws QuotaExceededException {
     Quota.Counts counts = Quota.Counts.newInstance();
     Map<INode, INode> priorCreated = null;
     Map<INode, INode> priorDeleted = null;
-    if (snapshot == null) { // delete the current directory
+    if (snapshot == Snapshot.CURRENT_STATE_ID) { // delete the current directory
       currentINode.recordModification(prior);
       // delete everything in created list
       DirectoryDiff lastDiff = diffs.getLast();
@@ -764,9 +772,9 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
       prior = getDiffs().updatePrior(snapshot, prior);
       // if there is a snapshot diff associated with prior, we need to record
       // its original created and deleted list before deleting post
-      if (prior != null) {
-        DirectoryDiff priorDiff = this.getDiffs().getDiff(prior);
-        if (priorDiff != null && priorDiff.getSnapshot().equals(prior)) {
+      if (prior != Snapshot.NO_SNAPSHOT_ID) {
+        DirectoryDiff priorDiff = this.getDiffs().getDiffById(prior);
+        if (priorDiff != null && priorDiff.getSnapshotId() == prior) {
           List<INode> cList = priorDiff.diff.getList(ListType.CREATED);
           List<INode> dList = priorDiff.diff.getList(ListType.DELETED);
           priorCreated = cloneDiffList(cList);
@@ -774,13 +782,13 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
         }
       }
       
-      counts.add(getDiffs().deleteSnapshotDiff(snapshot, prior, currentINode, 
-          collectedBlocks, removedINodes, countDiffChange));
+      counts.add(getDiffs().deleteSnapshotDiff(snapshot, prior,
+          currentINode, collectedBlocks, removedINodes, countDiffChange));
       
       // check priorDiff again since it may be created during the diff deletion
-      if (prior != null) {
-        DirectoryDiff priorDiff = this.getDiffs().getDiff(prior);
-        if (priorDiff != null && priorDiff.getSnapshot().equals(prior)) {
+      if (prior != Snapshot.NO_SNAPSHOT_ID) {
+        DirectoryDiff priorDiff = this.getDiffs().getDiffById(prior);
+        if (priorDiff != null && priorDiff.getSnapshotId() == prior) {
           // For files/directories created between "prior" and "snapshot", 
           // we need to clear snapshot copies for "snapshot". Note that we must
           // use null as prior in the cleanSubtree call. Files/directories that
@@ -791,8 +799,8 @@ public class DirectoryWithSnapshotFeature implements INode.Feature {
             for (INode cNode : priorDiff.getChildrenDiff().getList(
                 ListType.CREATED)) {
               if (priorCreated.containsKey(cNode)) {
-                counts.add(cNode.cleanSubtree(snapshot, null, collectedBlocks,
-                    removedINodes, countDiffChange));
+                counts.add(cNode.cleanSubtree(snapshot, Snapshot.NO_SNAPSHOT_ID,
+                    collectedBlocks, removedINodes, countDiffChange));
               }
             }
           }
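
Several hunks above replace priorDiff.getSnapshot().equals(prior) with priorDiff.getSnapshotId() == prior after a getDiffById(prior) lookup. The extra equality check matters because, per the Javadoc earlier in this commit, the lookup returns the diff whose id is the smallest one no less than the requested id, which need not be an exact match. A standalone sketch over a plain Integer list; only that lookup-then-verify pattern mirrors the code above.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

class DiffByIdLookup {
  // Smallest id in 'ids' that is >= snapshotId, or null if none
  // (stand-in for getDiffById over the sorted diff list above).
  static Integer diffIdAtLeast(List<Integer> ids, int snapshotId) {
    int i = Collections.binarySearch(ids, snapshotId);
    if (i >= 0) return ids.get(i);
    int insertion = -i - 1;
    return insertion < ids.size() ? ids.get(insertion) : null;
  }

  public static void main(String[] args) {
    List<Integer> ids = Arrays.asList(2, 5, 9);
    int prior = 4;
    Integer d = diffIdAtLeast(ids, prior);      // returns 5, not an exact match for 4
    if (d != null && d == prior) {              // the "== prior" guard above filters this out
      System.out.println("exact prior diff found");
    } else {
      System.out.println("no diff recorded exactly at prior=" + prior);
    }
  }
}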

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiff.java

@@ -38,15 +38,15 @@ public class FileDiff extends
   /** The file size at snapshot creation time. */
   private final long fileSize;
 
-  FileDiff(Snapshot snapshot, INodeFile file) {
-    super(snapshot, null, null);
+  FileDiff(int snapshotId, INodeFile file) {
+    super(snapshotId, null, null);
     fileSize = file.computeFileSize();
   }
 
   /** Constructor used by FSImage loading */
-  FileDiff(Snapshot snapshot, INodeFileAttributes snapshotINode,
+  FileDiff(int snapshotId, INodeFileAttributes snapshotINode,
       FileDiff posteriorDiff, long fileSize) {
-    super(snapshot, snapshotINode, posteriorDiff);
+    super(snapshotId, snapshotINode, posteriorDiff);
     this.fileSize = fileSize;
   }
 

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileDiffList.java

@@ -25,8 +25,8 @@ public class FileDiffList extends
     AbstractINodeDiffList<INodeFile, INodeFileAttributes, FileDiff> {
   
   @Override
-  FileDiff createDiff(Snapshot snapshot, INodeFile file) {
-    return new FileDiff(snapshot, file);
+  FileDiff createDiff(int snapshotId, INodeFile file) {
+    return new FileDiff(snapshotId, file);
   }
   
   @Override

+ 7 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FileWithSnapshotFeature.java

@@ -78,22 +78,22 @@ public class FileWithSnapshotFeature implements INode.Feature {
     return (isCurrentFileDeleted()? "(DELETED), ": ", ") + diffs;
   }
   
-  public Quota.Counts cleanFile(final INodeFile file, final Snapshot snapshot,
-      Snapshot prior, final BlocksMapUpdateInfo collectedBlocks,
+  public Quota.Counts cleanFile(final INodeFile file, final int snapshotId,
+      int priorSnapshotId, final BlocksMapUpdateInfo collectedBlocks,
       final List<INode> removedINodes, final boolean countDiffChange)
       throws QuotaExceededException {
-    if (snapshot == null) {
+    if (snapshotId == Snapshot.CURRENT_STATE_ID) {
       // delete the current file while the file has snapshot feature
       if (!isCurrentFileDeleted()) {
-        file.recordModification(prior);
+        file.recordModification(priorSnapshotId);
         deleteCurrentFile();
       }
       collectBlocksAndClear(file, collectedBlocks, removedINodes);
       return Quota.Counts.newInstance();
     } else { // delete the snapshot
-      prior = getDiffs().updatePrior(snapshot, prior);
-      return diffs.deleteSnapshotDiff(snapshot, prior, file, collectedBlocks,
-          removedINodes, countDiffChange);
+      priorSnapshotId = getDiffs().updatePrior(snapshotId, priorSnapshotId);
+      return diffs.deleteSnapshotDiff(snapshotId, priorSnapshotId, file,
+          collectedBlocks, removedINodes, countDiffChange);
     }
   }
   

+ 33 - 21
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/INodeDirectorySnapshottable.java

@@ -206,6 +206,15 @@ public class INodeDirectorySnapshottable extends INodeDirectory {
     return i < 0? null: snapshotsByNames.get(i);
   }
   
+  Snapshot getSnapshotById(int sid) {
+    for (Snapshot s : snapshotsByNames) {
+      if (s.getId() == sid) {
+        return s;
+      }
+    }
+    return null;
+  }
+  
   /** @return {@link #snapshotsByNames} as a {@link ReadOnlyList} */
   public ReadOnlyList<Snapshot> getSnapshotList() {
     return ReadOnlyList.Util.asReadOnlyList(snapshotsByNames);
@@ -297,13 +306,14 @@ public class INodeDirectorySnapshottable extends INodeDirectory {
           + "snapshot with the same name \"" + Snapshot.getSnapshotName(s) + "\".");
     }
 
-    final DirectoryDiff d = getDiffs().addDiff(s, this);
-    d.snapshotINode = s.getRoot();
+    final DirectoryDiff d = getDiffs().addDiff(id, this);
+    d.setSnapshotRoot(s.getRoot());
     snapshotsByNames.add(-i - 1, s);
 
     //set modification time
-    updateModificationTime(Time.now(), null);
-    s.getRoot().setModificationTime(getModificationTime(), null);
+    updateModificationTime(Time.now(), Snapshot.CURRENT_STATE_ID);
+    s.getRoot().setModificationTime(getModificationTime(),
+        Snapshot.CURRENT_STATE_ID);
     return s;
   }
   
@@ -326,10 +336,10 @@ public class INodeDirectorySnapshottable extends INodeDirectory {
           + ": the snapshot does not exist.");
     } else {
       final Snapshot snapshot = snapshotsByNames.get(i);
-      Snapshot prior = Snapshot.findLatestSnapshot(this, snapshot);
+      int prior = Snapshot.findLatestSnapshot(this, snapshot.getId());
       try {
-        Quota.Counts counts = cleanSubtree(snapshot, prior, collectedBlocks,
-            removedINodes, true);
+        Quota.Counts counts = cleanSubtree(snapshot.getId(), prior,
+            collectedBlocks, removedINodes, true);
         INodeDirectory parent = getParent();
         if (parent != null) {
           // there will not be any WithName node corresponding to the deleted 
@@ -425,8 +435,9 @@ public class INodeDirectorySnapshottable extends INodeDirectory {
           diffReport.addDirDiff(dir, relativePath, diff);
         }
       }
-      ReadOnlyList<INode> children = dir.getChildrenList(diffReport
-          .isFromEarlier() ? diffReport.to : diffReport.from);
+      ReadOnlyList<INode> children = dir.getChildrenList(
+          diffReport.isFromEarlier() ? Snapshot.getSnapshotId(diffReport.to) : 
+            Snapshot.getSnapshotId(diffReport.from));
       for (INode child : children) {
         final byte[] name = child.getLocalNameBytes();
         if (diff.searchIndex(ListType.CREATED, name) < 0
@@ -454,16 +465,15 @@ public class INodeDirectorySnapshottable extends INodeDirectory {
    * Replace itself with {@link INodeDirectoryWithSnapshot} or
    * {@link INodeDirectory} depending on the latest snapshot.
    */
-  INodeDirectory replaceSelf(final Snapshot latest, final INodeMap inodeMap)
+  INodeDirectory replaceSelf(final int latestSnapshotId, final INodeMap inodeMap)
       throws QuotaExceededException {
-    if (latest == null) {
-      Preconditions.checkState(
-          getDirectoryWithSnapshotFeature().getLastSnapshot() == null,
-          "latest == null but getLastSnapshot() != null, this=%s", this);
+    if (latestSnapshotId == Snapshot.CURRENT_STATE_ID) {
+      Preconditions.checkState(getDirectoryWithSnapshotFeature()
+          .getLastSnapshotId() == Snapshot.CURRENT_STATE_ID, "this=%s", this);
     }
     INodeDirectory dir = replaceSelf4INodeDirectory(inodeMap);
-    if (latest != null) {
-      dir.recordModification(latest);
+    if (latestSnapshotId != Snapshot.CURRENT_STATE_ID) {
+      dir.recordModification(latestSnapshotId);
     }
     return dir;
   }
@@ -475,10 +485,10 @@ public class INodeDirectorySnapshottable extends INodeDirectory {
 
   @Override
   public void dumpTreeRecursively(PrintWriter out, StringBuilder prefix,
-      Snapshot snapshot) {
+      int snapshot) {
     super.dumpTreeRecursively(out, prefix, snapshot);
 
-    if (snapshot == null) {
+    if (snapshot == Snapshot.CURRENT_STATE_ID) {
       out.println();
       out.print(prefix);
 
@@ -494,7 +504,8 @@ public class INodeDirectorySnapshottable extends INodeDirectory {
           n++;
         }
       }
-      Preconditions.checkState(n == snapshotsByNames.size());
+      Preconditions.checkState(n == snapshotsByNames.size(), "#n=" + n
+          + ", snapshotsByNames.size()=" + snapshotsByNames.size());
       out.print(", #snapshot=");
       out.println(n);
 
@@ -522,8 +533,9 @@ public class INodeDirectorySnapshottable extends INodeDirectory {
   
             @Override
             public SnapshotAndINode next() {
-              final Snapshot s = next.snapshot;
-              final SnapshotAndINode pair = new SnapshotAndINode(s);
+              final SnapshotAndINode pair = new SnapshotAndINode(next
+                  .getSnapshotId(), getSnapshotById(next.getSnapshotId())
+                  .getRoot());
               next = findNext();
               return pair;
             }
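
getSnapshotById above is a plain linear scan: diffs now carry only the int id, so code that still needs the Snapshot object (here, the dumper fetching the snapshot root) has to map the id back. A self-contained sketch of that reverse lookup; Snap is a tiny stand-in record invented for the example, not the Hadoop Snapshot class.

import java.util.ArrayList;
import java.util.List;

class SnapshotByIdLookup {
  // Tiny stand-in for a snapshot entry, illustration only.
  static class Snap {
    final int id; final String name;
    Snap(int id, String name) { this.id = id; this.name = name; }
  }

  static Snap getSnapshotById(List<Snap> snapshotsByNames, int sid) {
    for (Snap s : snapshotsByNames) {
      if (s.id == sid) {
        return s;
      }
    }
    return null; // no snapshot with that id
  }

  public static void main(String[] args) {
    List<Snap> byName = new ArrayList<>();
    byName.add(new Snap(3, "s-alpha"));
    byName.add(new Snap(5, "s-beta"));
    System.out.println(getSnapshotById(byName, 5).name); // s-beta
  }
}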

+ 25 - 21
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/Snapshot.java

@@ -37,7 +37,11 @@ import org.apache.hadoop.hdfs.util.ReadOnlyList;
 /** Snapshot of a sub-tree in the namesystem. */
 @InterfaceAudience.Private
 public class Snapshot implements Comparable<byte[]> {
-  public static final int INVALID_ID = -1;
+  /**
+   * This id is used to indicate the current state (vs. snapshots)
+   */
+  public static final int CURRENT_STATE_ID = Integer.MAX_VALUE - 1;
+  public static final int NO_SNAPSHOT_ID = -1;
   
   /**
    * The pattern for generating the default snapshot name.
@@ -61,14 +65,18 @@ public class Snapshot implements Comparable<byte[]> {
         .toString();
   }
   
-  /** 
-   * Get the name of the given snapshot. 
+  /**
+   * Get the name of the given snapshot.
    * @param s The given snapshot.
    * @return The name of the snapshot, or an empty string if {@code s} is null
    */
   static String getSnapshotName(Snapshot s) {
     return s != null ? s.getRoot().getLocalName() : "";
   }
+  
+  public static int getSnapshotId(Snapshot s) {
+    return s == null ? CURRENT_STATE_ID : s.getId();
+  }
 
   /**
    * Compare snapshot with IDs, where null indicates the current status thus
@@ -78,9 +86,8 @@ public class Snapshot implements Comparable<byte[]> {
       = new Comparator<Snapshot>() {
     @Override
     public int compare(Snapshot left, Snapshot right) {
-      return ID_INTEGER_COMPARATOR.compare(
-          left == null? null: left.getId(),
-          right == null? null: right.getId());
+      return ID_INTEGER_COMPARATOR.compare(Snapshot.getSnapshotId(left),
+          Snapshot.getSnapshotId(right));
     }
   };
 
@@ -92,12 +99,9 @@ public class Snapshot implements Comparable<byte[]> {
       = new Comparator<Integer>() {
     @Override
     public int compare(Integer left, Integer right) {
-      // null means the current state, thus should be the largest
-      if (left == null) {
-        return right == null? 0: 1;
-      } else {
-        return right == null? -1: left - right; 
-      }
+      // Snapshot.CURRENT_STATE_ID means the current state, thus should be the 
+      // largest
+      return left - right;
     }
   };
 
@@ -108,12 +112,12 @@ public class Snapshot implements Comparable<byte[]> {
    * is not null).
    * 
    * @param inode the given inode that the returned snapshot needs to cover
-   * @param anchor the returned snapshot should be taken before this snapshot.
-   * @return the latest snapshot covers the given inode and was taken before the
-   *         the given snapshot (if it is not null).
+   * @param anchor the returned snapshot should be taken before this given id.
+   * @return id of the latest snapshot that covers the given inode and was taken
+   *         before the given snapshot id.
    */
-  public static Snapshot findLatestSnapshot(INode inode, Snapshot anchor) {
-    Snapshot latest = null;
+  public static int findLatestSnapshot(INode inode, final int anchor) {
+    int latest = NO_SNAPSHOT_ID;
     for(; inode != null; inode = inode.getParent()) {
       if (inode.isDirectory()) {
         final INodeDirectory dir = inode.asDirectory();
@@ -139,13 +143,13 @@ public class Snapshot implements Comparable<byte[]> {
     }
 
     @Override
-    public ReadOnlyList<INode> getChildrenList(Snapshot snapshot) {
-      return getParent().getChildrenList(snapshot);
+    public ReadOnlyList<INode> getChildrenList(int snapshotId) {
+      return getParent().getChildrenList(snapshotId);
     }
 
     @Override
-    public INode getChild(byte[] name, Snapshot snapshot) {
-      return getParent().getChild(name, snapshot);
+    public INode getChild(byte[] name, int snapshotId) {
+      return getParent().getChild(name, snapshotId);
     }
     
     @Override
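
The rewritten comparator above can be a plain subtraction only because CURRENT_STATE_ID is chosen as Integer.MAX_VALUE - 1, so the current state still sorts after every real snapshot id without any null special-casing. A standalone sketch of that ordering; the constant values are copied from the hunk, everything else is illustrative only.

import java.util.Arrays;
import java.util.Comparator;

class SnapshotIdOrdering {
  static final int CURRENT_STATE_ID = Integer.MAX_VALUE - 1;
  static final int NO_SNAPSHOT_ID = -1;

  // Same shape as ID_INTEGER_COMPARATOR above: real ids and both sentinels
  // already follow the natural int order, so left - right is enough here.
  static final Comparator<Integer> BY_ID = (left, right) -> left - right;

  public static void main(String[] args) {
    Integer[] ids = { CURRENT_STATE_ID, 3, NO_SNAPSHOT_ID, 7 };
    Arrays.sort(ids, BY_ID);
    // NO_SNAPSHOT_ID first, then real ids, then CURRENT_STATE_ID last
    System.out.println(Arrays.toString(ids)); // [-1, 3, 7, 2147483646]
  }
}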

+ 7 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java

@@ -118,7 +118,7 @@ public class SnapshotFSImageFormat {
 
   private static FileDiff loadFileDiff(FileDiff posterior, DataInput in,
       FSImageFormat.Loader loader) throws IOException {
-    // 1. Read the full path of the Snapshot root to identify the Snapshot
+    // 1. Read the id of the Snapshot root to identify the Snapshot
     final Snapshot snapshot = loader.getSnapshot(in);
 
     // 2. Load file size
@@ -128,7 +128,7 @@ public class SnapshotFSImageFormat {
     final INodeFileAttributes snapshotINode = in.readBoolean()?
         loader.loadINodeFileAttributes(in): null;
     
-    return new FileDiff(snapshot, snapshotINode, posterior, fileSize);
+    return new FileDiff(snapshot.getId(), snapshotINode, posterior, fileSize);
   }
 
   /**
@@ -149,7 +149,8 @@ public class SnapshotFSImageFormat {
       } // else go to the next SnapshotDiff
     } 
     // use the current child
-    INode currentChild = parent.getChild(createdNodeName, null);
+    INode currentChild = parent.getChild(createdNodeName,
+        Snapshot.CURRENT_STATE_ID);
     if (currentChild == null) {
       throw new IOException("Cannot find an INode associated with the INode "
           + DFSUtil.bytes2String(createdNodeName)
@@ -295,9 +296,9 @@ public class SnapshotFSImageFormat {
     
     // 6. Compose the SnapshotDiff
     List<DirectoryDiff> diffs = parent.getDiffs().asList();
-    DirectoryDiff sdiff = new DirectoryDiff(snapshot, snapshotINode,
-        diffs.isEmpty() ? null : diffs.get(0),
-        childrenSize, createdList, deletedList);
+    DirectoryDiff sdiff = new DirectoryDiff(snapshot.getId(), snapshotINode,
+        diffs.isEmpty() ? null : diffs.get(0), childrenSize, createdList,
+        deletedList, snapshotINode == snapshot.getRoot());
     return sdiff;
   }
   

+ 4 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java

@@ -114,7 +114,7 @@ public class SnapshotManager implements SnapshotStats {
       s = (INodeDirectorySnapshottable)d; 
       s.setSnapshotQuota(INodeDirectorySnapshottable.SNAPSHOT_LIMIT);
     } else {
-      s = d.replaceSelf4INodeDirectorySnapshottable(iip.getLatestSnapshot(),
+      s = d.replaceSelf4INodeDirectorySnapshottable(iip.getLatestSnapshotId(),
           fsdir.getINodeMap());
     }
     addSnapshottable(s);
@@ -160,7 +160,7 @@ public class SnapshotManager implements SnapshotStats {
     if (s == fsdir.getRoot()) {
       s.setSnapshotQuota(0); 
     } else {
-      s.replaceSelf(iip.getLatestSnapshot(), fsdir.getINodeMap());
+      s.replaceSelf(iip.getLatestSnapshotId(), fsdir.getINodeMap());
     }
     removeSnapshottable(s);
   }
@@ -324,7 +324,8 @@ public class SnapshotManager implements SnapshotStats {
         SnapshottableDirectoryStatus status = new SnapshottableDirectoryStatus(
             dir.getModificationTime(), dir.getAccessTime(),
             dir.getFsPermission(), dir.getUserName(), dir.getGroupName(),
-            dir.getLocalNameBytes(), dir.getId(), dir.getChildrenNum(null),
+            dir.getLocalNameBytes(), dir.getId(), 
+            dir.getChildrenNum(Snapshot.CURRENT_STATE_ID),
             dir.getNumSnapshots(),
             dir.getSnapshotQuota(), dir.getParent() == null ? 
                 DFSUtil.EMPTY_BYTES : 

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/StorageReport.java

@@ -21,7 +21,7 @@ package org.apache.hadoop.hdfs.server.protocol;
  * Utilization report for a Datanode storage
  */
 public class StorageReport {
-  private final String storageID;
+  private final DatanodeStorage storage;
   private final boolean failed;
   private final long capacity;
   private final long dfsUsed;
@@ -30,9 +30,9 @@ public class StorageReport {
 
   public static final StorageReport[] EMPTY_ARRAY = {};
   
-  public StorageReport(String sid, boolean failed, long capacity, long dfsUsed,
-      long remaining, long bpUsed) {
-    this.storageID = sid;
+  public StorageReport(DatanodeStorage storage, boolean failed,
+      long capacity, long dfsUsed, long remaining, long bpUsed) {
+    this.storage = storage;
     this.failed = failed;
     this.capacity = capacity;
     this.dfsUsed = dfsUsed;
@@ -40,8 +40,8 @@ public class StorageReport {
     this.blockPoolUsed = bpUsed;
   }
 
-  public String getStorageID() {
-    return storageID;
+  public DatanodeStorage getStorage() {
+    return storage;
   }
 
   public boolean isFailed() {

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto

@@ -196,12 +196,13 @@ message HeartbeatRequestProto {
 }
 
 message StorageReportProto {
-  required string storageUuid = 1;
+  required string storageUuid = 1 [ deprecated = true ];
   optional bool failed = 2 [ default = false ];
   optional uint64 capacity = 3 [ default = 0 ];
   optional uint64 dfsUsed = 4 [ default = 0 ];
   optional uint64 remaining = 5 [ default = 0 ];
   optional uint64 blockPoolUsed = 6 [ default = 0 ];
+  optional DatanodeStorageProto storage = 7; // supersedes StorageUuid
 }
 
 /**

+ 30 - 16
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java

@@ -140,6 +140,7 @@ public class MiniDFSCluster {
     private int nameNodeHttpPort = 0;
     private final Configuration conf;
     private int numDataNodes = 1;
+    private StorageType storageType = StorageType.DEFAULT;
     private boolean format = true;
     private boolean manageNameDfsDirs = true;
     private boolean manageNameDfsSharedDirs = true;
@@ -185,6 +186,14 @@ public class MiniDFSCluster {
       return this;
     }
 
+    /**
+     * Default: StorageType.DEFAULT
+     */
+    public Builder storageType(StorageType type) {
+      this.storageType = type;
+      return this;
+    }
+
     /**
      * Default: true
      */
@@ -341,6 +350,7 @@ public class MiniDFSCluster {
       
     initMiniDFSCluster(builder.conf,
                        builder.numDataNodes,
+                       builder.storageType,
                        builder.format,
                        builder.manageNameDfsDirs,
                        builder.manageNameDfsSharedDirs,
@@ -592,7 +602,7 @@ public class MiniDFSCluster {
                         String[] racks, String hosts[],
                         long[] simulatedCapacities) throws IOException {
     this.nameNodes = new NameNodeInfo[1]; // Single namenode in the cluster
-    initMiniDFSCluster(conf, numDataNodes, format,
+    initMiniDFSCluster(conf, numDataNodes, StorageType.DEFAULT, format,
         manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs,
         operation, racks, hosts,
         simulatedCapacities, null, true, false,
@@ -601,7 +611,7 @@ public class MiniDFSCluster {
 
   private void initMiniDFSCluster(
       Configuration conf,
-      int numDataNodes, boolean format, boolean manageNameDfsDirs,
+      int numDataNodes, StorageType storageType, boolean format, boolean manageNameDfsDirs,
       boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy,
       boolean manageDataDfsDirs, StartupOption operation, String[] racks,
       String[] hosts, long[] simulatedCapacities, String clusterId,
@@ -670,7 +680,7 @@ public class MiniDFSCluster {
     }
 
     // Start the DataNodes
-    startDataNodes(conf, numDataNodes, manageDataDfsDirs, operation, racks,
+    startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs, operation, racks,
         hosts, simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, checkDataNodeHostConfig);
     waitClusterUp();
     //make sure ProxyUsers uses the latest conf
@@ -990,6 +1000,19 @@ public class MiniDFSCluster {
     }
   }
 
+  String makeDataNodeDirs(int dnIndex, StorageType storageType) throws IOException {
+    StringBuilder sb = new StringBuilder();
+    for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
+      File dir = getInstanceStorageDir(dnIndex, j);
+      dir.mkdirs();
+      if (!dir.isDirectory()) {
+        throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
+      }
+      sb.append((j > 0 ? "," : "") + "[" + storageType + "]" + fileAsURI(dir));
+    }
+    return sb.toString();
+  }
+
   /**
    * Modify the config and start up additional DataNodes.  The info port for
    * DataNodes is guaranteed to use a free port.
@@ -1052,7 +1075,7 @@ public class MiniDFSCluster {
                              String[] racks, String[] hosts,
                              long[] simulatedCapacities,
                              boolean setupHostsFile) throws IOException {
-    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
+    startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
         simulatedCapacities, setupHostsFile, false, false);
   }
 
@@ -1066,7 +1089,7 @@ public class MiniDFSCluster {
       long[] simulatedCapacities,
       boolean setupHostsFile,
       boolean checkDataNodeAddrConfig) throws IOException {
-    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
+    startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
         simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, false);
   }
 
@@ -1098,7 +1121,7 @@ public class MiniDFSCluster {
    * @throws IllegalStateException if NameNode has been shutdown
    */
   public synchronized void startDataNodes(Configuration conf, int numDataNodes,
-      boolean manageDfsDirs, StartupOption operation, 
+      StorageType storageType, boolean manageDfsDirs, StartupOption operation,
       String[] racks, String[] hosts,
       long[] simulatedCapacities,
       boolean setupHostsFile,
@@ -1154,16 +1177,7 @@ public class MiniDFSCluster {
       // Set up datanode address
       setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
       if (manageDfsDirs) {
-        StringBuilder sb = new StringBuilder();
-        for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
-          File dir = getInstanceStorageDir(i, j);
-          dir.mkdirs();
-          if (!dir.isDirectory()) {
-            throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
-          }
-          sb.append((j > 0 ? "," : "") + fileAsURI(dir));
-        }
-        String dirs = sb.toString();
+        String dirs = makeDataNodeDirs(i, storageType);
         dnConf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
         conf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
       }

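The new Builder knob tags every generated data directory with a storage type, which makeDataNodeDirs encodes as a "[TYPE]" prefix on each dfs.datanode.data.dir entry. A minimal usage sketch follows, mirroring the new TestStorageReport later in this diff; the class name and import paths are assumed for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.StorageType;

public class StorageTypeClusterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1)
        .storageType(StorageType.SSD)  // defaults to StorageType.DEFAULT when omitted
        .build();
    try {
      cluster.waitActive();
      // Each DataNode volume is now configured as "[SSD]file:/..." rather than a bare URI.
    } finally {
      cluster.shutdown();
    }
  }
}
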
+ 5 - 13
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java

@@ -50,7 +50,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
   }
 
   public synchronized void startDataNodes(Configuration conf, int numDataNodes,
-      boolean manageDfsDirs, StartupOption operation, 
+      StorageType storageType, boolean manageDfsDirs, StartupOption operation,
       String[] racks, String[] nodeGroups, String[] hosts,
       long[] simulatedCapacities,
       boolean setupHostsFile,
@@ -112,15 +112,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
       // Set up datanode address
       setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
       if (manageDfsDirs) {
-        File dir1 = getInstanceStorageDir(i, 0);
-        File dir2 = getInstanceStorageDir(i, 1);
-        dir1.mkdirs();
-        dir2.mkdirs();
-        if (!dir1.isDirectory() || !dir2.isDirectory()) { 
-          throw new IOException("Mkdirs failed to create directory for DataNode "
-              + i + ": " + dir1 + " or " + dir2);
-        }
-        String dirs = fileAsURI(dir1) + "," + fileAsURI(dir2);
+        String dirs = makeDataNodeDirs(i, storageType);
         dnConf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
         conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
       }
@@ -198,7 +190,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
       String[] racks, String[] nodeGroups, String[] hosts,
       long[] simulatedCapacities,
       boolean setupHostsFile) throws IOException {
-    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, nodeGroups, 
+    startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, nodeGroups,
         hosts, simulatedCapacities, setupHostsFile, false, false);
   }
 
@@ -213,13 +205,13 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
   // This is for initialize from parent class.
   @Override
   public synchronized void startDataNodes(Configuration conf, int numDataNodes, 
-      boolean manageDfsDirs, StartupOption operation, 
+      StorageType storageType, boolean manageDfsDirs, StartupOption operation,
       String[] racks, String[] hosts,
       long[] simulatedCapacities,
       boolean setupHostsFile,
       boolean checkDataNodeAddrConfig,
       boolean checkDataNodeHostConfig) throws IOException {
-    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, 
+    startDataNodes(conf, numDataNodes, storageType, manageDfsDirs, operation, racks,
         NODE_GROUPS, hosts, simulatedCapacities, setupHostsFile, 
         checkDataNodeAddrConfig, checkDataNodeHostConfig);
   }

+ 3 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java

@@ -257,8 +257,10 @@ public class BlockManagerTestUtil {
       DatanodeDescriptor dnd) {
     ArrayList<StorageReport> reports = new ArrayList<StorageReport>();
     for (DatanodeStorageInfo storage : dnd.getStorageInfos()) {
+      DatanodeStorage dns = new DatanodeStorage(
+          storage.getStorageID(), storage.getState(), storage.getStorageType());
       StorageReport report = new StorageReport(
-          storage.getStorageID(), false, storage.getCapacity(),
+          dns, false, storage.getCapacity(),
           storage.getDfsUsed(), storage.getRemaining(),
           storage.getBlockPoolUsed());
       reports.add(report);

+ 5 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java

@@ -470,11 +470,14 @@ public class TestJspHelper {
     BlockManagerTestUtil.updateStorage(dnDesc1, new DatanodeStorage("dnStorage1"));
     BlockManagerTestUtil.updateStorage(dnDesc2, new DatanodeStorage("dnStorage2"));
 
+    DatanodeStorage dns1 = new DatanodeStorage("dnStorage1");
+    DatanodeStorage dns2 = new DatanodeStorage("dnStorage2");
+
     StorageReport[] report1 = new StorageReport[] {
-        new StorageReport("dnStorage1", false, 1024, 100, 924, 100)
+        new StorageReport(dns1, false, 1024, 100, 924, 100)
     };
     StorageReport[] report2 = new StorageReport[] {
-        new StorageReport("dnStorage2", false, 2500, 200, 1848, 200)
+        new StorageReport(dns2, false, 2500, 200, 1848, 200)
     };
     dnDesc1.updateHeartbeat(report1, 5l, 3l, 10, 2);
     dnDesc2.updateHeartbeat(report2, 10l, 2l, 20, 1);

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java

@@ -394,8 +394,9 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
     }
 
     synchronized StorageReport getStorageReport(String bpid) {
-      return new StorageReport(getStorageUuid(), false, getCapacity(),
-          getUsed(), getFree(), map.get(bpid).getUsed());
+      return new StorageReport(new DatanodeStorage(getStorageUuid()),
+          false, getCapacity(), getUsed(), getFree(),
+          map.get(bpid).getUsed());
     }
   }
   

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java

@@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage;
 import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.util.DataChecksum;
 import org.junit.After;
@@ -186,9 +187,8 @@ public class TestDiskError {
     // Check permissions on directories in 'dfs.datanode.data.dir'
     FileSystem localFS = FileSystem.getLocal(conf);
     for (DataNode dn : cluster.getDataNodes()) {
-      String[] dataDirs =
-        dn.getConf().getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
-      for (String dir : dataDirs) {
+      for (FsVolumeSpi v : dn.getFSDataset().getVolumes()) {
+        String dir = v.getBasePath();
         Path dataDir = new Path(dir);
         FsPermission actual = localFS.getFileStatus(dataDir).getPermission();
           assertEquals("Permission for dir: " + dataDir + ", is " + actual +

+ 45 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java

@@ -43,10 +43,13 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.LogVerificationAppender;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
+import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
@@ -109,8 +112,9 @@ public class TestFsDatasetCache {
   public void setUp() throws Exception {
     assumeTrue(!Path.WINDOWS);
     conf = new HdfsConfiguration();
-    conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS,
-        500);
+    conf.setLong(
+        DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 100);
+    conf.setLong(DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 500);
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     conf.setLong(DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY,
         CACHE_CAPACITY);
@@ -328,7 +332,7 @@ public class TestFsDatasetCache {
 
     // Create some test files that will exceed total cache capacity
     final int numFiles = 5;
-    final long fileSize = 15000;
+    final long fileSize = CACHE_CAPACITY / (numFiles-1);
 
     final Path[] testFiles = new Path[numFiles];
     final HdfsBlockLocation[][] fileLocs = new HdfsBlockLocation[numFiles][];
@@ -477,4 +481,42 @@ public class TestFsDatasetCache {
     setHeartbeatResponse(uncacheBlocks(locs));
     verifyExpectedCacheUsage(0, 0);
   }
+
+  @Test(timeout=60000)
+  public void testUncacheQuiesces() throws Exception {
+    // Create a file
+    Path fileName = new Path("/testUncacheQuiesces");
+    int fileLen = 4096;
+    DFSTestUtil.createFile(fs, fileName, fileLen, (short)1, 0xFDFD);
+    // Cache it
+    DistributedFileSystem dfs = cluster.getFileSystem();
+    dfs.addCachePool(new CachePoolInfo("pool"));
+    dfs.addCacheDirective(new CacheDirectiveInfo.Builder()
+        .setPool("pool").setPath(fileName).setReplication((short)3).build());
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
+        long blocksCached =
+            MetricsAsserts.getLongCounter("BlocksCached", dnMetrics);
+        return blocksCached > 0;
+      }
+    }, 1000, 30000);
+    // Uncache it
+    dfs.removeCacheDirective(1);
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
+        long blocksUncached =
+            MetricsAsserts.getLongCounter("BlocksUncached", dnMetrics);
+        return blocksUncached > 0;
+      }
+    }, 1000, 30000);
+    // Make sure that no additional messages were sent
+    Thread.sleep(10000);
+    MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
+    MetricsAsserts.assertCounter("BlocksCached", 1l, dnMetrics);
+    MetricsAsserts.assertCounter("BlocksUncached", 1l, dnMetrics);
+  }
 }

+ 113 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStorageReport.java

@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.*;
+import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
+import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertThat;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Matchers.anyLong;
+
+public class TestStorageReport {
+  public static final Log LOG = LogFactory.getLog(TestStorageReport.class);
+
+  private static short REPL_FACTOR = 1;
+  private static final StorageType storageType = StorageType.SSD; // pick non-default.
+
+  private static Configuration conf;
+  private MiniDFSCluster cluster;
+  private DistributedFileSystem fs;
+  static String bpid;
+
+  @Before
+  public void startUpCluster() throws IOException {
+    conf = new HdfsConfiguration();
+    cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(REPL_FACTOR)
+        .storageType(storageType)
+        .build();
+    fs = cluster.getFileSystem();
+    bpid = cluster.getNamesystem().getBlockPoolId();
+  }
+
+  @After
+  public void shutDownCluster() throws IOException {
+    if (cluster != null) {
+      fs.close();
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  /**
+   * Ensure that storage type and storage state are propagated
+   * in Storage Reports.
+   */
+  @Test
+  public void testStorageReportHasStorageTypeAndState() throws IOException {
+
+    // Make sure we are not testing with the default type, that would not
+    // be a very good test.
+    assertNotSame(storageType, StorageType.DEFAULT);
+    NameNode nn = cluster.getNameNode();
+    DataNode dn = cluster.getDataNodes().get(0);
+
+    // Insert a spy object for the NN RPC.
+    DatanodeProtocolClientSideTranslatorPB nnSpy =
+        DataNodeTestUtils.spyOnBposToNN(dn, nn);
+
+    // Trigger a heartbeat so there is an interaction with the spy
+    // object.
+    DataNodeTestUtils.triggerHeartbeat(dn);
+
+    // Verify that the callback passed in the expected parameters.
+    ArgumentCaptor<StorageReport[]> captor =
+        ArgumentCaptor.forClass(StorageReport[].class);
+
+    Mockito.verify(nnSpy).sendHeartbeat(
+        any(DatanodeRegistration.class),
+        captor.capture(),
+        anyLong(), anyLong(), anyInt(), anyInt(), anyInt());
+
+    StorageReport[] reports = captor.getValue();
+
+    for (StorageReport report: reports) {
+      assertThat(report.getStorage().getStorageType(), is(storageType));
+      assertThat(report.getStorage().getState(), is(DatanodeStorage.State.NORMAL));
+    }
+  }
+}

+ 12 - 9
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java

@@ -938,7 +938,7 @@ public class NNThroughputBenchmark implements Tool {
       // register datanode
       dnRegistration = nameNodeProto.registerDatanode(dnRegistration);
       //first block reports
-      storage = new DatanodeStorage(dnRegistration.getDatanodeUuid());
+      storage = new DatanodeStorage(DatanodeStorage.generateUuid());
       final StorageBlockReport[] reports = {
           new StorageBlockReport(storage,
               new BlockListAsLongs(null, null).getBlockListAsLongs())
@@ -954,8 +954,8 @@ public class NNThroughputBenchmark implements Tool {
     void sendHeartbeat() throws IOException {
       // register datanode
       // TODO:FEDERATION currently a single block pool is supported
-      StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
-          false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
+      StorageReport[] rep = { new StorageReport(storage, false,
+          DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
       DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration, rep,
           0L, 0L, 0, 0, 0).getCommands();
       if(cmds != null) {
@@ -1001,7 +1001,7 @@ public class NNThroughputBenchmark implements Tool {
     @SuppressWarnings("unused") // keep it for future blockReceived benchmark
     int replicateBlocks() throws IOException {
       // register datanode
-      StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
+      StorageReport[] rep = { new StorageReport(storage,
           false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
       DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration,
           rep, 0L, 0L, 0, 0, 0).getCommands();
@@ -1010,7 +1010,8 @@ public class NNThroughputBenchmark implements Tool {
           if (cmd.getAction() == DatanodeProtocol.DNA_TRANSFER) {
             // Send a copy of a block to another datanode
             BlockCommand bcmd = (BlockCommand)cmd;
-            return transferBlocks(bcmd.getBlocks(), bcmd.getTargets());
+            return transferBlocks(bcmd.getBlocks(), bcmd.getTargets(),
+                                  bcmd.getTargetStorageIDs());
           }
         }
       }
@@ -1023,12 +1024,14 @@ public class NNThroughputBenchmark implements Tool {
      * that the blocks have been received.
      */
     private int transferBlocks( Block blocks[], 
-                                DatanodeInfo xferTargets[][] 
+                                DatanodeInfo xferTargets[][],
+                                String targetStorageIDs[][]
                               ) throws IOException {
       for(int i = 0; i < blocks.length; i++) {
         DatanodeInfo blockTargets[] = xferTargets[i];
         for(int t = 0; t < blockTargets.length; t++) {
           DatanodeInfo dnInfo = blockTargets[t];
+          String targetStorageID = targetStorageIDs[i][t];
           DatanodeRegistration receivedDNReg;
           receivedDNReg = new DatanodeRegistration(dnInfo,
             new DataStorage(nsInfo),
@@ -1038,7 +1041,7 @@ public class NNThroughputBenchmark implements Tool {
                   blocks[i], ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK,
                   null) };
           StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
-              receivedDNReg.getDatanodeUuid(), rdBlocks) };
+              targetStorageID, rdBlocks) };
           nameNodeProto.blockReceivedAndDeleted(receivedDNReg, nameNode
               .getNamesystem().getBlockPoolId(), report);
         }
@@ -1127,7 +1130,7 @@ public class NNThroughputBenchmark implements Tool {
       }
 
       // create files 
-      LOG.info("Creating " + nrFiles + " with " + blocksPerFile + " blocks each.");
+      LOG.info("Creating " + nrFiles + " files with " + blocksPerFile + " blocks each.");
       FileNameGenerator nameGenerator;
       nameGenerator = new FileNameGenerator(getBaseDir(), 100);
       String clientName = getClientName(007);
@@ -1161,7 +1164,7 @@ public class NNThroughputBenchmark implements Tool {
               loc.getBlock().getLocalBlock(),
               ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, null) };
           StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
-              datanodes[dnIdx].dnRegistration.getDatanodeUuid(), rdBlocks) };
+              datanodes[dnIdx].storage.getStorageID(), rdBlocks) };
           nameNodeProto.blockReceivedAndDeleted(datanodes[dnIdx].dnRegistration, loc
               .getBlock().getBlockPoolId(), report);
         }

+ 3 - 156
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java

@@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.File;
 import java.io.IOException;
-import java.security.PrivilegedExceptionAction;
 import java.util.Iterator;
 
 import org.apache.commons.logging.Log;
@@ -29,25 +28,13 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileContext;
-import org.apache.hadoop.fs.Options.Rename;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hdfs.DFSClientAdapter;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
-import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
-import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.Util;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.token.Token;
 
 /**
  * OfflineEditsViewerHelper is a helper class for TestOfflineEditsViewer,
@@ -135,151 +122,11 @@ public class OfflineEditsViewerHelper {
    * OP_CLEAR_NS_QUOTA  (12)
    */
   private CheckpointSignature runOperations() throws IOException {
-
     LOG.info("Creating edits by performing fs operations");
     // no check, if it's not it throws an exception which is what we want
-    DistributedFileSystem dfs =
-      (DistributedFileSystem)cluster.getFileSystem();
-    FileContext fc = FileContext.getFileContext(cluster.getURI(0), config);
-    // OP_ADD 0
-    Path pathFileCreate = new Path("/file_create_u\1F431");
-    FSDataOutputStream s = dfs.create(pathFileCreate);
-    // OP_CLOSE 9
-    s.close();
-    // OP_RENAME_OLD 1
-    Path pathFileMoved = new Path("/file_moved");
-    dfs.rename(pathFileCreate, pathFileMoved);
-    // OP_DELETE 2
-    dfs.delete(pathFileMoved, false);
-    // OP_MKDIR 3
-    Path pathDirectoryMkdir = new Path("/directory_mkdir");
-    dfs.mkdirs(pathDirectoryMkdir);
-    // OP_ALLOW_SNAPSHOT 29
-    dfs.allowSnapshot(pathDirectoryMkdir);
-    // OP_DISALLOW_SNAPSHOT 30
-    dfs.disallowSnapshot(pathDirectoryMkdir);
-    // OP_CREATE_SNAPSHOT 26
-    String ssName = "snapshot1";
-    dfs.allowSnapshot(pathDirectoryMkdir);
-    dfs.createSnapshot(pathDirectoryMkdir, ssName);
-    // OP_RENAME_SNAPSHOT 28
-    String ssNewName = "snapshot2";
-    dfs.renameSnapshot(pathDirectoryMkdir, ssName, ssNewName);
-    // OP_DELETE_SNAPSHOT 27
-    dfs.deleteSnapshot(pathDirectoryMkdir, ssNewName);
-    // OP_SET_REPLICATION 4
-    s = dfs.create(pathFileCreate);
-    s.close();
-    dfs.setReplication(pathFileCreate, (short)1);
-    // OP_SET_PERMISSIONS 7
-    Short permission = 0777;
-    dfs.setPermission(pathFileCreate, new FsPermission(permission));
-    // OP_SET_OWNER 8
-    dfs.setOwner(pathFileCreate, new String("newOwner"), null);
-    // OP_CLOSE 9 see above
-    // OP_SET_GENSTAMP 10 see above
-    // OP_SET_NS_QUOTA 11 obsolete
-    // OP_CLEAR_NS_QUOTA 12 obsolete
-    // OP_TIMES 13
-    long mtime = 1285195527000L; // Wed, 22 Sep 2010 22:45:27 GMT
-    long atime = mtime;
-    dfs.setTimes(pathFileCreate, mtime, atime);
-    // OP_SET_QUOTA 14
-    dfs.setQuota(pathDirectoryMkdir, 1000L, HdfsConstants.QUOTA_DONT_SET);
-    // OP_RENAME 15
-    fc.rename(pathFileCreate, pathFileMoved, Rename.NONE);
-    // OP_CONCAT_DELETE 16
-    Path   pathConcatTarget = new Path("/file_concat_target");
-    Path[] pathConcatFiles  = new Path[2];
-    pathConcatFiles[0]      = new Path("/file_concat_0");
-    pathConcatFiles[1]      = new Path("/file_concat_1");
-
-    long  length      = blockSize * 3; // multiple of blocksize for concat
-    short replication = 1;
-    long  seed        = 1;
-
-    DFSTestUtil.createFile(dfs, pathConcatTarget, length, replication, seed);
-    DFSTestUtil.createFile(dfs, pathConcatFiles[0], length, replication, seed);
-    DFSTestUtil.createFile(dfs, pathConcatFiles[1], length, replication, seed);
-    dfs.concat(pathConcatTarget, pathConcatFiles);
-    // OP_SYMLINK 17
-    Path pathSymlink = new Path("/file_symlink");
-    fc.createSymlink(pathConcatTarget, pathSymlink, false);
-    // OP_GET_DELEGATION_TOKEN 18
-    // OP_RENEW_DELEGATION_TOKEN 19
-    // OP_CANCEL_DELEGATION_TOKEN 20
-    // see TestDelegationToken.java
-    // fake the user to renew token for
-    final Token<?>[] tokens = dfs.addDelegationTokens("JobTracker", null);
-    UserGroupInformation longUgi = UserGroupInformation.createRemoteUser(
-      "JobTracker/foo.com@FOO.COM");
-    try {
-      longUgi.doAs(new PrivilegedExceptionAction<Object>() {
-        @Override
-        public Object run() throws IOException, InterruptedException {
-          for (Token<?> token : tokens) {
-            token.renew(config);
-            token.cancel(config);
-          }
-          return null;
-        }
-      });
-    } catch(InterruptedException e) {
-      throw new IOException(
-        "renewDelegationToken threw InterruptedException", e);
-    }
-    // OP_UPDATE_MASTER_KEY 21
-    //   done by getDelegationTokenSecretManager().startThreads();
-
-    // OP_ADD_CACHE_POOL 35
-    final String pool = "poolparty";
-    dfs.addCachePool(new CachePoolInfo(pool));
-    // OP_MODIFY_CACHE_POOL 36
-    dfs.modifyCachePool(new CachePoolInfo(pool)
-        .setOwnerName("carlton")
-        .setGroupName("party")
-        .setMode(new FsPermission((short)0700))
-        .setLimit(1989l));
-    // OP_ADD_PATH_BASED_CACHE_DIRECTIVE 33
-    long id = dfs.addCacheDirective(
-        new CacheDirectiveInfo.Builder().
-            setPath(new Path("/bar")).
-            setReplication((short)1).
-            setPool(pool).
-            build());
-    // OP_MODIFY_PATH_BASED_CACHE_DIRECTIVE 38
-    dfs.modifyCacheDirective(
-        new CacheDirectiveInfo.Builder().
-            setId(id).
-            setPath(new Path("/bar2")).
-            build());
-    // OP_REMOVE_PATH_BASED_CACHE_DIRECTIVE 34
-    dfs.removeCacheDirective(id);
-    // OP_REMOVE_CACHE_POOL 37
-    dfs.removeCachePool(pool);
-    // sync to disk, otherwise we parse partial edits
-    cluster.getNameNode().getFSImage().getEditLog().logSync();
-    
-    // OP_REASSIGN_LEASE 22
-    String filePath = "/hard-lease-recovery-test";
-    byte[] bytes = "foo-bar-baz".getBytes();
-    DFSClientAdapter.stopLeaseRenewer(dfs);
-    FSDataOutputStream leaseRecoveryPath = dfs.create(new Path(filePath));
-    leaseRecoveryPath.write(bytes);
-    leaseRecoveryPath.hflush();
-    // Set the hard lease timeout to 1 second.
-    cluster.setLeasePeriod(60 * 1000, 1000);
-    // wait for lease recovery to complete
-    LocatedBlocks locatedBlocks;
-    do {
-      try {
-        Thread.sleep(1000);
-      } catch (InterruptedException e) {
-        LOG.info("Innocuous exception", e);
-      }
-      locatedBlocks = DFSClientAdapter.callGetBlockLocations(
-          cluster.getNameNodeRpc(), filePath, 0L, bytes.length);
-    } while (locatedBlocks.isUnderConstruction());
+    DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
+    DFSTestUtil.runOperations(cluster, dfs, cluster.getConfiguration(0),
+        dfs.getDefaultBlockSize(), 0);
 
     // Force a roll so we get an OP_END_LOG_SEGMENT txn
     return cluster.getNameNodeRpc().rollEditLog();

+ 52 - 13
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java

@@ -57,17 +57,18 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.LogVerificationAppender;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
+import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
+import org.apache.hadoop.hdfs.protocol.CachePoolStats;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
-import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
-import org.apache.hadoop.hdfs.protocol.CachePoolStats;
 import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
@@ -81,6 +82,7 @@ import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.GSet;
 import org.apache.log4j.Level;
 import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -603,8 +605,8 @@ public class TestCacheDirectives {
    * Wait for the NameNode to have an expected number of cached blocks
    * and replicas.
    * @param nn NameNode
-   * @param expectedCachedBlocks
-   * @param expectedCachedReplicas
+   * @param expectedCachedBlocks if -1, treat as wildcard
+   * @param expectedCachedReplicas if -1, treat as wildcard
    * @throws Exception
    */
   private static void waitForCachedBlocks(NameNode nn,
@@ -633,16 +635,18 @@ public class TestCacheDirectives {
         } finally {
           namesystem.readUnlock();
         }
-        if ((numCachedBlocks == expectedCachedBlocks) && 
-            (numCachedReplicas == expectedCachedReplicas)) {
-          return true;
-        } else {
-          LOG.info(logString + " cached blocks: have " + numCachedBlocks +
-              " / " + expectedCachedBlocks + ".  " +
-              "cached replicas: have " + numCachedReplicas +
-              " / " + expectedCachedReplicas);
-          return false;
+        if (expectedCachedBlocks == -1 ||
+            numCachedBlocks == expectedCachedBlocks) {
+          if (expectedCachedReplicas == -1 ||
+              numCachedReplicas == expectedCachedReplicas) {
+            return true;
+          }
         }
+        LOG.info(logString + " cached blocks: have " + numCachedBlocks +
+            " / " + expectedCachedBlocks + ".  " +
+            "cached replicas: have " + numCachedReplicas +
+            " / " + expectedCachedReplicas);
+        return false;
       }
     }, 500, 60000);
   }
@@ -1351,4 +1355,39 @@ public class TestCacheDirectives {
         .setExpiration(Expiration.newRelative(RELATIVE_EXPIRY_NEVER - 1))
         .build());
   }
+
+  @Test(timeout=60000)
+  public void testExceedsCapacity() throws Exception {
+    // Create a giant file
+    final Path fileName = new Path("/exceeds");
+    final long fileLen = CACHE_CAPACITY * (NUM_DATANODES*2);
+    int numCachedReplicas = (int) ((CACHE_CAPACITY*NUM_DATANODES)/BLOCK_SIZE);
+    DFSTestUtil.createFile(dfs, fileName, fileLen, (short) NUM_DATANODES,
+        0xFADED);
+    // Set up a log appender watcher
+    final LogVerificationAppender appender = new LogVerificationAppender();
+    final Logger logger = Logger.getRootLogger();
+    logger.addAppender(appender);
+    dfs.addCachePool(new CachePoolInfo("pool"));
+    dfs.addCacheDirective(new CacheDirectiveInfo.Builder().setPool("pool")
+        .setPath(fileName).setReplication((short) 1).build());
+    waitForCachedBlocks(namenode, -1, numCachedReplicas,
+        "testExceeds:1");
+    // Check that no DNs saw an excess CACHE message
+    int lines = appender.countLinesWithMessage(
+        "more bytes in the cache: " +
+        DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
+    assertEquals("Namenode should not send extra CACHE commands", 0, lines);
+    // Try creating a file with giant-sized blocks that exceed cache capacity
+    dfs.delete(fileName, false);
+    DFSTestUtil.createFile(dfs, fileName, 4096, fileLen, CACHE_CAPACITY * 2,
+        (short) 1, 0xFADED);
+    // Nothing will get cached, so just force sleep for a bit
+    Thread.sleep(4000);
+    // Still should not see any excess commands
+    lines = appender.countLinesWithMessage(
+        "more bytes in the cache: " +
+        DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
+    assertEquals("Namenode should not send extra CACHE commands", 0, lines);
+  }
 }

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java

@@ -140,8 +140,9 @@ public class TestDeadDatanode {
 
     // Ensure heartbeat from dead datanode is rejected with a command
     // that asks datanode to register again
-    StorageReport[] rep = { new StorageReport(reg.getDatanodeUuid(), false, 0, 0,
-        0, 0) };
+    StorageReport[] rep = { new StorageReport(
+        new DatanodeStorage(reg.getDatanodeUuid()),
+        false, 0, 0, 0, 0) };
     DatanodeCommand[] cmd = dnp.sendHeartbeat(reg, rep, 0L, 0L, 0, 0, 0)
         .getCommands();
     assertEquals(1, cmd.length);

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirectory.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -119,7 +120,7 @@ public class TestFSDirectory {
     fsdir.reset();
     Assert.assertFalse(fsdir.isReady());
     final INodeDirectory root = (INodeDirectory) fsdir.getINode("/");
-    Assert.assertTrue(root.getChildrenList(null).isEmpty());
+    Assert.assertTrue(root.getChildrenList(Snapshot.CURRENT_STATE_ID).isEmpty());
     fsdir.imageLoadComplete();
     Assert.assertTrue(fsdir.isReady());
   }

+ 5 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
@@ -43,6 +44,7 @@ import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper;
 import org.apache.hadoop.hdfs.util.Canceler;
 import org.apache.log4j.Level;
@@ -195,11 +197,12 @@ public class TestFSImageWithSnapshot {
     INodeDirectorySnapshottable rootNode = 
         (INodeDirectorySnapshottable) fsn.dir.getINode4Write(root.toString());
     assertTrue("The children list of root should be empty", 
-        rootNode.getChildrenList(null).isEmpty());
+        rootNode.getChildrenList(Snapshot.CURRENT_STATE_ID).isEmpty());
     // one snapshot on root: s1
     List<DirectoryDiff> diffList = rootNode.getDiffs().asList();
     assertEquals(1, diffList.size());
-    assertEquals("s1", diffList.get(0).getSnapshot().getRoot().getLocalName());
+    Snapshot s1 = rootNode.getSnapshot(DFSUtil.string2Bytes("s1"));
+    assertEquals(s1.getId(), diffList.get(0).getSnapshotId());
     
     // check SnapshotManager's snapshottable directory list
     assertEquals(1, fsn.getSnapshotManager().getNumSnapshottableDirs());

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java

@@ -60,6 +60,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.protocol.QuotaExceededException;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
+import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.Time;
@@ -781,7 +782,7 @@ public class TestINodeFile {
       }
       System.out.println("Adding component " + DFSUtil.bytes2String(component));
       dir = new INodeDirectory(++id, component, permstatus, 0);
-      prev.addChild(dir, false, null);
+      prev.addChild(dir, false, Snapshot.CURRENT_STATE_ID);
       prev = dir;
     }
     return dir; // Last Inode in the chain

+ 7 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSnapshotPathINodes.java

@@ -123,8 +123,12 @@ public class TestSnapshotPathINodes {
       final Snapshot snapshot, int index) {
     assertEquals(isSnapshot, inodesInPath.isSnapshot());
     assertEquals(index, inodesInPath.getSnapshotRootIndex());
-    assertEquals(isSnapshot? snapshot: null, inodesInPath.getPathSnapshot());
-    assertEquals(isSnapshot? null: snapshot, inodesInPath.getLatestSnapshot());
+    assertEquals(Snapshot.getSnapshotId(isSnapshot ? snapshot : null),
+        inodesInPath.getPathSnapshotId());
+    if (!isSnapshot) {
+      assertEquals(Snapshot.getSnapshotId(snapshot),
+          inodesInPath.getLatestSnapshotId());
+    }
     if (isSnapshot && index >= 0) {
       assertEquals(Snapshot.Root.class, inodesInPath.getINodes()[index].getClass());
     }
@@ -424,7 +428,7 @@ public class TestSnapshotPathINodes {
     // The modification time of the snapshot INode should be the same with the
     // original INode before modification
     assertEquals(modTime,
-        snapshotFileNode.getModificationTime(ssNodesInPath.getPathSnapshot()));
+        snapshotFileNode.getModificationTime(ssNodesInPath.getPathSnapshotId()));
 
     // Check the INode for /TestSnapshot/sub1/file1 again
     names = INode.getPathNames(file1.toString());

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java

@@ -47,6 +47,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.MetricsSource;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.test.MetricsAsserts;
 import org.apache.hadoop.util.Time;
 import org.apache.log4j.Level;
@@ -108,6 +110,12 @@ public class TestNameNodeMetrics {
   
   @After
   public void tearDown() throws Exception {
+    MetricsSource source = DefaultMetricsSystem.instance().getSource("UgiMetrics");
+    if (source != null) {
+      // Run only once since the UGI metrics is cleaned up during teardown
+      MetricsRecordBuilder rb = getMetrics(source);
+      assertQuantileGauges("GetGroups1s", rb);
+    }
     cluster.shutdown();
   }
   

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotTestHelper.java

@@ -270,7 +270,8 @@ public class SnapshotTestHelper {
 
   public static void dumpTree2File(FSDirectory fsdir, File f) throws IOException{
     final PrintWriter out = new PrintWriter(new FileWriter(f, false), true);
-    fsdir.getINode("/").dumpTreeRecursively(out, new StringBuilder(), null);
+    fsdir.getINode("/").dumpTreeRecursively(out, new StringBuilder(),
+        Snapshot.CURRENT_STATE_ID);
     out.close();
   }
 

+ 3 - 5
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java

@@ -156,7 +156,6 @@ public class TestINodeFileUnderConstructionWithSnapshot {
     INodeDirectorySnapshottable dirNode = (INodeDirectorySnapshottable) fsdir
         .getINode(dir.toString());
     DirectoryDiff last = dirNode.getDiffs().getLast();
-    Snapshot s0 = last.snapshot;
     
     // 2. append without closing stream
     out = appendFileWithoutClosing(file, BLOCKSIZE);
@@ -164,7 +163,7 @@ public class TestINodeFileUnderConstructionWithSnapshot {
     
     // re-check nodeInDeleted_S0
     dirNode = (INodeDirectorySnapshottable) fsdir.getINode(dir.toString());
-    assertEquals(BLOCKSIZE * 2, fileNode.computeFileSize(s0));
+    assertEquals(BLOCKSIZE * 2, fileNode.computeFileSize(last.getSnapshotId()));
     
     // 3. take snapshot --> close stream
     hdfs.createSnapshot(dir, "s1");
@@ -175,9 +174,8 @@ public class TestINodeFileUnderConstructionWithSnapshot {
     fileNode = (INodeFile) fsdir.getINode(file.toString());
     dirNode = (INodeDirectorySnapshottable) fsdir.getINode(dir.toString());
     last = dirNode.getDiffs().getLast();
-    Snapshot s1 = last.snapshot;
     assertTrue(fileNode.isWithSnapshot());
-    assertEquals(BLOCKSIZE * 3, fileNode.computeFileSize(s1));
+    assertEquals(BLOCKSIZE * 3, fileNode.computeFileSize(last.getSnapshotId()));
     
     // 4. modify file --> append without closing stream --> take snapshot -->
     // close stream
@@ -187,7 +185,7 @@ public class TestINodeFileUnderConstructionWithSnapshot {
     out.close();
     
     // re-check the size of nodeInDeleted_S1
-    assertEquals(BLOCKSIZE * 3, fileNode.computeFileSize(s1));
+    assertEquals(BLOCKSIZE * 3, fileNode.computeFileSize(last.getSnapshotId()));
   }
   
   /**

+ 107 - 60
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java

@@ -44,6 +44,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSOutputStream;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
@@ -72,6 +73,7 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
 
 /** Testing rename with snapshots. */
 public class TestRenameWithSnapshots {
@@ -402,9 +404,11 @@ public class TestRenameWithSnapshots {
     final Path foo_s3 = SnapshotTestHelper.getSnapshotPath(sdir1, "s3",
         "foo");
     assertFalse(hdfs.exists(foo_s3));
+    INodeDirectorySnapshottable sdir2Node = 
+        (INodeDirectorySnapshottable) fsdir.getINode(sdir2.toString());
+    Snapshot s2 = sdir2Node.getSnapshot(DFSUtil.string2Bytes("s2"));
     INodeFile sfoo = fsdir.getINode(newfoo.toString()).asFile();
-    assertEquals("s2", sfoo.getDiffs().getLastSnapshot().getRoot()
-        .getLocalName());
+    assertEquals(s2.getId(), sfoo.getDiffs().getLastSnapshotId());
   }
   
   /**
@@ -604,8 +608,10 @@ public class TestRenameWithSnapshots {
     
     INodeFile snode = fsdir.getINode(newfoo.toString()).asFile();
     assertEquals(1, snode.getDiffs().asList().size());
-    assertEquals("s2", snode.getDiffs().getLastSnapshot().getRoot()
-        .getLocalName());
+    INodeDirectorySnapshottable sdir2Node = 
+        (INodeDirectorySnapshottable) fsdir.getINode(sdir2.toString());
+    Snapshot s2 = sdir2Node.getSnapshot(DFSUtil.string2Bytes("s2"));
+    assertEquals(s2.getId(), snode.getDiffs().getLastSnapshotId());
     
     // restart cluster
     restartClusterAndCheckImage(true);
@@ -758,12 +764,14 @@ public class TestRenameWithSnapshots {
     assertEquals(2, fooWithCount.getReferenceCount());
     INodeDirectory foo = fooWithCount.asDirectory();
     assertEquals(1, foo.getDiffs().asList().size());
-    assertEquals("s1", foo.getDirectoryWithSnapshotFeature().getLastSnapshot()
-        .getRoot().getLocalName());
+    INodeDirectorySnapshottable sdir1Node = 
+        (INodeDirectorySnapshottable) fsdir.getINode(sdir1.toString());
+    Snapshot s1 = sdir1Node.getSnapshot(DFSUtil.string2Bytes("s1"));
+    assertEquals(s1.getId(), foo.getDirectoryWithSnapshotFeature()
+        .getLastSnapshotId());
     INodeFile bar1 = fsdir.getINode4Write(bar1_dir1.toString()).asFile();
     assertEquals(1, bar1.getDiffs().asList().size());
-    assertEquals("s1", bar1.getDiffs().getLastSnapshot().getRoot()
-        .getLocalName());
+    assertEquals(s1.getId(), bar1.getDiffs().getLastSnapshotId());
     
     INodeReference barRef = fsdir.getINode4Write(bar2_dir1.toString())
         .asReference();
@@ -772,8 +780,7 @@ public class TestRenameWithSnapshots {
     assertEquals(2, barWithCount.getReferenceCount());
     INodeFile bar = barWithCount.asFile();
     assertEquals(1, bar.getDiffs().asList().size());
-    assertEquals("s1", bar.getDiffs().getLastSnapshot().getRoot()
-        .getLocalName());
+    assertEquals(s1.getId(), bar.getDiffs().getLastSnapshotId());
     
     // restart the cluster and check fsimage
     restartClusterAndCheckImage(true);
@@ -967,6 +974,13 @@ public class TestRenameWithSnapshots {
     hdfs.rename(bar_dir2, bar_dir1);
     
     // check the internal details
+    INodeDirectorySnapshottable sdir1Node = 
+        (INodeDirectorySnapshottable) fsdir.getINode(sdir1.toString());
+    INodeDirectorySnapshottable sdir2Node = 
+        (INodeDirectorySnapshottable) fsdir.getINode(sdir2.toString());
+    INodeDirectorySnapshottable sdir3Node = 
+        (INodeDirectorySnapshottable) fsdir.getINode(sdir3.toString());
+    
     INodeReference fooRef = fsdir.getINode4Write(foo_dir1.toString())
         .asReference();
     INodeReference.WithCount fooWithCount = (WithCount) fooRef.getReferredINode();
@@ -975,16 +989,22 @@ public class TestRenameWithSnapshots {
     INodeDirectory foo = fooWithCount.asDirectory();
     List<DirectoryDiff> fooDiffs = foo.getDiffs().asList();
     assertEquals(4, fooDiffs.size());
-    assertEquals("s2222", fooDiffs.get(3).snapshot.getRoot().getLocalName());
-    assertEquals("s333", fooDiffs.get(2).snapshot.getRoot().getLocalName());
-    assertEquals("s22", fooDiffs.get(1).snapshot.getRoot().getLocalName());
-    assertEquals("s1", fooDiffs.get(0).snapshot.getRoot().getLocalName());
+    
+    Snapshot s2222 = sdir2Node.getSnapshot(DFSUtil.string2Bytes("s2222"));
+    Snapshot s333 = sdir3Node.getSnapshot(DFSUtil.string2Bytes("s333"));
+    Snapshot s22 = sdir2Node.getSnapshot(DFSUtil.string2Bytes("s22"));
+    Snapshot s1 = sdir1Node.getSnapshot(DFSUtil.string2Bytes("s1"));
+    
+    assertEquals(s2222.getId(), fooDiffs.get(3).getSnapshotId());
+    assertEquals(s333.getId(), fooDiffs.get(2).getSnapshotId());
+    assertEquals(s22.getId(), fooDiffs.get(1).getSnapshotId());
+    assertEquals(s1.getId(), fooDiffs.get(0).getSnapshotId());
     INodeFile bar1 = fsdir.getINode4Write(bar1_dir1.toString()).asFile();
     List<FileDiff> bar1Diffs = bar1.getDiffs().asList();
     assertEquals(3, bar1Diffs.size());
-    assertEquals("s333", bar1Diffs.get(2).snapshot.getRoot().getLocalName());
-    assertEquals("s22", bar1Diffs.get(1).snapshot.getRoot().getLocalName());
-    assertEquals("s1", bar1Diffs.get(0).snapshot.getRoot().getLocalName());
+    assertEquals(s333.getId(), bar1Diffs.get(2).getSnapshotId());
+    assertEquals(s22.getId(), bar1Diffs.get(1).getSnapshotId());
+    assertEquals(s1.getId(), bar1Diffs.get(0).getSnapshotId());
     
     INodeReference barRef = fsdir.getINode4Write(bar_dir1.toString())
         .asReference();
@@ -994,10 +1014,10 @@ public class TestRenameWithSnapshots {
     INodeFile bar = barWithCount.asFile();
     List<FileDiff> barDiffs = bar.getDiffs().asList();
     assertEquals(4, barDiffs.size());
-    assertEquals("s2222", barDiffs.get(3).snapshot.getRoot().getLocalName());
-    assertEquals("s333", barDiffs.get(2).snapshot.getRoot().getLocalName());
-    assertEquals("s22", barDiffs.get(1).snapshot.getRoot().getLocalName());
-    assertEquals("s1", barDiffs.get(0).snapshot.getRoot().getLocalName());
+    assertEquals(s2222.getId(), barDiffs.get(3).getSnapshotId());
+    assertEquals(s333.getId(), barDiffs.get(2).getSnapshotId());
+    assertEquals(s22.getId(), barDiffs.get(1).getSnapshotId());
+    assertEquals(s1.getId(), barDiffs.get(0).getSnapshotId());
     
     // restart the cluster and check fsimage
     restartClusterAndCheckImage(true);
@@ -1033,10 +1053,10 @@ public class TestRenameWithSnapshots {
     foo = fooWithCount.asDirectory();
     fooDiffs = foo.getDiffs().asList();
     assertEquals(4, fooDiffs.size());
-    assertEquals("s2222", fooDiffs.get(3).snapshot.getRoot().getLocalName());
+    assertEquals(s2222.getId(), fooDiffs.get(3).getSnapshotId());
     bar1Diffs = bar1.getDiffs().asList();
     assertEquals(3, bar1Diffs.size());
-    assertEquals("s333", bar1Diffs.get(2).snapshot.getRoot().getLocalName());
+    assertEquals(s333.getId(), bar1Diffs.get(2).getSnapshotId());
     
     barRef = fsdir.getINode(bar_s2222.toString()).asReference();
     barWithCount = (WithCount) barRef.getReferredINode();
@@ -1044,7 +1064,7 @@ public class TestRenameWithSnapshots {
     bar = barWithCount.asFile();
     barDiffs = bar.getDiffs().asList();
     assertEquals(4, barDiffs.size());
-    assertEquals("s2222", barDiffs.get(3).snapshot.getRoot().getLocalName());
+    assertEquals(s2222.getId(), barDiffs.get(3).getSnapshotId());
   }
   
   /**
@@ -1164,6 +1184,9 @@ public class TestRenameWithSnapshots {
     assertTrue(hdfs.exists(bar_s2));
     
     // check internal details
+    INodeDirectorySnapshottable sdir2Node = 
+        (INodeDirectorySnapshottable) fsdir.getINode(sdir2.toString());
+    Snapshot s2 = sdir2Node.getSnapshot(DFSUtil.string2Bytes("s2"));
     final Path foo_s2 = SnapshotTestHelper.getSnapshotPath(sdir2, "s2", "foo");
     INodeReference fooRef = fsdir.getINode(foo_s2.toString()).asReference();
     assertTrue(fooRef instanceof INodeReference.WithName);
@@ -1172,7 +1195,7 @@ public class TestRenameWithSnapshots {
     INodeDirectory fooDir = fooWC.getReferredINode().asDirectory();
     List<DirectoryDiff> diffs = fooDir.getDiffs().asList();
     assertEquals(1, diffs.size());
-    assertEquals("s2", diffs.get(0).snapshot.getRoot().getLocalName());
+    assertEquals(s2.getId(), diffs.get(0).getSnapshotId());
     
     // restart the cluster and check fsimage
     restartClusterAndCheckImage(true);
@@ -1260,7 +1283,7 @@ public class TestRenameWithSnapshots {
     INodeDirectory dir2 = fsdir.getINode4Write(sdir2.toString()).asDirectory();
     INodeDirectory mockDir2 = spy(dir2);
     doReturn(false).when(mockDir2).addChild((INode) anyObject(), anyBoolean(),
-            (Snapshot) anyObject());
+           Mockito.anyInt());
     INodeDirectory root = fsdir.getINode4Write("/").asDirectory();
     root.replaceChild(dir2, mockDir2, fsdir.getINodeMap());
     
@@ -1271,12 +1294,14 @@ public class TestRenameWithSnapshots {
     // check the current internal details
     INodeDirectorySnapshottable dir1Node = (INodeDirectorySnapshottable) fsdir
         .getINode4Write(sdir1.toString());
-    ReadOnlyList<INode> dir1Children = dir1Node.getChildrenList(null);
+    Snapshot s1 = dir1Node.getSnapshot(DFSUtil.string2Bytes("s1"));
+    ReadOnlyList<INode> dir1Children = dir1Node
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, dir1Children.size());
     assertEquals(foo.getName(), dir1Children.get(0).getLocalName());
     List<DirectoryDiff> dir1Diffs = dir1Node.getDiffs().asList();
     assertEquals(1, dir1Diffs.size());
-    assertEquals("s1", dir1Diffs.get(0).snapshot.getRoot().getLocalName());
+    assertEquals(s1.getId(), dir1Diffs.get(0).getSnapshotId());
     
     // after the undo of rename, both the created and deleted list of sdir1
     // should be empty
@@ -1288,7 +1313,7 @@ public class TestRenameWithSnapshots {
     assertTrue(fooNode.isDirectory() && fooNode.asDirectory().isWithSnapshot());
     List<DirectoryDiff> fooDiffs = fooNode.asDirectory().getDiffs().asList();
     assertEquals(1, fooDiffs.size());
-    assertEquals("s1", fooDiffs.get(0).snapshot.getRoot().getLocalName());
+    assertEquals(s1.getId(), fooDiffs.get(0).getSnapshotId());
     
     final Path foo_s1 = SnapshotTestHelper.getSnapshotPath(sdir1, "s1", "foo");
     INode fooNode_s1 = fsdir.getINode(foo_s1.toString());
@@ -1299,7 +1324,8 @@ public class TestRenameWithSnapshots {
     INodeDirectory dir2Node = fsdir.getINode4Write(sdir2.toString())
         .asDirectory();
     assertFalse(dir2Node.isWithSnapshot());
-    ReadOnlyList<INode> dir2Children = dir2Node.getChildrenList(null);
+    ReadOnlyList<INode> dir2Children = dir2Node
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, dir2Children.size());
     assertEquals(dir2file.getName(), dir2Children.get(0).getLocalName());
   }
@@ -1327,7 +1353,7 @@ public class TestRenameWithSnapshots {
     INodeDirectory dir2 = fsdir.getINode4Write(sdir2.toString()).asDirectory();
     INodeDirectory mockDir2 = spy(dir2);
     doReturn(false).when(mockDir2).addChild((INode) anyObject(), anyBoolean(),
-            (Snapshot) anyObject());
+            Mockito.anyInt());
     INodeDirectory root = fsdir.getINode4Write("/").asDirectory();
     root.replaceChild(dir2, mockDir2, fsdir.getINodeMap());
     
@@ -1338,12 +1364,14 @@ public class TestRenameWithSnapshots {
     // check the current internal details
     INodeDirectorySnapshottable dir1Node = (INodeDirectorySnapshottable) fsdir
         .getINode4Write(sdir1.toString());
-    ReadOnlyList<INode> dir1Children = dir1Node.getChildrenList(null);
+    Snapshot s1 = dir1Node.getSnapshot(DFSUtil.string2Bytes("s1"));
+    ReadOnlyList<INode> dir1Children = dir1Node
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, dir1Children.size());
     assertEquals(foo.getName(), dir1Children.get(0).getLocalName());
     List<DirectoryDiff> dir1Diffs = dir1Node.getDiffs().asList();
     assertEquals(1, dir1Diffs.size());
-    assertEquals("s1", dir1Diffs.get(0).snapshot.getRoot().getLocalName());
+    assertEquals(s1.getId(), dir1Diffs.get(0).getSnapshotId());
     
     // after the undo of rename, the created list of sdir1 should contain 
     // 1 element
@@ -1363,7 +1391,8 @@ public class TestRenameWithSnapshots {
     INodeDirectory dir2Node = fsdir.getINode4Write(sdir2.toString())
         .asDirectory();
     assertFalse(dir2Node.isWithSnapshot());
-    ReadOnlyList<INode> dir2Children = dir2Node.getChildrenList(null);
+    ReadOnlyList<INode> dir2Children = dir2Node
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, dir2Children.size());
     assertEquals(dir2file.getName(), dir2Children.get(0).getLocalName());
   }
@@ -1389,7 +1418,7 @@ public class TestRenameWithSnapshots {
     INodeDirectory dir3 = fsdir.getINode4Write(sdir3.toString()).asDirectory();
     INodeDirectory mockDir3 = spy(dir3);
     doReturn(false).when(mockDir3).addChild((INode) anyObject(), anyBoolean(),
-            (Snapshot) anyObject());
+            Mockito.anyInt());
     INodeDirectory root = fsdir.getINode4Write("/").asDirectory();
     root.replaceChild(dir3, mockDir3, fsdir.getINodeMap());
     
@@ -1400,13 +1429,18 @@ public class TestRenameWithSnapshots {
     assertFalse(result);
     
     // check the current internal details
+    INodeDirectorySnapshottable dir1Node = (INodeDirectorySnapshottable) fsdir
+        .getINode4Write(sdir1.toString());
+    Snapshot s1 = dir1Node.getSnapshot(DFSUtil.string2Bytes("s1"));
     INodeDirectorySnapshottable dir2Node = (INodeDirectorySnapshottable) fsdir
         .getINode4Write(sdir2.toString());
-    ReadOnlyList<INode> dir2Children = dir2Node.getChildrenList(null);
+    Snapshot s2 = dir2Node.getSnapshot(DFSUtil.string2Bytes("s2"));
+    ReadOnlyList<INode> dir2Children = dir2Node
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, dir2Children.size());
     List<DirectoryDiff> dir2Diffs = dir2Node.getDiffs().asList();
     assertEquals(1, dir2Diffs.size());
-    assertEquals("s2", Snapshot.getSnapshotName(dir2Diffs.get(0).snapshot));
+    assertEquals(s2.getId(), dir2Diffs.get(0).getSnapshotId());
     ChildrenDiff childrenDiff = dir2Diffs.get(0).getChildrenDiff();
     assertEquals(0, childrenDiff.getList(ListType.DELETED).size());
     assertEquals(1, childrenDiff.getList(ListType.CREATED).size());
@@ -1418,7 +1452,7 @@ public class TestRenameWithSnapshots {
     assertTrue(fooNode instanceof INodeReference.DstReference);
     List<DirectoryDiff> fooDiffs = fooNode.asDirectory().getDiffs().asList();
     assertEquals(1, fooDiffs.size());
-    assertEquals("s1", fooDiffs.get(0).snapshot.getRoot().getLocalName());
+    assertEquals(s1.getId(), fooDiffs.get(0).getSnapshotId());
     
     // create snapshot on sdir2 and rename again
     hdfs.createSnapshot(sdir2, "s3");
@@ -1428,13 +1462,14 @@ public class TestRenameWithSnapshots {
     // check internal details again
     dir2Node = (INodeDirectorySnapshottable) fsdir.getINode4Write(sdir2
         .toString());
+    Snapshot s3 = dir2Node.getSnapshot(DFSUtil.string2Bytes("s3"));
     fooNode = fsdir.getINode4Write(foo_dir2.toString());
-    dir2Children = dir2Node.getChildrenList(null);
+    dir2Children = dir2Node.getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, dir2Children.size());
     dir2Diffs = dir2Node.getDiffs().asList();
     assertEquals(2, dir2Diffs.size());
-    assertEquals("s2", Snapshot.getSnapshotName(dir2Diffs.get(0).snapshot));
-    assertEquals("s3", Snapshot.getSnapshotName(dir2Diffs.get(1).snapshot));
+    assertEquals(s2.getId(), dir2Diffs.get(0).getSnapshotId());
+    assertEquals(s3.getId(), dir2Diffs.get(1).getSnapshotId());
     
     childrenDiff = dir2Diffs.get(0).getChildrenDiff();
     assertEquals(0, childrenDiff.getList(ListType.DELETED).size());
@@ -1452,8 +1487,8 @@ public class TestRenameWithSnapshots {
     assertTrue(fooNode instanceof INodeReference.DstReference);
     fooDiffs = fooNode.asDirectory().getDiffs().asList();
     assertEquals(2, fooDiffs.size());
-    assertEquals("s1", fooDiffs.get(0).snapshot.getRoot().getLocalName());
-    assertEquals("s3", fooDiffs.get(1).snapshot.getRoot().getLocalName());
+    assertEquals(s1.getId(), fooDiffs.get(0).getSnapshotId());
+    assertEquals(s3.getId(), fooDiffs.get(1).getSnapshotId());
   }
   
   /**
@@ -1489,9 +1524,9 @@ public class TestRenameWithSnapshots {
     INodeDirectory mockDir3 = spy(dir3);
     // fail the rename but succeed in undo
     doReturn(false).when(mockDir3).addChild((INode) Mockito.isNull(),
-        anyBoolean(), (Snapshot) anyObject());
+        anyBoolean(), Mockito.anyInt());
     Mockito.when(mockDir3.addChild((INode) Mockito.isNotNull(), anyBoolean(), 
-        (Snapshot) anyObject())).thenReturn(false).thenCallRealMethod();
+        Mockito.anyInt())).thenReturn(false).thenCallRealMethod();
     INodeDirectory root = fsdir.getINode4Write("/").asDirectory();
     root.replaceChild(dir3, mockDir3, fsdir.getINodeMap());
     foo3Node.setParent(mockDir3);
@@ -1551,7 +1586,7 @@ public class TestRenameWithSnapshots {
     INodeDirectory dir1Node = fsdir.getINode4Write(dir1.toString())
         .asDirectory();
     List<INode> childrenList = ReadOnlyList.Util.asList(dir1Node
-        .getChildrenList(null));
+        .getChildrenList(Snapshot.CURRENT_STATE_ID));
     assertEquals(1, childrenList.size());
     INode fooNode = childrenList.get(0);
     assertTrue(fooNode.asDirectory().isWithSnapshot());
@@ -1572,7 +1607,7 @@ public class TestRenameWithSnapshots {
     assertEquals(3, counts.get(Quota.NAMESPACE));
     assertEquals(0, counts.get(Quota.DISKSPACE));
     childrenList = ReadOnlyList.Util.asList(dir2Node.asDirectory()
-        .getChildrenList(null));
+        .getChildrenList(Snapshot.CURRENT_STATE_ID));
     assertEquals(1, childrenList.size());
     INode subdir2Node = childrenList.get(0);
     assertSame(dir2Node, subdir2Node.getParent());
@@ -1627,7 +1662,7 @@ public class TestRenameWithSnapshots {
     INodeDirectory dir1Node = fsdir.getINode4Write(dir1.toString())
         .asDirectory();
     List<INode> childrenList = ReadOnlyList.Util.asList(dir1Node
-        .getChildrenList(null));
+        .getChildrenList(Snapshot.CURRENT_STATE_ID));
     assertEquals(1, childrenList.size());
     INode fooNode = childrenList.get(0);
     assertTrue(fooNode.asDirectory().isWithSnapshot());
@@ -1646,7 +1681,7 @@ public class TestRenameWithSnapshots {
     assertEquals(4, counts.get(Quota.NAMESPACE));
     assertEquals(0, counts.get(Quota.DISKSPACE));
     childrenList = ReadOnlyList.Util.asList(dir2Node.asDirectory()
-        .getChildrenList(null));
+        .getChildrenList(Snapshot.CURRENT_STATE_ID));
     assertEquals(1, childrenList.size());
     INode subdir2Node = childrenList.get(0);
     assertTrue(subdir2Node.asDirectory().isWithSnapshot());
@@ -1690,14 +1725,18 @@ public class TestRenameWithSnapshots {
     }
     
     // check
+    INodeDirectorySnapshottable rootNode = (INodeDirectorySnapshottable) fsdir
+        .getINode4Write(root.toString());
     INodeDirectory fooNode = fsdir.getINode4Write(foo.toString()).asDirectory();
-    ReadOnlyList<INode> children = fooNode.getChildrenList(null);
+    ReadOnlyList<INode> children = fooNode
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, children.size());
     List<DirectoryDiff> diffList = fooNode.getDiffs().asList();
     assertEquals(1, diffList.size());
     DirectoryDiff diff = diffList.get(0);
     // this diff is generated while renaming
-    assertEquals(snap1, Snapshot.getSnapshotName(diff.snapshot));
+    Snapshot s1 = rootNode.getSnapshot(DFSUtil.string2Bytes(snap1));
+    assertEquals(s1.getId(), diff.getSnapshotId());
     // after undo, the diff should be empty
     assertTrue(diff.getChildrenDiff().getList(ListType.DELETED).isEmpty());
     assertTrue(diff.getChildrenDiff().getList(ListType.CREATED).isEmpty());
@@ -1709,7 +1748,7 @@ public class TestRenameWithSnapshots {
     List<FileDiff> barDiffList = barNode.getDiffs().asList();
     assertEquals(1, barDiffList.size());
     FileDiff barDiff = barDiffList.get(0);
-    assertEquals(snap1, Snapshot.getSnapshotName(barDiff.snapshot));
+    assertEquals(s1.getId(), barDiff.getSnapshotId());
     
     // restart cluster multiple times to make sure the fsimage and edits log are
     // correct. Note that when loading fsimage, foo and bar will be converted 
@@ -1941,12 +1980,14 @@ public class TestRenameWithSnapshots {
         (WithCount) fooRef.asReference().getReferredINode();
     assertEquals(1, wc.getReferenceCount());
     INodeDirectory fooNode = wc.getReferredINode().asDirectory();
-    ReadOnlyList<INode> children = fooNode.getChildrenList(null);
+    ReadOnlyList<INode> children = fooNode
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(1, children.size());
     assertEquals(bar.getName(), children.get(0).getLocalName());
     List<DirectoryDiff> diffList = fooNode.getDiffs().asList();
     assertEquals(1, diffList.size());
-    assertEquals("s1", Snapshot.getSnapshotName(diffList.get(0).snapshot));
+    Snapshot s1 = dir1Node.getSnapshot(DFSUtil.string2Bytes("s1"));
+    assertEquals(s1.getId(), diffList.get(0).getSnapshotId());
     ChildrenDiff diff = diffList.get(0).getChildrenDiff();
     assertEquals(0, diff.getList(ListType.CREATED).size());
     assertEquals(0, diff.getList(ListType.DELETED).size());
@@ -2009,14 +2050,16 @@ public class TestRenameWithSnapshots {
         (WithCount) fooRef.asReference().getReferredINode();
     assertEquals(2, wc.getReferenceCount());
     INodeDirectory fooNode = wc.getReferredINode().asDirectory();
-    ReadOnlyList<INode> children = fooNode.getChildrenList(null);
+    ReadOnlyList<INode> children = fooNode
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(3, children.size());
     assertEquals(bar.getName(), children.get(0).getLocalName());
     assertEquals(bar2.getName(), children.get(1).getLocalName());
     assertEquals(bar3.getName(), children.get(2).getLocalName());
     List<DirectoryDiff> diffList = fooNode.getDiffs().asList();
     assertEquals(1, diffList.size());
-    assertEquals("s1", Snapshot.getSnapshotName(diffList.get(0).snapshot));
+    Snapshot s1 = dir1Node.getSnapshot(DFSUtil.string2Bytes("s1"));
+    assertEquals(s1.getId(), diffList.get(0).getSnapshotId());
     ChildrenDiff diff = diffList.get(0).getChildrenDiff();
     // bar2 and bar3 in the created list
     assertEquals(2, diff.getList(ListType.CREATED).size());
@@ -2134,11 +2177,12 @@ public class TestRenameWithSnapshots {
     // recordModification before the rename
     assertTrue(fooNode.isWithSnapshot());
     assertTrue(fooNode.getDiffs().asList().isEmpty());
-    INodeDirectory barNode = fooNode.getChildrenList(null).get(0).asDirectory();
+    INodeDirectory barNode = fooNode.getChildrenList(Snapshot.CURRENT_STATE_ID)
+        .get(0).asDirectory();
     // bar should also be INodeDirectory (With Snapshot), and both of its diff 
     // list and children list are empty 
     assertTrue(barNode.getDiffs().asList().isEmpty());
-    assertTrue(barNode.getChildrenList(null).isEmpty());
+    assertTrue(barNode.getChildrenList(Snapshot.CURRENT_STATE_ID).isEmpty());
     
     restartClusterAndCheckImage(true);
   }
@@ -2210,7 +2254,10 @@ public class TestRenameWithSnapshots {
     List<DirectoryDiff> barDiffList = barNode.getDiffs().asList();
     assertEquals(1, barDiffList.size());
     DirectoryDiff diff = barDiffList.get(0);
-    assertEquals("s0", Snapshot.getSnapshotName(diff.snapshot));
+    INodeDirectorySnapshottable testNode = 
+        (INodeDirectorySnapshottable) fsdir.getINode4Write(test.toString());
+    Snapshot s0 = testNode.getSnapshot(DFSUtil.string2Bytes("s0"));
+    assertEquals(s0.getId(), diff.getSnapshotId());
     // and file should be stored in the deleted list of this snapshot diff
     assertEquals("file", diff.getChildrenDiff().getList(ListType.DELETED)
         .get(0).getLocalName());
@@ -2276,7 +2323,7 @@ public class TestRenameWithSnapshots {
     final Path barInS0 = SnapshotTestHelper.getSnapshotPath(test, "s0",
         "foo/bar");
     INodeDirectory barNode = fsdir.getINode(barInS0.toString()).asDirectory();
-    assertEquals(0, barNode.getChildrenList(null).size());
+    assertEquals(0, barNode.getChildrenList(Snapshot.CURRENT_STATE_ID).size());
     List<DirectoryDiff> diffList = barNode.getDiffs().asList();
     assertEquals(1, diffList.size());
     DirectoryDiff diff = diffList.get(0);
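
These TestRenameWithSnapshots hunks all follow the same migration: assertions that used to compare snapshot names on a stored Snapshot object now resolve the Snapshot by name on the snapshottable directory and compare ids, and children lists are requested with Snapshot.CURRENT_STATE_ID instead of a null Snapshot. A minimal sketch of that pattern, reusing the test class's fsdir fixture and existing imports; the path, snapshot name, and helper name are illustrative, not part of the patch:

    // Illustrative helper, not part of the patch: resolves a snapshot by name,
    // checks that the first directory diff was recorded for it, and counts the
    // live children via the new sentinel id instead of a null Snapshot.
    private void assertFirstDiffAndChildren(String dirPath, String snapshotName,
        int expectedCurrentChildren) throws Exception {
      INodeDirectorySnapshottable node =
          (INodeDirectorySnapshottable) fsdir.getINode(dirPath);
      Snapshot s = node.getSnapshot(DFSUtil.string2Bytes(snapshotName));
      List<DirectoryDiff> diffs = node.getDiffs().asList();
      assertEquals(s.getId(), diffs.get(0).getSnapshotId());
      assertEquals(expectedCurrentChildren,
          node.getChildrenList(Snapshot.CURRENT_STATE_ID).size());
    }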

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSetQuotaWithSnapshot.java

@@ -28,6 +28,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -153,7 +154,9 @@ public class TestSetQuotaWithSnapshot {
     assertTrue(subNode.asDirectory().isWithSnapshot());
     List<DirectoryDiff> diffList = subNode.asDirectory().getDiffs().asList();
     assertEquals(1, diffList.size());
-    assertEquals("s2", Snapshot.getSnapshotName(diffList.get(0).snapshot));
+    Snapshot s2 = ((INodeDirectorySnapshottable) dirNode).getSnapshot(DFSUtil
+        .string2Bytes("s2"));
+    assertEquals(s2.getId(), diffList.get(0).getSnapshotId());
     List<INode> createdList = diffList.get(0).getChildrenDiff().getList(ListType.CREATED);
     assertEquals(1, createdList.size());
     assertSame(fsdir.getINode4Write(file.toString()), createdList.get(0));

+ 17 - 13
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java

@@ -268,7 +268,8 @@ public class TestSnapshotDeletion {
         (INodeDirectory) fsdir.getINode(snapshotNoChangeDir.toString());
     // should still be an INodeDirectory
     assertEquals(INodeDirectory.class, snapshotNode.getClass());
-    ReadOnlyList<INode> children = snapshotNode.getChildrenList(null);
+    ReadOnlyList<INode> children = snapshotNode
+        .getChildrenList(Snapshot.CURRENT_STATE_ID);
     // check 2 children: noChangeFile and metaChangeFile2
     assertEquals(2, children.size());
     INode noChangeFileSCopy = children.get(1);
@@ -286,11 +287,11 @@ public class TestSnapshotDeletion {
     
     // check the replication factor of metaChangeFile2SCopy
     assertEquals(REPLICATION_1,
-        metaChangeFile2SCopy.getFileReplication(null));
+        metaChangeFile2SCopy.getFileReplication(Snapshot.CURRENT_STATE_ID));
     assertEquals(REPLICATION_1,
-        metaChangeFile2SCopy.getFileReplication(snapshot1));
+        metaChangeFile2SCopy.getFileReplication(snapshot1.getId()));
     assertEquals(REPLICATION,
-        metaChangeFile2SCopy.getFileReplication(snapshot0));
+        metaChangeFile2SCopy.getFileReplication(snapshot0.getId()));
     
     // Case 4: delete directory sub
     // before deleting sub, we first create a new file under sub
@@ -316,23 +317,25 @@ public class TestSnapshotDeletion {
     assertTrue(snapshotNode4Sub.isWithSnapshot());
     // the snapshot copy of sub has only one child subsub.
     // newFile should have been destroyed
-    assertEquals(1, snapshotNode4Sub.getChildrenList(null).size());
+    assertEquals(1, snapshotNode4Sub.getChildrenList(Snapshot.CURRENT_STATE_ID)
+        .size());
     // but should have two children, subsub and noChangeDir, when s1 was taken  
-    assertEquals(2, snapshotNode4Sub.getChildrenList(snapshot1).size());
+    assertEquals(2, snapshotNode4Sub.getChildrenList(snapshot1.getId()).size());
     
     // check the snapshot copy of subsub, which is contained in the subtree of
     // sub's snapshot copy
-    INode snapshotNode4Subsub = snapshotNode4Sub.getChildrenList(null).get(0);
+    INode snapshotNode4Subsub = snapshotNode4Sub.getChildrenList(
+        Snapshot.CURRENT_STATE_ID).get(0);
     assertTrue(snapshotNode4Subsub.asDirectory().isWithSnapshot());
     assertTrue(snapshotNode4Sub == snapshotNode4Subsub.getParent());
     // check the children of subsub
     INodeDirectory snapshotSubsubDir = (INodeDirectory) snapshotNode4Subsub;
-    children = snapshotSubsubDir.getChildrenList(null);
+    children = snapshotSubsubDir.getChildrenList(Snapshot.CURRENT_STATE_ID);
     assertEquals(2, children.size());
     assertEquals(children.get(0).getLocalName(), metaChangeFile1.getName());
     assertEquals(children.get(1).getLocalName(), newFileAfterS0.getName());
     // only one child before snapshot s0 
-    children = snapshotSubsubDir.getChildrenList(snapshot0);
+    children = snapshotSubsubDir.getChildrenList(snapshot0.getId());
     assertEquals(1, children.size());
     INode child = children.get(0);
     assertEquals(child.getLocalName(), metaChangeFile1.getName());
@@ -341,11 +344,11 @@ public class TestSnapshotDeletion {
     assertTrue(metaChangeFile1SCopy.isWithSnapshot());
     assertFalse(metaChangeFile1SCopy.isUnderConstruction());
     assertEquals(REPLICATION_1,
-        metaChangeFile1SCopy.getFileReplication(null));
+        metaChangeFile1SCopy.getFileReplication(Snapshot.CURRENT_STATE_ID));
     assertEquals(REPLICATION_1,
-        metaChangeFile1SCopy.getFileReplication(snapshot1));
+        metaChangeFile1SCopy.getFileReplication(snapshot1.getId()));
     assertEquals(REPLICATION,
-        metaChangeFile1SCopy.getFileReplication(snapshot0));
+        metaChangeFile1SCopy.getFileReplication(snapshot0.getId()));
   }
   
   /**
@@ -474,9 +477,10 @@ public class TestSnapshotDeletion {
         (INodeDirectorySnapshottable) fsdir.getINode(dir.toString());
     Snapshot snapshot0 = dirNode.getSnapshot(DFSUtil.string2Bytes("s0"));
     assertNull(snapshot0);
+    Snapshot snapshot1 = dirNode.getSnapshot(DFSUtil.string2Bytes("s1"));
     DirectoryDiffList diffList = dirNode.getDiffs();
     assertEquals(1, diffList.asList().size());
-    assertEquals("s1", diffList.getLast().snapshot.getRoot().getLocalName());
+    assertEquals(snapshot1.getId(), diffList.getLast().getSnapshotId());
     diffList = fsdir.getINode(metaChangeDir.toString()).asDirectory()
         .getDiffs();
     assertEquals(0, diffList.asList().size());

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotRename.java

@@ -101,7 +101,8 @@ public class TestSnapshotRename {
     List<DirectoryDiff> listByTime = srcRoot.getDiffs().asList();
     assertEquals(names.length, listByTime.size());
     for (int i = 0; i < listByTime.size(); i++) {
-      assertEquals(names[i], listByTime.get(i).getSnapshot().getRoot().getLocalName());
+      Snapshot s = srcRoot.getSnapshotById(listByTime.get(i).getSnapshotId());
+      assertEquals(names[i], s.getRoot().getLocalName());
     }
   }
   

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotReplication.java

@@ -150,7 +150,7 @@ public class TestSnapshotReplication {
       assertEquals(expectedBlockRep, ssInode.getBlockReplication());
       // Also check the number derived from INodeFile#getFileReplication
       assertEquals(snapshotRepMap.get(ss).shortValue(),
-          ssInode.getFileReplication(iip.getPathSnapshot()));
+          ssInode.getFileReplication(iip.getPathSnapshotId()));
     }
   }
   

+ 93 - 124
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java

@@ -26,8 +26,6 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
-import java.util.HashMap;
-import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -36,168 +34,142 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
 import org.apache.hadoop.hdfs.server.namenode.OfflineEditsViewerHelper;
 import org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer.Flags;
 import org.apache.hadoop.test.PathUtils;
+import org.junit.After;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
 
-public class TestOfflineEditsViewer {
-  private static final Log LOG = LogFactory.getLog(TestOfflineEditsViewer.class);
-
-  private static final Map<FSEditLogOpCodes, Boolean> obsoleteOpCodes =
-    new HashMap<FSEditLogOpCodes, Boolean>();
-
-  private static final Map<FSEditLogOpCodes, Boolean> missingOpCodes =
-      new HashMap<FSEditLogOpCodes, Boolean>();
+import com.google.common.collect.ImmutableSet;
 
-  static {
-    initializeObsoleteOpCodes();
-    initializeMissingOpCodes();
-  }
-
-  private static String buildDir =
-    PathUtils.getTestDirName(TestOfflineEditsViewer.class);
+public class TestOfflineEditsViewer {
+  private static final Log LOG = LogFactory
+      .getLog(TestOfflineEditsViewer.class);
 
-  private static String cacheDir =
-    System.getProperty("test.cache.data", "build/test/cache");
+  private static String buildDir = PathUtils
+      .getTestDirName(TestOfflineEditsViewer.class);
 
   // to create edits and get edits filename
-  private static final OfflineEditsViewerHelper nnHelper 
-    = new OfflineEditsViewerHelper();
+  private static final OfflineEditsViewerHelper nnHelper = new OfflineEditsViewerHelper();
+  private static final ImmutableSet<FSEditLogOpCodes> skippedOps = skippedOps();
 
-  /**
-   * Initialize obsoleteOpCodes
-   *
-   * Reason for suppressing "deprecation" warnings:
-   *
-   * These are the opcodes that are not used anymore, some
-   * are marked deprecated, we need to include them here to make
-   * sure we exclude them when checking for completeness of testing,
-   * that's why the "deprecation" warnings are suppressed.
-   */
   @SuppressWarnings("deprecation")
-  private static void initializeObsoleteOpCodes() {
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_ADD, true);
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_REMOVE, true);
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_NS_QUOTA, true);
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA, true);
+  private static ImmutableSet<FSEditLogOpCodes> skippedOps() {
+    ImmutableSet.Builder<FSEditLogOpCodes> b = ImmutableSet
+        .<FSEditLogOpCodes> builder();
+
+    // Deprecated opcodes
+    b.add(FSEditLogOpCodes.OP_DATANODE_ADD)
+        .add(FSEditLogOpCodes.OP_DATANODE_REMOVE)
+        .add(FSEditLogOpCodes.OP_SET_NS_QUOTA)
+        .add(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA)
+        .add(FSEditLogOpCodes.OP_SET_GENSTAMP_V1);
+
+    // Cannot test delegation-token-related code in an insecure setup
+    b.add(FSEditLogOpCodes.OP_GET_DELEGATION_TOKEN)
+        .add(FSEditLogOpCodes.OP_RENEW_DELEGATION_TOKEN)
+        .add(FSEditLogOpCodes.OP_CANCEL_DELEGATION_TOKEN);
+
+    // Skip invalid opcode
+    b.add(FSEditLogOpCodes.OP_INVALID);
+    return b.build();
   }
 
-  /**
-   * Initialize missingOpcodes
-   *
-   *  Opcodes that are not available except after uprade from
-   *  an older version. We don't test these here.
-   */
-  private static void initializeMissingOpCodes() {
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_GENSTAMP_V1, true);
-  }
+  @Rule
+  public final TemporaryFolder folder = new TemporaryFolder();
 
   @Before
-  public void setup() {
-    new File(cacheDir).mkdirs();
+  public void setUp() throws IOException {
+    nnHelper.startCluster(buildDir + "/dfs/");
+  }
+
+  @After
+  public void tearDown() throws IOException {
+    nnHelper.shutdownCluster();
   }
-  
+
   /**
    * Test the OfflineEditsViewer
    */
   @Test
   public void testGenerated() throws IOException {
-
-    LOG.info("START - testing with generated edits");
-
-    nnHelper.startCluster(buildDir + "/dfs/");
-
     // edits generated by nnHelper (MiniDFSCluster), should have all op codes
     // binary, XML, reparsed binary
-    String edits          = nnHelper.generateEdits();
-    String editsParsedXml = cacheDir + "/editsParsed.xml";
-    String editsReparsed  = cacheDir + "/editsReparsed";
+    String edits = nnHelper.generateEdits();
+    String editsParsedXml = folder.newFile("editsParsed.xml").getAbsolutePath();
+    String editsReparsed = folder.newFile("editsParsed").getAbsolutePath();
 
     // parse to XML then back to binary
     assertEquals(0, runOev(edits, editsParsedXml, "xml", false));
     assertEquals(0, runOev(editsParsedXml, editsReparsed, "binary", false));
 
     // judgment time
+    assertTrue("Edits " + edits + " should have all op codes",
+        hasAllOpCodes(edits));
+    LOG.info("Comparing generated file " + editsReparsed
+        + " with reference file " + edits);
     assertTrue(
-      "Edits " + edits + " should have all op codes",
-      hasAllOpCodes(edits));
-    LOG.info("Comparing generated file " + editsReparsed +
-             " with reference file " + edits);
-    assertTrue(
-      "Generated edits and reparsed (bin to XML to bin) should be same",
-      filesEqualIgnoreTrailingZeros(edits, editsReparsed));
-
-    // removes edits so do this at the end
-    nnHelper.shutdownCluster();
-
-    LOG.info("END");
+        "Generated edits and reparsed (bin to XML to bin) should be same",
+        filesEqualIgnoreTrailingZeros(edits, editsReparsed));
   }
 
   @Test
   public void testRecoveryMode() throws IOException {
-    LOG.info("START - testing with generated edits");
-
-    nnHelper.startCluster(buildDir + "/dfs/");
-
     // edits generated by nnHelper (MiniDFSCluster), should have all op codes
     // binary, XML, reparsed binary
-    String edits          = nnHelper.generateEdits();
-    
+    String edits = nnHelper.generateEdits();
+    FileOutputStream os = new FileOutputStream(edits, true);
     // Corrupt the file by truncating the end
-    FileChannel editsFile = new FileOutputStream(edits, true).getChannel();
+    FileChannel editsFile = os.getChannel();
     editsFile.truncate(editsFile.size() - 5);
-    
-    String editsParsedXml = cacheDir + "/editsRecoveredParsed.xml";
-    String editsReparsed  = cacheDir + "/editsRecoveredReparsed";
-    String editsParsedXml2 = cacheDir + "/editsRecoveredParsed2.xml";
+
+    String editsParsedXml = folder.newFile("editsRecoveredParsed.xml")
+        .getAbsolutePath();
+    String editsReparsed = folder.newFile("editsRecoveredReparsed")
+        .getAbsolutePath();
+    String editsParsedXml2 = folder.newFile("editsRecoveredParsed2.xml")
+        .getAbsolutePath();
 
     // Can't read the corrupted file without recovery mode
     assertEquals(-1, runOev(edits, editsParsedXml, "xml", false));
-    
+
     // parse to XML then back to binary
     assertEquals(0, runOev(edits, editsParsedXml, "xml", true));
-    assertEquals(0, runOev(editsParsedXml, editsReparsed,  "binary", false));
+    assertEquals(0, runOev(editsParsedXml, editsReparsed, "binary", false));
     assertEquals(0, runOev(editsReparsed, editsParsedXml2, "xml", false));
 
     // judgment time
     assertTrue("Test round trip",
-      filesEqualIgnoreTrailingZeros(editsParsedXml, editsParsedXml2));
+        filesEqualIgnoreTrailingZeros(editsParsedXml, editsParsedXml2));
 
-    // removes edits so do this at the end
-    nnHelper.shutdownCluster();
-
-    LOG.info("END");
+    os.close();
   }
 
   @Test
   public void testStored() throws IOException {
-
-    LOG.info("START - testing with stored reference edits");
-
     // reference edits stored with source code (see build.xml)
+    final String cacheDir = System.getProperty("test.cache.data",
+        "build/test/cache");
     // binary, XML, reparsed binary
-    String editsStored             = cacheDir + "/editsStored";
-    String editsStoredParsedXml    = cacheDir + "/editsStoredParsed.xml";
-    String editsStoredReparsed     = cacheDir + "/editsStoredReparsed";
+    String editsStored = cacheDir + "/editsStored";
+    String editsStoredParsedXml = cacheDir + "/editsStoredParsed.xml";
+    String editsStoredReparsed = cacheDir + "/editsStoredReparsed";
     // reference XML version of editsStored (see build.xml)
-    String editsStoredXml          = cacheDir + "/editsStored.xml";
-      
+    String editsStoredXml = cacheDir + "/editsStored.xml";
+
     // parse to XML then back to binary
     assertEquals(0, runOev(editsStored, editsStoredParsedXml, "xml", false));
-    assertEquals(0, runOev(editsStoredParsedXml, editsStoredReparsed,
-        "binary", false));
+    assertEquals(0,
+        runOev(editsStoredParsedXml, editsStoredReparsed, "binary", false));
 
     // judgement time
+    assertTrue("Edits " + editsStored + " should have all op codes",
+        hasAllOpCodes(editsStored));
+    assertTrue("Reference XML edits and parsed to XML should be same",
+        filesEqual(editsStoredXml, editsStoredParsedXml));
     assertTrue(
-      "Edits " + editsStored + " should have all op codes",
-      hasAllOpCodes(editsStored));
-    assertTrue(
-      "Reference XML edits and parsed to XML should be same",
-      filesEqual(editsStoredXml, editsStoredParsedXml));
-    assertTrue(
-      "Reference edits and reparsed (bin to XML to bin) should be same",
-      filesEqualIgnoreTrailingZeros(editsStored, editsStoredReparsed));
-
-    LOG.info("END");
+        "Reference edits and reparsed (bin to XML to bin) should be same",
+        filesEqualIgnoreTrailingZeros(editsStored, editsStoredReparsed));
   }
 
   /**
@@ -233,22 +205,17 @@ public class TestOfflineEditsViewer {
     OfflineEditsViewer oev = new OfflineEditsViewer();
     if (oev.go(inFilename, outFilename, "stats", new Flags(), visitor) != 0)
       return false;
-    LOG.info("Statistics for " + inFilename + "\n" +
-      visitor.getStatisticsString());
-    
+    LOG.info("Statistics for " + inFilename + "\n"
+        + visitor.getStatisticsString());
+
     boolean hasAllOpCodes = true;
-    for(FSEditLogOpCodes opCode : FSEditLogOpCodes.values()) {
+    for (FSEditLogOpCodes opCode : FSEditLogOpCodes.values()) {
       // don't need to test obsolete opCodes
-      if(obsoleteOpCodes.containsKey(opCode)) {
+      if (skippedOps.contains(opCode))
         continue;
-      } else if (missingOpCodes.containsKey(opCode)) {
-        continue;
-      } else if (opCode == FSEditLogOpCodes.OP_INVALID) {
-        continue;
-      }
 
       Long count = visitor.getStatistics().get(opCode);
-      if((count == null) || (count == 0)) {
+      if ((count == null) || (count == 0)) {
         hasAllOpCodes = false;
         LOG.info("Opcode " + opCode + " not tested in " + inFilename);
       }
@@ -257,9 +224,9 @@ public class TestOfflineEditsViewer {
   }
 
   /**
-   * Compare two files, ignore trailing zeros at the end,
-   * for edits log the trailing zeros do not make any difference,
-   * throw exception is the files are not same
+   * Compare two files, ignoring trailing zeros at the end; for an edits log
+   * the trailing zeros do not make any difference. Throws an exception if the
+   * files are not the same.
    *
    * @param filenameSmall first file to compare (doesn't have to be smaller)
    * @param filenameLarge second file to compare (doesn't have to be larger)
@@ -271,7 +238,7 @@ public class TestOfflineEditsViewer {
     ByteBuffer large = ByteBuffer.wrap(DFSTestUtil.loadFile(filenameLarge));
 
     // now correct if it's otherwise
-    if(small.capacity() > large.capacity()) {
+    if (small.capacity() > large.capacity()) {
       ByteBuffer tmpByteBuffer = small;
       small = large;
       large = tmpByteBuffer;
@@ -288,13 +255,15 @@ public class TestOfflineEditsViewer {
     large.limit(small.capacity());
 
     // compares position to limit
-    if(!small.equals(large)) { return false; }
+    if (!small.equals(large)) {
+      return false;
+    }
 
     // everything after limit should be 0xFF
     int i = large.limit();
     large.clear();
-    for(; i < large.capacity(); i++) {
-      if(large.get(i) != FSEditLogOpCodes.OP_INVALID.getOpCode()) {
+    for (; i < large.capacity(); i++) {
+      if (large.get(i) != FSEditLogOpCodes.OP_INVALID.getOpCode()) {
         return false;
       }
     }

+ 5 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -196,6 +196,8 @@ Release 2.4.0 - UNRELEASED
     MAPREDUCE-5550. Task Status message (reporter.setStatus) not shown in UI
     with Hadoop 2.0 (Gera Shegalov via Sandy Ryza)
 
+    MAPREDUCE-3310. Custom grouping comparator cannot be set for Combiners (tucu)
+
   OPTIMIZATIONS
 
     MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza)
@@ -267,6 +269,9 @@ Release 2.4.0 - UNRELEASED
     MAPREDUCE-5685. Fixed a bug with JobContext getCacheFiles API inside the
     WrappedReducer class. (Yi Song via vinodkv)
 
+    MAPREDUCE-5689. MRAppMaster does not preempt reducers when scheduled maps 
+    cannot be fulfilled. (lohit via kasha)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 2 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java

@@ -229,7 +229,8 @@ public class RMContainerAllocator extends RMContainerRequestor
 
     int completedMaps = getJob().getCompletedMaps();
     int completedTasks = completedMaps + getJob().getCompletedReduces();
-    if (lastCompletedTasks != completedTasks) {
+    if ((lastCompletedTasks != completedTasks) ||
+          (scheduledRequests.maps.size() > 0)) {
       lastCompletedTasks = completedTasks;
       recalculateReduceSchedule = true;
     }
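
The MAPREDUCE-5689 fix above widens the trigger for recomputing the reduce schedule: it now fires not only when the completed-task count changes but also whenever maps are still waiting to be scheduled, so the allocator gets a chance to ramp down (preempt) reducers that are starving those maps. A simplified restatement of the new predicate; the method and parameter names are illustrative, not part of RMContainerAllocator:

    // Simplified sketch of the new condition: recompute the reduce schedule
    // when progress has been made OR map tasks are still awaiting containers.
    static boolean shouldRecalculateReduceSchedule(int lastCompletedTasks,
        int completedTasks, int scheduledMaps) {
      return (lastCompletedTasks != completedTasks) || (scheduledMaps > 0);
    }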

+ 15 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java

@@ -1604,6 +1604,21 @@ public class TestRMContainerAllocator {
         numPendingReduces, 
         maxReduceRampupLimit, reduceSlowStart);
     verify(allocator).rampDownReduces(anyInt());
+
+    // Test reduce ramp-down when there are scheduled maps.
+    // Since we have two scheduled maps, rampDownReduces
+    // should be invoked twice.
+    scheduledMaps = 2;
+    assignedReduces = 2;
+    doReturn(10 * 1024).when(allocator).getMemLimit();
+    allocator.scheduleReduces(
+        totalMaps, succeededMaps, 
+        scheduledMaps, scheduledReduces, 
+        assignedMaps, assignedReduces, 
+        mapResourceReqt, reduceResourceReqt, 
+        numPendingReduces, 
+        maxReduceRampupLimit, reduceSlowStart);
+    verify(allocator, times(2)).rampDownReduces(anyInt());
   }
 
   private static class RecalculateContainerAllocator extends MyContainerAllocator {

+ 51 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java

@@ -949,12 +949,29 @@ public class JobConf extends Configuration {
     return get(KeyFieldBasedPartitioner.PARTITIONER_OPTIONS);
   }
 
+  /**
+   * Get the user defined {@link WritableComparable} comparator for
+   * grouping keys of inputs to the combiner.
+   *
+   * @return comparator set by the user for grouping values.
+   * @see #setCombinerKeyGroupingComparator(Class) for details.
+   */
+  public RawComparator getCombinerKeyGroupingComparator() {
+    Class<? extends RawComparator> theClass = getClass(
+        JobContext.COMBINER_GROUP_COMPARATOR_CLASS, null, RawComparator.class);
+    if (theClass == null) {
+      return getOutputKeyComparator();
+    }
+
+    return ReflectionUtils.newInstance(theClass, this);
+  }
+
   /** 
    * Get the user defined {@link WritableComparable} comparator for 
    * grouping keys of inputs to the reduce.
    * 
    * @return comparator set by the user for grouping values.
-   * @see #setOutputValueGroupingComparator(Class) for details.  
+   * @see #setOutputValueGroupingComparator(Class) for details.
    */
   public RawComparator getOutputValueGroupingComparator() {
     Class<? extends RawComparator> theClass = getClass(
@@ -966,6 +983,37 @@ public class JobConf extends Configuration {
     return ReflectionUtils.newInstance(theClass, this);
   }
 
+  /**
+   * Set the user defined {@link RawComparator} comparator for
+   * grouping keys in the input to the combiner.
+   * <p/>
+   * <p>This comparator should be provided if the equivalence rules for keys
+   * for sorting the intermediates are different from those for grouping keys
+   * before each call to
+   * {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
+   * <p/>
+   * <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
+   * in a single call to the reduce function if K1 and K2 compare as equal.</p>
+   * <p/>
+   * <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
+   * how keys are sorted, this can be used in conjunction to simulate
+   * <i>secondary sort on values</i>.</p>
+   * <p/>
+   * <p><i>Note</i>: This is not a guarantee of the combiner sort being
+   * <i>stable</i> in any sense. (In any case, with the order of available
+   * map-outputs to the combiner being non-deterministic, it wouldn't make
+   * that much sense.)</p>
+   *
+   * @param theClass the comparator class to be used for grouping keys for the
+   * combiner. It should implement <code>RawComparator</code>.
+   * @see #setOutputKeyComparatorClass(Class)
+   */
+  public void setCombinerKeyGroupingComparator(
+      Class<? extends RawComparator> theClass) {
+    setClass(JobContext.COMBINER_GROUP_COMPARATOR_CLASS,
+        theClass, RawComparator.class);
+  }
+
   /** 
    * Set the user defined {@link RawComparator} comparator for 
    * grouping keys in the input to the reduce.
@@ -989,7 +1037,8 @@ public class JobConf extends Configuration {
    * 
    * @param theClass the comparator class to be used for grouping keys. 
    *                 It should implement <code>RawComparator</code>.
-   * @see #setOutputKeyComparatorClass(Class)                 
+   * @see #setOutputKeyComparatorClass(Class)
+   * @see #setCombinerKeyGroupingComparator(Class)
    */
   public void setOutputValueGroupingComparator(
       Class<? extends RawComparator> theClass) {
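
Taken together, the new getter and setter above let the old mapred API pick a grouping comparator for the combiner that differs from the sort comparator; when none is set, getCombinerKeyGroupingComparator() falls back to getOutputKeyComparator(), preserving the previous behaviour. A minimal usage sketch; MyCombiner and MyGroupComparator stand for user-supplied classes and are not part of this patch (TestOldCombinerGrouping, added later in this commit, exercises the feature end to end):

    // Hypothetical old-API wiring; MyCombiner is a user-supplied Reducer and
    // MyGroupComparator implements RawComparator<Text>.
    JobConf job = new JobConf();
    job.setCombinerClass(MyCombiner.class);
    // Keys that MyGroupComparator treats as equal are grouped into a single
    // reduce() call on the combiner, independently of the sort comparator.
    job.setCombinerKeyGroupingComparator(MyGroupComparator.class);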

+ 3 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java

@@ -1575,7 +1575,8 @@ abstract public class Task implements Writable, Configurable {
       combinerClass = cls;
       keyClass = (Class<K>) job.getMapOutputKeyClass();
       valueClass = (Class<V>) job.getMapOutputValueClass();
-      comparator = (RawComparator<K>) job.getOutputKeyComparator();
+      comparator = (RawComparator<K>)
+          job.getCombinerKeyGroupingComparator();
     }
 
     @SuppressWarnings("unchecked")
@@ -1624,7 +1625,7 @@ abstract public class Task implements Writable, Configurable {
       this.taskId = taskId;
       keyClass = (Class<K>) context.getMapOutputKeyClass();
       valueClass = (Class<V>) context.getMapOutputValueClass();
-      comparator = (RawComparator<K>) context.getSortComparator();
+      comparator = (RawComparator<K>) context.getCombinerKeyGroupingComparator();
       this.committer = committer;
     }
 

+ 17 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java

@@ -948,11 +948,27 @@ public class Job extends JobContextImpl implements JobContext {
     conf.setOutputValueClass(theClass);
   }
 
+  /**
+   * Define the comparator that controls which keys are grouped together
+   * for a single call to the combiner,
+   * {@link Reducer#reduce(Object, Iterable,
+   * org.apache.hadoop.mapreduce.Reducer.Context)}
+   *
+   * @param cls the raw comparator to use
+   * @throws IllegalStateException if the job is submitted
+   */
+  public void setCombinerKeyGroupingComparatorClass(
+      Class<? extends RawComparator> cls) throws IllegalStateException {
+    ensureState(JobState.DEFINE);
+    conf.setCombinerKeyGroupingComparator(cls);
+  }
+
   /**
    * Define the comparator that controls how the keys are sorted before they
    * are passed to the {@link Reducer}.
    * @param cls the raw comparator
    * @throws IllegalStateException if the job is submitted
+   * @see #setCombinerKeyGroupingComparatorClass(Class)
    */
   public void setSortComparatorClass(Class<? extends RawComparator> cls
                                      ) throws IllegalStateException {
@@ -967,6 +983,7 @@ public class Job extends JobContextImpl implements JobContext {
    *                       org.apache.hadoop.mapreduce.Reducer.Context)}
    * @param cls the raw comparator to use
    * @throws IllegalStateException if the job is submitted
+   * @see #setCombinerKeyGroupingComparatorClass(Class)
    */
   public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                          ) throws IllegalStateException {
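
On the new mapreduce API the same knob is exposed on Job, configured independently of the reduce-side comparator set via setGroupingComparatorClass(Class). A short sketch, again with hypothetical user classes (TestNewCombinerGrouping, added later in this commit, is the authoritative example):

    // Hypothetical new-API wiring; MyCombiner and MyGroupComparator are
    // user-supplied classes, not part of this patch.
    Job job = Job.getInstance(new Configuration());
    job.setCombinerClass(MyCombiner.class);
    job.setCombinerKeyGroupingComparatorClass(MyGroupComparator.class);
    // The reducer keeps its own, separately configured grouping comparator:
    job.setGroupingComparatorClass(MyGroupComparator.class);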

+ 16 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java

@@ -167,13 +167,23 @@ public interface JobContext extends MRJobConfig {
    */
   public String getJar();
 
-  /** 
-   * Get the user defined {@link RawComparator} comparator for 
-   * grouping keys of inputs to the reduce.
-   * 
+  /**
+   * Get the user defined {@link RawComparator} comparator for
+   * grouping keys of inputs to the combiner.
+   *
    * @return comparator set by the user for grouping values.
-   * @see Job#setGroupingComparatorClass(Class) for details.  
-   */
+   * @see Job#setCombinerKeyGroupingComparatorClass(Class)
+   */
+  public RawComparator<?> getCombinerKeyGroupingComparator();
+
+  /**
+   * Get the user defined {@link RawComparator} comparator for
+   * grouping keys of inputs to the reduce.
+   *
+   * @return comparator set by the user for grouping values.
+   * @see Job#setGroupingComparatorClass(Class)
+   * @see #getCombinerKeyGroupingComparator()
+   */
   public RawComparator<?> getGroupingComparator();
   
   /**

+ 2 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java

@@ -93,6 +93,8 @@ public interface MRJobConfig {
 
   public static final String KEY_COMPARATOR = "mapreduce.job.output.key.comparator.class";
 
+  public static final String COMBINER_GROUP_COMPARATOR_CLASS = "mapreduce.job.combiner.group.comparator.class";
+
   public static final String GROUP_COMPARATOR_CLASS = "mapreduce.job.output.group.comparator.class";
 
   public static final String WORKING_DIR = "mapreduce.job.working.dir";

+ 5 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapContextImpl.java

@@ -166,6 +166,11 @@ class ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
     return base.getFileTimestamps();
   }
 
+  @Override
+  public RawComparator<?> getCombinerKeyGroupingComparator() {
+    return base.getCombinerKeyGroupingComparator();
+  }
+
   @Override
   public RawComparator<?> getGroupingComparator() {
     return base.getGroupingComparator();

+ 5 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReduceContextImpl.java

@@ -159,6 +159,11 @@ class ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
     return base.getFileTimestamps();
   }
 
+  @Override
+  public RawComparator<?> getCombinerKeyGroupingComparator() {
+    return base.getCombinerKeyGroupingComparator();
+  }
+
   @Override
   public RawComparator<?> getGroupingComparator() {
     return base.getGroupingComparator();

+ 5 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/map/WrappedMapper.java

@@ -168,6 +168,11 @@ public class WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
       return mapContext.getFileTimestamps();
     }
 
+    @Override
+    public RawComparator<?> getCombinerKeyGroupingComparator() {
+      return mapContext.getCombinerKeyGroupingComparator();
+    }
+
     @Override
     public RawComparator<?> getGroupingComparator() {
       return mapContext.getGroupingComparator();

+ 5 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/reduce/WrappedReducer.java

@@ -161,6 +161,11 @@ public class WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
       return reduceContext.getFileTimestamps();
     }
 
+    @Override
+    public RawComparator<?> getCombinerKeyGroupingComparator() {
+      return reduceContext.getCombinerKeyGroupingComparator();
+    }
+
     @Override
     public RawComparator<?> getGroupingComparator() {
       return reduceContext.getGroupingComparator();

+ 11 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java

@@ -252,6 +252,17 @@ public class JobContextImpl implements JobContext {
     return conf.getJar();
   }
 
+  /**
+   * Get the user defined {@link RawComparator} comparator for
+   * grouping keys of inputs to the combiner.
+   *
+   * @return comparator set by the user for grouping values.
+   * @see Job#setCombinerKeyGroupingComparatorClass(Class) for details.
+   */
+  public RawComparator<?> getCombinerKeyGroupingComparator() {
+    return conf.getCombinerKeyGroupingComparator();
+  }
+
   /** 
    * Get the user defined {@link RawComparator} comparator for 
    * grouping keys of inputs to the reduce.

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java

@@ -582,7 +582,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
     Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
     Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
     RawComparator<K> comparator = 
-      (RawComparator<K>)job.getOutputKeyComparator();
+      (RawComparator<K>)job.getCombinerKeyGroupingComparator();
     try {
       CombineValuesIterator values = new CombineValuesIterator(
           kvIter, comparator, keyClass, valClass, job, Reporter.NULL,

+ 191 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestOldCombinerGrouping.java

@@ -0,0 +1,191 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import junit.framework.Assert;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.UUID;
+
+public class TestOldCombinerGrouping {
+  private static String TEST_ROOT_DIR =
+      new File("build", UUID.randomUUID().toString()).getAbsolutePath();
+
+  public static class Map implements
+      Mapper<LongWritable, Text, Text, LongWritable> {
+    @Override
+    public void map(LongWritable key, Text value,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
+      String v = value.toString();
+      String k = v.substring(0, v.indexOf(","));
+      v = v.substring(v.indexOf(",") + 1);
+      output.collect(new Text(k), new LongWritable(Long.parseLong(v)));
+    }
+
+    @Override
+    public void close() throws IOException {
+    }
+
+    @Override
+    public void configure(JobConf job) {
+    }
+  }
+
+  public static class Reduce implements
+      Reducer<Text, LongWritable, Text, LongWritable> {
+
+    @Override
+    public void reduce(Text key, Iterator<LongWritable> values,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
+      LongWritable maxValue = null;
+      while (values.hasNext()) {
+        LongWritable value = values.next();
+        if (maxValue == null) {
+          maxValue = value;
+        } else if (value.compareTo(maxValue) > 0) {
+          maxValue = value;
+        }
+      }
+      output.collect(key, maxValue);
+    }
+
+    @Override
+    public void close() throws IOException {
+    }
+
+    @Override
+    public void configure(JobConf job) {
+    }
+  }
+
+  public static class Combiner extends Reduce {
+  }
+
+  public static class GroupComparator implements RawComparator<Text> {
+    @Override
+    public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
+        int i4) {
+      byte[] b1 = new byte[i2];
+      System.arraycopy(bytes, i, b1, 0, i2);
+
+      byte[] b2 = new byte[i4];
+      System.arraycopy(bytes2, i3, b2, 0, i4);
+
+      return compare(new Text(new String(b1)), new Text(new String(b2)));
+    }
+
+    @Override
+    public int compare(Text o1, Text o2) {
+      String s1 = o1.toString();
+      String s2 = o2.toString();
+      s1 = s1.substring(0, s1.indexOf("|"));
+      s2 = s2.substring(0, s2.indexOf("|"));
+      return s1.compareTo(s2);
+    }
+
+  }
+
+  @Test
+  public void testCombiner() throws Exception {
+    if (!new File(TEST_ROOT_DIR).mkdirs()) {
+      throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
+    }
+    File in = new File(TEST_ROOT_DIR, "input");
+    if (!in.mkdirs()) {
+      throw new RuntimeException("Could not create test dir: " + in);
+    }
+    File out = new File(TEST_ROOT_DIR, "output");
+    PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
+    pw.println("A|a,1");
+    pw.println("A|b,2");
+    pw.println("B|a,3");
+    pw.println("B|b,4");
+    pw.println("B|c,5");
+    pw.close();
+    JobConf job = new JobConf();
+    job.set("mapreduce.framework.name", "local");
+    TextInputFormat.setInputPaths(job, new Path(in.getPath()));
+    TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
+    job.setMapperClass(Map.class);
+    job.setReducerClass(Reduce.class);
+    job.setInputFormat(TextInputFormat.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(LongWritable.class);
+    job.setOutputFormat(TextOutputFormat.class);
+    job.setOutputValueGroupingComparator(GroupComparator.class);
+
+    job.setCombinerClass(Combiner.class);
+    job.setCombinerKeyGroupingComparator(GroupComparator.class);
+    job.setInt("min.num.spills.for.combine", 0);
+
+    JobClient client = new JobClient(job);
+    RunningJob runningJob = client.submitJob(job);
+    runningJob.waitForCompletion();
+    if (runningJob.isSuccessful()) {
+      Counters counters = runningJob.getCounters();
+
+      long combinerInputRecords = counters.getGroup(
+          "org.apache.hadoop.mapreduce.TaskCounter").
+          getCounter("COMBINE_INPUT_RECORDS");
+      long combinerOutputRecords = counters.getGroup(
+          "org.apache.hadoop.mapreduce.TaskCounter").
+          getCounter("COMBINE_OUTPUT_RECORDS");
+      Assert.assertTrue(combinerInputRecords > 0);
+      Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
+
+      BufferedReader br = new BufferedReader(new FileReader(
+          new File(out, "part-00000")));
+      Set<String> output = new HashSet<String>();
+      String line = br.readLine();
+      Assert.assertNotNull(line);
+      output.add(line.substring(0, 1) + line.substring(4, 5));
+      line = br.readLine();
+      Assert.assertNotNull(line);
+      output.add(line.substring(0, 1) + line.substring(4, 5));
+      line = br.readLine();
+      Assert.assertNull(line);
+      br.close();
+
+      Set<String> expected = new HashSet<String>();
+      expected.add("A2");
+      expected.add("B5");
+
+      Assert.assertEquals(expected, output);
+
+    } else {
+      Assert.fail("Job failed");
+    }
+  }
+
+}

+ 178 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestNewCombinerGrouping.java

@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce;
+
+import junit.framework.Assert;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.UUID;
+
+public class TestNewCombinerGrouping {
+  private static String TEST_ROOT_DIR =
+      new File("build", UUID.randomUUID().toString()).getAbsolutePath();
+
+  public static class Map extends
+      Mapper<LongWritable, Text, Text, LongWritable> {
+
+    @Override
+    protected void map(LongWritable key, Text value,
+        Context context)
+        throws IOException, InterruptedException {
+      String v = value.toString();
+      String k = v.substring(0, v.indexOf(","));
+      v = v.substring(v.indexOf(",") + 1);
+      context.write(new Text(k), new LongWritable(Long.parseLong(v)));
+    }
+  }
+
+  public static class Reduce extends
+      Reducer<Text, LongWritable, Text, LongWritable> {
+
+    @Override
+    protected void reduce(Text key, Iterable<LongWritable> values,
+        Context context)
+        throws IOException, InterruptedException {
+      LongWritable maxValue = null;
+      for (LongWritable value : values) {
+        if (maxValue == null) {
+          maxValue = value;
+        } else if (value.compareTo(maxValue) > 0) {
+          maxValue = value;
+        }
+      }
+      context.write(key, maxValue);
+    }
+  }
+
+  public static class Combiner extends Reduce {
+  }
+
+  public static class GroupComparator implements RawComparator<Text> {
+    @Override
+    public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
+        int i4) {
+      byte[] b1 = new byte[i2];
+      System.arraycopy(bytes, i, b1, 0, i2);
+
+      byte[] b2 = new byte[i4];
+      System.arraycopy(bytes2, i3, b2, 0, i4);
+
+      return compare(new Text(new String(b1)), new Text(new String(b2)));
+    }
+
+    @Override
+    public int compare(Text o1, Text o2) {
+      String s1 = o1.toString();
+      String s2 = o2.toString();
+      s1 = s1.substring(0, s1.indexOf("|"));
+      s2 = s2.substring(0, s2.indexOf("|"));
+      return s1.compareTo(s2);
+    }
+
+  }
+
+  @Test
+  public void testCombiner() throws Exception {
+    if (!new File(TEST_ROOT_DIR).mkdirs()) {
+      throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
+    }
+    File in = new File(TEST_ROOT_DIR, "input");
+    if (!in.mkdirs()) {
+      throw new RuntimeException("Could not create test dir: " + in);
+    }
+    File out = new File(TEST_ROOT_DIR, "output");
+    PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
+    pw.println("A|a,1");
+    pw.println("A|b,2");
+    pw.println("B|a,3");
+    pw.println("B|b,4");
+    pw.println("B|c,5");
+    pw.close();
+    JobConf conf = new JobConf();
+    conf.set("mapreduce.framework.name", "local");
+    Job job = new Job(conf);
+    TextInputFormat.setInputPaths(job, new Path(in.getPath()));
+    TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
+
+    job.setMapperClass(Map.class);
+    job.setReducerClass(Reduce.class);
+    job.setInputFormatClass(TextInputFormat.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(LongWritable.class);
+    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setGroupingComparatorClass(GroupComparator.class);
+
+    job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
+    job.setCombinerClass(Combiner.class);
+    job.getConfiguration().setInt("min.num.spills.for.combine", 0);
+
+    job.submit();
+    job.waitForCompletion(false);
+    if (job.isSuccessful()) {
+      Counters counters = job.getCounters();
+
+      long combinerInputRecords = counters.findCounter(
+          "org.apache.hadoop.mapreduce.TaskCounter",
+          "COMBINE_INPUT_RECORDS").getValue();
+      long combinerOutputRecords = counters.findCounter(
+          "org.apache.hadoop.mapreduce.TaskCounter",
+          "COMBINE_OUTPUT_RECORDS").getValue();
+      Assert.assertTrue(combinerInputRecords > 0);
+      Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
+
+      BufferedReader br = new BufferedReader(new FileReader(
+          new File(out, "part-r-00000")));
+      Set<String> output = new HashSet<String>();
+      String line = br.readLine();
+      Assert.assertNotNull(line);
+      output.add(line.substring(0, 1) + line.substring(4, 5));
+      line = br.readLine();
+      Assert.assertNotNull(line);
+      output.add(line.substring(0, 1) + line.substring(4, 5));
+      line = br.readLine();
+      Assert.assertNull(line);
+      br.close();
+
+      Set<String> expected = new HashSet<String>();
+      expected.add("A2");
+      expected.add("B5");
+
+      Assert.assertEquals(expected, output);
+
+    } else {
+      Assert.fail("Job failed");
+    }
+  }
+
+}
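For quick reference, the new-API test above exercises the freshly added Job#setCombinerKeyGroupingComparatorClass alongside the existing reduce-side grouping comparator. A minimal sketch of that wiring, reusing the Map, Reduce, Combiner and GroupComparator classes defined in the test (illustrative only, not additional shipped code):

    // assumes: org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.Job
    Job job = new Job(new Configuration());
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setCombinerClass(Combiner.class);
    // reduce-side key grouping, as before
    job.setGroupingComparatorClass(GroupComparator.class);
    // new in this change: key grouping applied when the combiner runs
    job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);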

+ 20 - 0
hadoop-yarn-project/CHANGES.txt

@@ -55,6 +55,9 @@ Release 2.4.0 - UNRELEASED
     YARN-1028. Added FailoverProxyProvider capability to ResourceManager to help
     with RM failover. (Karthik Kambatla via vinodkv)
 
+    YARN-1029. Added embedded leader election in the ResourceManager. (Karthik
+    Kambatla via vinodkv)
+
   IMPROVEMENTS
 
     YARN-7. Support CPU resource for DistributedShell. (Junping Du via llu)
@@ -197,6 +200,11 @@ Release 2.4.0 - UNRELEASED
     YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize
     app-attempts separately from apps. (Jian He via vinodkv)
 
+    YARN-1482. Modified WebApplicationProxy to make it work across ResourceManager
+    fail-over. (Xuan Gong via vinodkv)
+
+    YARN-1568. Rename clusterid to clusterId in ActiveRMInfoProto (kasha)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -286,6 +294,18 @@ Release 2.4.0 - UNRELEASED
     YARN-1549. Fixed a bug in ResourceManager's ApplicationMasterService that
     was causing unmanaged AMs to not finish correctly. (haosdent via vinodkv)
 
+    YARN-1559. Race between ServerRMProxy and ClientRMProxy setting 
+    RMProxy#INSTANCE. (kasha and vinodkv via kasha)
+
+    YARN-1560. Fixed TestYarnClient#testAMMRTokens failure with null AMRM token.
+    (Ted Yu via jianhe)
+
+    YARN-1409. NonAggregatingLogHandler can throw RejectedExecutionException
+    (Tsuyoshi OZAWA via jlowe)
+
+    YARN-1293. Fixed TestContainerLaunch#testInvalidEnvSyntaxDiagnostics failure
+    caused by non-English system locale. (Tsuyoshi OZAWA via jianhe)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 0 - 9
hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml

@@ -309,13 +309,4 @@
     <Class name="org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore" />
     <Bug pattern="IS2_INCONSISTENT_SYNC" />
   </Match>
-
-  <!-- Ignore INSTANCE not being final as it is created in sub-classes -->
-  <Match>
-    <Class name="org.apache.hadoop.yarn.client.RMProxy" />
-    <Field name="INSTANCE" />
-    <Bug pattern="MS_SHOULD_BE_FINAL"/>
-  </Match>
-
-
 </FindBugsFilter>

+ 17 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/HAUtil.java

@@ -51,6 +51,22 @@ public class HAUtil {
         YarnConfiguration.DEFAULT_RM_HA_ENABLED);
   }
 
+  public static boolean isAutomaticFailoverEnabled(Configuration conf) {
+    return conf.getBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED,
+        YarnConfiguration.DEFAULT_AUTO_FAILOVER_ENABLED);
+  }
+
+  public static boolean isAutomaticFailoverEnabledAndEmbedded(
+      Configuration conf) {
+    return isAutomaticFailoverEnabled(conf) &&
+        isAutomaticFailoverEmbedded(conf);
+  }
+
+  public static boolean isAutomaticFailoverEmbedded(Configuration conf) {
+    return conf.getBoolean(YarnConfiguration.AUTO_FAILOVER_EMBEDDED,
+        YarnConfiguration.DEFAULT_AUTO_FAILOVER_EMBEDDED);
+  }
+
   /**
    * Verify configuration for Resource Manager HA.
    * @param conf Configuration
@@ -162,8 +178,7 @@ public class HAUtil {
    * @param conf Configuration. Please use verifyAndSetRMHAId to check.
    * @return RM Id on success
    */
-  @VisibleForTesting
-  static String getRMHAId(Configuration conf) {
+  public static String getRMHAId(Configuration conf) {
     return conf.get(YarnConfiguration.RM_HA_ID);
   }
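The new HAUtil helpers are thin reads of the automatic-failover keys added to YarnConfiguration (next file). A minimal sketch of how a caller might decide whether to start the embedded elector, assuming the pre-existing HAUtil#isHAEnabled check whose body appears in the hunk context above; the wiring around it is illustrative only:

    // assumes: org.apache.hadoop.conf.Configuration,
    // org.apache.hadoop.yarn.conf.HAUtil and org.apache.hadoop.yarn.conf.YarnConfiguration
    Configuration conf = new YarnConfiguration();
    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
    conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, true);
    conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_EMBEDDED, true);

    if (HAUtil.isHAEnabled(conf)
        && HAUtil.isAutomaticFailoverEnabledAndEmbedded(conf)) {
      String rmId = HAUtil.getRMHAId(conf);  // now public so the elector can read it
      // ... start the embedded leader-election service for rmId
    }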
 

+ 49 - 31
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -59,7 +59,7 @@ public class YarnConfiguration extends Configuration {
   public static final String IPC_PREFIX = YARN_PREFIX + "ipc.";
 
   /** Factory to create client IPC classes.*/
-  public static final String IPC_CLIENT_FACTORY_CLASS = 
+  public static final String IPC_CLIENT_FACTORY_CLASS =
     IPC_PREFIX + "client.factory.class";
   public static final String DEFAULT_IPC_CLIENT_FACTORY_CLASS = 
       "org.apache.hadoop.yarn.factories.impl.pb.RpcClientFactoryPBImpl";
@@ -87,6 +87,8 @@ public class YarnConfiguration extends Configuration {
   ////////////////////////////////
   public static final String RM_PREFIX = "yarn.resourcemanager.";
 
+  public static final String RM_CLUSTER_ID = RM_PREFIX + "cluster-id";
+
   /** The address of the applications manager interface in the RM.*/
   public static final String RM_ADDRESS = 
     RM_PREFIX + "address";
@@ -278,6 +280,36 @@ public class YarnConfiguration extends Configuration {
   public static final String RECOVERY_ENABLED = RM_PREFIX + "recovery.enabled";
   public static final boolean DEFAULT_RM_RECOVERY_ENABLED = false;
 
+  /** Zookeeper interaction configs */
+  public static final String RM_ZK_PREFIX = RM_PREFIX + "zk-";
+
+  public static final String RM_ZK_ADDRESS = RM_ZK_PREFIX + "address";
+
+  public static final String RM_ZK_NUM_RETRIES = RM_ZK_PREFIX + "num-retries";
+  public static final int DEFAULT_ZK_RM_NUM_RETRIES = 500;
+
+  public static final String RM_ZK_RETRY_INTERVAL_MS =
+      RM_ZK_PREFIX + "retry-interval-ms";
+  public static final long DEFAULT_RM_ZK_RETRY_INTERVAL_MS = 2000;
+
+  public static final String RM_ZK_TIMEOUT_MS = RM_ZK_PREFIX + "timeout-ms";
+  public static final int DEFAULT_RM_ZK_TIMEOUT_MS = 10000;
+
+  public static final String RM_ZK_ACL = RM_ZK_PREFIX + "acl";
+  public static final String DEFAULT_RM_ZK_ACL = "world:anyone:rwcda";
+
+  public static final String ZK_STATE_STORE_PREFIX =
+      RM_PREFIX + "zk-state-store.";
+
+  /** Parent znode path under which ZKRMStateStore will create znodes */
+  public static final String ZK_RM_STATE_STORE_PARENT_PATH =
+      ZK_STATE_STORE_PREFIX + "parent-path";
+  public static final String DEFAULT_ZK_RM_STATE_STORE_PARENT_PATH = "/rmstore";
+
+  /** Root node ACLs for fencing */
+  public static final String ZK_RM_STATE_STORE_ROOT_NODE_ACL =
+      ZK_STATE_STORE_PREFIX + "root-node.acl";
+
   /** HA related configs */
   public static final String RM_HA_PREFIX = RM_PREFIX + "ha.";
   public static final String RM_HA_ENABLED = RM_HA_PREFIX + "enabled";
@@ -296,6 +328,22 @@ public class YarnConfiguration extends Configuration {
           HttpConfig.isSecure() ? RM_WEBAPP_HTTPS_ADDRESS
               : RM_WEBAPP_ADDRESS));
 
+  public static final String AUTO_FAILOVER_PREFIX =
+      RM_HA_PREFIX + "automatic-failover.";
+
+  public static final String AUTO_FAILOVER_ENABLED =
+      AUTO_FAILOVER_PREFIX + "enabled";
+  public static final boolean DEFAULT_AUTO_FAILOVER_ENABLED = false;
+
+  public static final String AUTO_FAILOVER_EMBEDDED =
+      AUTO_FAILOVER_PREFIX + "embedded";
+  public static final boolean DEFAULT_AUTO_FAILOVER_EMBEDDED = false;
+
+  public static final String AUTO_FAILOVER_ZK_BASE_PATH =
+      AUTO_FAILOVER_PREFIX + "zk-base-path";
+  public static final String DEFAULT_AUTO_FAILOVER_ZK_BASE_PATH =
+      "/yarn-leader-election";
+
   public static final String CLIENT_FAILOVER_PREFIX =
       YARN_PREFIX + "client.failover-";
   public static final String CLIENT_FAILOVER_PROXY_PROVIDER =
@@ -334,36 +382,6 @@ public class YarnConfiguration extends Configuration {
       + "fs.state-store.retry-policy-spec";
   public static final String DEFAULT_FS_RM_STATE_STORE_RETRY_POLICY_SPEC =
       "2000, 500";
-  /**
-   * Comma separated host:port pairs, each corresponding to a ZK server for
-   * ZKRMStateStore
-   */
-  public static final String ZK_STATE_STORE_PREFIX =
-      RM_PREFIX + "zk-state-store.";
-  public static final String ZK_RM_STATE_STORE_NUM_RETRIES =
-      ZK_STATE_STORE_PREFIX + "num-retries";
-  public static final int DEFAULT_ZK_RM_STATE_STORE_NUM_RETRIES = 500;
-  /** retry interval when connecting to zookeeper*/
-  public static final String ZK_RM_STATE_STORE_RETRY_INTERVAL_MS =
-      ZK_STATE_STORE_PREFIX + "retry-interval-ms";
-  public static final long DEFAULT_ZK_RM_STATE_STORE_RETRY_INTERVAL_MS = 2000;
-  public static final String ZK_RM_STATE_STORE_ADDRESS =
-      ZK_STATE_STORE_PREFIX + "address";
-  /** Timeout in millisec for ZK server connection for ZKRMStateStore */
-  public static final String ZK_RM_STATE_STORE_TIMEOUT_MS =
-      ZK_STATE_STORE_PREFIX + "timeout-ms";
-  public static final int DEFAULT_ZK_RM_STATE_STORE_TIMEOUT_MS = 60000;
-  /** Parent znode path under which ZKRMStateStore will create znodes */
-  public static final String ZK_RM_STATE_STORE_PARENT_PATH =
-      ZK_STATE_STORE_PREFIX + "parent-path";
-  public static final String DEFAULT_ZK_RM_STATE_STORE_PARENT_PATH = "/rmstore";
-  /** ACL for znodes in ZKRMStateStore */
-  public static final String ZK_RM_STATE_STORE_ACL =
-      ZK_STATE_STORE_PREFIX + "acl";
-  public static final String DEFAULT_ZK_RM_STATE_STORE_ACL =
-      "world:anyone:rwcda";
-  public static final String ZK_RM_STATE_STORE_ROOT_NODE_ACL =
-      ZK_STATE_STORE_PREFIX + "root-node.acl";
 
   /** The maximum number of completed applications RM keeps. */ 
   public static final String RM_MAX_COMPLETED_APPLICATIONS =
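The per-state-store ZK keys removed above are replaced by the shared yarn.resourcemanager.zk-* keys added earlier in this file, presumably so ZKRMStateStore and the embedded elector can share one set of ZooKeeper settings. A minimal sketch of setting the consolidated keys programmatically (the quorum address is a placeholder):

    // assumes: org.apache.hadoop.conf.Configuration, org.apache.hadoop.yarn.conf.YarnConfiguration
    Configuration conf = new YarnConfiguration();
    conf.set(YarnConfiguration.RM_ZK_ADDRESS, "zk1:2181,zk2:2181,zk3:2181");
    conf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS,
        YarnConfiguration.DEFAULT_RM_ZK_TIMEOUT_MS);
    conf.setInt(YarnConfiguration.RM_ZK_NUM_RETRIES,
        YarnConfiguration.DEFAULT_ZK_RM_NUM_RETRIES);
    conf.setLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
        YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
    conf.set(YarnConfiguration.RM_ZK_ACL, YarnConfiguration.DEFAULT_RM_ZK_ACL);
    // the ZKRMStateStore parent znode stays under its own zk-state-store prefix
    conf.set(YarnConfiguration.ZK_RM_STATE_STORE_PARENT_PATH,
        YarnConfiguration.DEFAULT_ZK_RM_STATE_STORE_PARENT_PATH);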

+ 8 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto

@@ -133,3 +133,11 @@ message RMStateVersionProto {
   optional int32 major_version = 1;
   optional int32 minor_version = 2;
 }
+
+//////////////////////////////////////////////////////////////////
+///////////// RM Failover related records ////////////////////////
+//////////////////////////////////////////////////////////////////
+message ActiveRMInfoProto {
+  required string clusterId = 1;
+  required string rmId = 2;
+}
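ActiveRMInfoProto (with the clusterId field renamed per YARN-1568) is the record an active ResourceManager can publish, for example as the elector's leader-znode payload, to advertise which RM in which cluster currently holds the active role. A hedged sketch of building it with the protobuf-generated builder; the generated outer class location and the znode usage are assumptions, not shown in this diff:

    // assumed generated location:
    // org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ActiveRMInfoProto
    ActiveRMInfoProto activeRMInfo = ActiveRMInfoProto.newBuilder()
        .setClusterId(conf.get(YarnConfiguration.RM_CLUSTER_ID))
        .setRmId(HAUtil.getRMHAId(conf))
        .build();
    byte[] electorData = activeRMInfo.toByteArray();  // e.g. written to the leader znode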

+ 7 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml

@@ -30,7 +30,13 @@
   </properties>
 
   <dependencies>
-  	<dependency>
+    <dependency>
+      <groupId>org.apache.zookeeper</groupId>
+      <artifactId>zookeeper</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
   		<groupId>org.apache.hadoop</groupId>
   		<artifactId>hadoop-yarn-api</artifactId>
   	</dependency>

Some files were not shown because too many files changed in this diff