
Merge trunk to HDFS-4685.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-4685@1556097 13f79535-47bb-0310-9956-ffa450edef68
Chris Nauroth 11 years ago
parent commit
2fbb3d694e
100 changed files with 3581 additions and 1208 deletions
  1. 5 0
      hadoop-common-project/hadoop-auth/pom.xml
  2. 19 1
      hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/PseudoAuthenticationHandler.java
  3. 1 1
      hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestPseudoAuthenticationHandler.java
  4. 17 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  5. 43 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java
  6. 14 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java
  7. 30 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java
  8. 3 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
  9. 18 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java
  10. 21 9
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java
  11. 1 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java
  12. 9 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java
  13. 30 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
  14. 4 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java
  15. 44 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c
  16. 11 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java
  17. 87 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSourceAdapter.java
  18. 32 8
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java
  19. 24 3
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java
  20. 171 132
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  21. 3 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
  22. 15 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
  23. 19 24
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
  24. 254 137
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java
  25. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
  26. 7 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
  27. 2 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
  28. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
  29. 97 124
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java
  30. 52 48
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
  31. 12 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  32. 8 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  33. 6 6
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/StorageReport.java
  34. 40 21
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java
  35. 3 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java
  36. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto
  37. 6 6
      hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
  38. 7 6
      hadoop-hdfs-project/hadoop-hdfs/src/site/apt/CentralizedCacheManagement.apt.vm
  39. 30 16
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
  40. 5 13
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java
  41. 3 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
  42. 5 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java
  43. 3 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java
  44. 3 3
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java
  45. 74 6
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java
  46. 113 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStorageReport.java
  47. 109 9
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java
  48. 3 156
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java
  49. 81 65
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java
  50. 3 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
  51. 25 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java
  52. 8 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
  53. 72 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java
  54. 93 124
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java
  55. BIN
      hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored
  56. 90 74
      hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml
  57. 14 0
      hadoop-mapreduce-project/CHANGES.txt
  58. 66 10
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
  59. 3 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java
  60. 1 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java
  61. 25 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
  62. 3 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
  63. 6 7
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java
  64. 290 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/CheckpointAMPreemptionPolicy.java
  65. 4 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/KillAMPreemptionPolicy.java
  66. 4 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/NoopAMPreemptionPolicy.java
  67. 134 23
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java
  68. 329 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestCheckpointPreemptionPolicy.java
  69. 15 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java
  70. 35 7
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java
  71. 33 7
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java
  72. 63 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/AMFeedback.java
  73. 51 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java
  74. 33 10
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java
  75. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskStatus.java
  76. 37 8
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
  77. 17 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java
  78. 16 6
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java
  79. 2 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
  80. 21 26
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/TaskCheckpointID.java
  81. 5 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapContextImpl.java
  82. 5 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReduceContextImpl.java
  83. 5 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/map/WrappedMapper.java
  84. 6 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/reduce/WrappedReducer.java
  85. 11 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java
  86. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java
  87. 4 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java
  88. 11 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java
  89. 23 9
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapProgress.java
  90. 191 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestOldCombinerGrouping.java
  91. 24 7
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTaskCommit.java
  92. 178 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestNewCombinerGrouping.java
  93. 12 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestJHSSecurity.java
  94. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestUmbilicalProtocolWithJobToken.java
  95. 1 1
      hadoop-project/pom.xml
  96. 19 18
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
  97. 7 4
      hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java
  98. 28 0
      hadoop-yarn-project/CHANGES.txt
  99. 0 9
      hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
  100. 5 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java

+ 5 - 0
hadoop-common-project/hadoop-auth/pom.xml

@@ -92,6 +92,11 @@
       <artifactId>hadoop-minikdc</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient</artifactId>
+      <scope>compile</scope>
+    </dependency>
   </dependencies>
 
   <build>

+ 19 - 1
hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/PseudoAuthenticationHandler.java

@@ -16,10 +16,15 @@ package org.apache.hadoop.security.authentication.server;
 import org.apache.hadoop.security.authentication.client.AuthenticationException;
 import org.apache.hadoop.security.authentication.client.PseudoAuthenticator;
 
+import org.apache.http.client.utils.URLEncodedUtils;
+import org.apache.http.NameValuePair;
+
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
 import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.List;
 import java.util.Properties;
 
 /**
@@ -48,6 +53,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
    */
   public static final String ANONYMOUS_ALLOWED = TYPE + ".anonymous.allowed";
 
+  private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
   private boolean acceptAnonymous;
 
   /**
@@ -114,6 +120,18 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
     return true;
   }
 
+  private String getUserName(HttpServletRequest request) {
+    List<NameValuePair> list = URLEncodedUtils.parse(request.getQueryString(), UTF8_CHARSET);
+    if (list != null) {
+      for (NameValuePair nv : list) {
+        if (PseudoAuthenticator.USER_NAME.equals(nv.getName())) {
+          return nv.getValue();
+        }
+      }
+    }
+    return null;
+  }
+
   /**
    * Authenticates an HTTP client request.
    * <p/>
@@ -139,7 +157,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
   public AuthenticationToken authenticate(HttpServletRequest request, HttpServletResponse response)
     throws IOException, AuthenticationException {
     AuthenticationToken token;
-    String userName = request.getParameter(PseudoAuthenticator.USER_NAME);
+    String userName = getUserName(request);
     if (userName == null) {
       if (getAcceptAnonymous()) {
         token = AuthenticationToken.ANONYMOUS;
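
Note (illustrative, not part of the patch): the switch above from request.getParameter() to parsing request.getQueryString() is what keeps HADOOP-10193 from consuming the request body, since getParameter() may read a form-encoded POST stream. A minimal sketch of the same parsing idea, using the httpclient dependency added to the pom above ("user.name" stands in for PseudoAuthenticator.USER_NAME; the query string is hypothetical):

    import java.nio.charset.Charset;
    import org.apache.http.NameValuePair;
    import org.apache.http.client.utils.URLEncodedUtils;

    // Illustrative only: extract the user name from a query string without
    // touching the servlet request body.
    String query = "user.name=alice&op=open";   // hypothetical query string
    String user = null;
    for (NameValuePair nv : URLEncodedUtils.parse(query, Charset.forName("UTF-8"))) {
      if ("user.name".equals(nv.getName())) {
        user = nv.getValue();
      }
    }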

+ 1 - 1
hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestPseudoAuthenticationHandler.java

@@ -94,7 +94,7 @@ public class TestPseudoAuthenticationHandler {
 
       HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
       HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
-      Mockito.when(request.getParameter(PseudoAuthenticator.USER_NAME)).thenReturn("user");
+      Mockito.when(request.getQueryString()).thenReturn(PseudoAuthenticator.USER_NAME + "=" + "user");
 
       AuthenticationToken token = handler.authenticate(request, response);
 

+ 17 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -108,6 +108,8 @@ Trunk (Unreleased)
     HADOOP-10141. Create KeyProvider API to separate encryption key storage
     from the applications. (omalley)
 
+    HADOOP-10201. Add listing to KeyProvider API. (Larry McCay via omalley)
+
   BUG FIXES
 
     HADOOP-9451. Fault single-layer config if node group topology is enabled.
@@ -407,6 +409,9 @@ Release 2.4.0 - UNRELEASED
     HADOOP-10169. Remove the unnecessary synchronized in JvmMetrics class.
     (Liang Xie via jing9) 
 
+    HADOOP-10198. DomainSocket: add support for socketpair.
+    (Colin Patrick McCabe via wang)
+
   OPTIMIZATIONS
 
     HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn)
@@ -416,6 +421,9 @@ Release 2.4.0 - UNRELEASED
 
     HADOOP-10172. Cache SASL server factories (daryn)
 
+    HADOOP-10173. Remove UGI from DIGEST-MD5 SASL server creation (daryn via
+    kihwal)
+
   BUG FIXES
 
     HADOOP-9964. Fix deadlocks in TestHttpServer by synchronize
@@ -489,6 +497,9 @@ Release 2.4.0 - UNRELEASED
 
     HADOOP-10171. TestRPC fails intermittently on jkd7 (Mit Desai via jeagles)
 
+    HADOOP-10147  HDFS-5678 Upgrade to commons-logging 1.1.3 to avoid potential
+    deadlock in MiniDFSCluster (stevel)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -568,6 +579,12 @@ Release 2.3.0 - UNRELEASED
     HADOOP-10175. Har files system authority should preserve userinfo.
     (Chuan Liu via cnauroth)
 
+    HADOOP-10090. Jobtracker metrics not updated properly after execution
+    of a mapreduce job. (ivanmi)
+
+    HADOOP-10193. hadoop-auth's PseudoAuthenticationHandler can consume getInputStream. 
+    (gchanan via tucu)
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 43 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java

@@ -36,8 +36,11 @@ import java.security.KeyStoreException;
 import java.security.NoSuchAlgorithmException;
 import java.security.UnrecoverableKeyException;
 import java.security.cert.CertificateException;
+import java.util.ArrayList;
 import java.util.Date;
+import java.util.Enumeration;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -56,6 +59,7 @@ import java.util.Map;
  */
 @InterfaceAudience.Private
 public class JavaKeyStoreProvider extends KeyProvider {
+  private static final String KEY_METADATA = "KeyMetadata";
   public static final String SCHEME_NAME = "jceks";
   public static final String KEYSTORE_PASSWORD_NAME =
       "HADOOP_KEYSTORE_PASSWORD";
@@ -117,6 +121,44 @@ public class JavaKeyStoreProvider extends KeyProvider {
     return new KeyVersion(versionName, key.getEncoded());
   }
 
+  @Override
+  public List<String> getKeys() throws IOException {
+    ArrayList<String> list = new ArrayList<String>();
+    String alias = null;
+    try {
+      Enumeration<String> e = keyStore.aliases();
+      while (e.hasMoreElements()) {
+         alias = e.nextElement();
+         // only include the metadata key names in the list of names
+         if (!alias.contains("@")) {
+             list.add(alias);
+         }
+      }
+    } catch (KeyStoreException e) {
+      throw new IOException("Can't get key " + alias + " from " + path, e);
+    }
+    return list;
+  }
+
+  @Override
+  public List<KeyVersion> getKeyVersions(String name) throws IOException {
+    List<KeyVersion> list = new ArrayList<KeyVersion>();
+    Metadata km = getMetadata(name);
+    if (km != null) {
+      int latestVersion = km.getVersions();
+      KeyVersion v = null;
+      String versionName = null;
+      for (int i = 0; i < latestVersion; i++) {
+        versionName = buildVersionName(name, i);
+        v = getKeyVersion(versionName);
+        if (v != null) {
+          list.add(v);
+        }
+      }
+    }
+    return list;
+  }
+
   @Override
   public Metadata getMetadata(String name) throws IOException {
     if (cache.containsKey(name)) {
@@ -288,7 +330,7 @@ public class JavaKeyStoreProvider extends KeyProvider {
 
     @Override
     public String getFormat() {
-      return "KeyMetadata";
+      return KEY_METADATA;
     }
 
     @Override

+ 14 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java

@@ -254,6 +254,20 @@ public abstract class KeyProvider {
   public abstract KeyVersion getKeyVersion(String versionName
                                             ) throws IOException;
 
+  /**
+   * Get the key names for all keys.
+   * @return the list of key names
+   * @throws IOException
+   */
+  public abstract List<String> getKeys() throws IOException;
+
+  /**
+   * Get the key material for all versions of a specific key name.
+   * @return the list of key material
+   * @throws IOException
+   */
+  public abstract List<KeyVersion> getKeyVersions(String name) throws IOException;
+
   /**
    * Get the current version of the key, which should be used for encrypting new
    * data.
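
Note (illustrative, not part of the patch): the two new abstract methods pair naturally with the existing per-key calls. A rough sketch of enumerating everything a provider holds, assuming a KeyProvider instance obtained elsewhere and using the KeyVersion accessors exercised in TestKeyProviderFactory below:

    // List every key name and every stored version of each key.
    static void dumpKeys(KeyProvider provider) throws IOException {
      for (String name : provider.getKeys()) {
        for (KeyProvider.KeyVersion v : provider.getKeyVersions(name)) {
          System.out.println(v.getVersionName() + ": "
              + v.getMaterial().length + " bytes of key material");
        }
      }
    }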

+ 30 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java

@@ -20,8 +20,10 @@ package org.apache.hadoop.crypto.key;
 
 import java.io.IOException;
 import java.net.URI;
+import java.util.ArrayList;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -142,4 +144,32 @@ public class UserProvider extends KeyProvider {
       return null;
     }
   }
+
+  @Override
+  public List<String> getKeys() throws IOException {
+    List<String> list = new ArrayList<String>();
+    List<Text> keys = credentials.getAllSecretKeys();
+    for (Text key : keys) {
+      if (key.find("@") == -1) {
+        list.add(key.toString());
+      }
+    }
+    return list;
+  }
+
+  @Override
+  public List<KeyVersion> getKeyVersions(String name) throws IOException {
+      List<KeyVersion> list = new ArrayList<KeyVersion>();
+      Metadata km = getMetadata(name);
+      if (km != null) {
+        int latestVersion = km.getVersions();
+        for (int i = 0; i < latestVersion; i++) {
+          KeyVersion v = getKeyVersion(buildVersionName(name, i));
+          if (v != null) {
+            list.add(v);
+          }
+        }
+      }
+      return list;
+  }
 }

+ 3 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

@@ -21,7 +21,6 @@ package org.apache.hadoop.fs;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.http.lib.StaticUserWebFilter;
-import org.apache.hadoop.security.authorize.Service;
 
 /** 
  * This class contains constants for configuration keys used
@@ -240,4 +239,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
   /** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */
   public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT =
       10000;
+
+  public static final String HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS =
+    "hadoop.user.group.metrics.percentiles.intervals";
 }

+ 18 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java

@@ -276,6 +276,24 @@ public class DomainSocket implements Closeable {
     return new DomainSocket(path, fd);
   }
 
+  /**
+   * Create a pair of UNIX domain sockets which are connected to each other
+   * by calling socketpair(2).
+   *
+   * @return                An array of two UNIX domain sockets connected to
+   *                        each other.
+   * @throws IOException    on error.
+   */
+  public static DomainSocket[] socketpair() throws IOException {
+    int fds[] = socketpair0();
+    return new DomainSocket[] {
+      new DomainSocket("(anonymous0)", fds[0]),
+      new DomainSocket("(anonymous1)", fds[1])
+    };
+  }
+
+  private static native int[] socketpair0() throws IOException;
+
   private static native int accept0(int fd) throws IOException;
 
   /**
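
Note (illustrative, not part of the patch): socketpair() returns two already-connected anonymous sockets, so no filesystem path and no bindAndListen()/connect() handshake is needed. A minimal sketch:

    // Create a connected pair and make sure both ends get closed.
    DomainSocket[] pair = DomainSocket.socketpair();
    try {
      // pair[0] and pair[1] are connected to each other; either end can be
      // handed to code that would otherwise use accept() or connect().
    } finally {
      pair[0].close();
      pair[1].close();
    }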

+ 21 - 9
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java

@@ -29,7 +29,9 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -73,15 +75,6 @@ public class Credentials implements Writable {
     this.addAll(credentials);
   }
   
-  /**
-   * Returns the key bytes for the alias
-   * @param alias the alias for the key
-   * @return key for this alias
-   */
-  public byte[] getSecretKey(Text alias) {
-    return secretKeysMap.get(alias);
-  }
-  
   /**
    * Returns the Token object for the alias
    * @param alias the alias for the Token
@@ -117,6 +110,15 @@ public class Credentials implements Writable {
   public int numberOfTokens() {
     return tokenMap.size();
   }
+
+  /**
+   * Returns the key bytes for the alias
+   * @param alias the alias for the key
+   * @return key for this alias
+   */
+  public byte[] getSecretKey(Text alias) {
+    return secretKeysMap.get(alias);
+  }
   
   /**
    * @return number of keys in the in-memory map
@@ -142,6 +144,16 @@ public class Credentials implements Writable {
     secretKeysMap.remove(alias);
   }
 
+  /**
+   * Return all the secret key entries in the in-memory map
+   */
+  public List<Text> getAllSecretKeys() {
+    List<Text> list = new java.util.ArrayList<Text>();
+    list.addAll(secretKeysMap.keySet());
+
+    return list;
+  }
+
   /**
    * Convenience method for reading a token storage file, and loading the Tokens
    * therein in the passed UGI
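
Note (illustrative, not part of the patch): getAllSecretKeys() is what the new UserProvider.getKeys() above iterates over. A small sketch pairing it with the existing getSecretKey() lookup (addSecretKey() is the pre-existing setter and is assumed unchanged by this patch):

    // Walk every secret key alias stored in a Credentials object.
    Credentials creds = new Credentials();
    creds.addSecretKey(new Text("alias1"), "secret".getBytes());
    for (Text alias : creds.getAllSecretKeys()) {
      byte[] material = creds.getSecretKey(alias);
      System.out.println(alias + " -> " + material.length + " bytes");
    }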

+ 1 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Groups.java

@@ -138,6 +138,7 @@ public class Groups {
     List<String> groupList = impl.getGroups(user);
     long endMs = Time.monotonicNow();
     long deltaMs = endMs - startMs ;
+    UserGroupInformation.metrics.addGetGroups(deltaMs);
     if (deltaMs > warningDeltaMs) {
       LOG.warn("Potential performance problem: getGroups(user=" + user +") " +
           "took " + deltaMs + " milliseconds.");

+ 9 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java

@@ -131,7 +131,7 @@ public class SaslRpcServer {
   public SaslServer create(Connection connection,
                            SecretManager<TokenIdentifier> secretManager
       ) throws IOException, InterruptedException {
-    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
+    UserGroupInformation ugi = null;
     final CallbackHandler callback;
     switch (authMethod) {
       case TOKEN: {
@@ -139,6 +139,7 @@ public class SaslRpcServer {
         break;
       }
       case KERBEROS: {
+        ugi = UserGroupInformation.getCurrentUser();
         if (serverId.isEmpty()) {
           throw new AccessControlException(
               "Kerberos principal name does NOT have the expected "
@@ -153,7 +154,9 @@ public class SaslRpcServer {
             "Server does not support SASL " + authMethod);
     }
     
-    SaslServer saslServer = ugi.doAs(
+    final SaslServer saslServer;
+    if (ugi != null) {
+      saslServer = ugi.doAs(
         new PrivilegedExceptionAction<SaslServer>() {
           @Override
           public SaslServer run() throws SaslException  {
@@ -161,6 +164,10 @@ public class SaslRpcServer {
                 SaslRpcServer.SASL_PROPS, callback);
           }
         });
+    } else {
+      saslServer = saslFactory.createSaslServer(mechanism, protocol, serverId,
+          SaslRpcServer.SASL_PROPS, callback);
+    }
     if (saslServer == null) {
       throw new AccessControlException(
           "Unable to find SASL server implementation for " + mechanism);

+ 30 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.security;
 
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN;
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT;
+import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
 
 import java.io.File;
 import java.io.IOException;
@@ -58,6 +59,8 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.metrics2.annotation.Metric;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;
 import org.apache.hadoop.metrics2.lib.MutableRate;
 import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
 import org.apache.hadoop.security.authentication.util.KerberosUtil;
@@ -92,14 +95,27 @@ public class UserGroupInformation {
    */
   @Metrics(about="User and group related metrics", context="ugi")
   static class UgiMetrics {
+    final MetricsRegistry registry = new MetricsRegistry("UgiMetrics");
+
     @Metric("Rate of successful kerberos logins and latency (milliseconds)")
     MutableRate loginSuccess;
     @Metric("Rate of failed kerberos logins and latency (milliseconds)")
     MutableRate loginFailure;
+    @Metric("GetGroups") MutableRate getGroups;
+    MutableQuantiles[] getGroupsQuantiles;
 
     static UgiMetrics create() {
       return DefaultMetricsSystem.instance().register(new UgiMetrics());
     }
+
+    void addGetGroups(long latency) {
+      getGroups.add(latency);
+      if (getGroupsQuantiles != null) {
+        for (MutableQuantiles q : getGroupsQuantiles) {
+          q.add(latency);
+        }
+      }
+    }
   }
   
   /**
@@ -250,6 +266,20 @@ public class UserGroupInformation {
       groups = Groups.getUserToGroupsMappingService(conf);
     }
     UserGroupInformation.conf = conf;
+
+    if (metrics.getGroupsQuantiles == null) {
+      int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS);
+      if (intervals != null && intervals.length > 0) {
+        final int length = intervals.length;
+        MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length];
+        for (int i = 0; i < length; i++) {
+          getGroupsQuantiles[i] = metrics.registry.newQuantiles(
+            "getGroups" + intervals[i] + "s",
+            "Get groups", "ops", "latency", intervals[i]);
+        }
+        metrics.getGroupsQuantiles = getGroupsQuantiles;
+      }
+    }
   }
 
   /**
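
Note (illustrative, not part of the patch): the getGroups percentile quantiles are only created when the new interval key is set before setConfiguration() runs. A hedged example with made-up interval values (60s and 300s):

    // Enable getGroups latency percentiles with two rollover windows.
    Configuration conf = new Configuration();
    conf.set("hadoop.user.group.metrics.percentiles.intervals", "60,300");
    UserGroupInformation.setConfiguration(conf);
    // UgiMetrics will then publish per-interval quantile gauges
    // (see verifyGroupMetrics in TestUserGroupInformation below).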

+ 4 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java

@@ -928,8 +928,10 @@ public class StringUtils {
    * @param args  List of arguments.
    * @return      null if the option was not found; the value of the 
    *              option otherwise.
+   * @throws IllegalArgumentException if the option's argument is not present
    */
-  public static String popOptionWithArgument(String name, List<String> args) {
+  public static String popOptionWithArgument(String name, List<String> args)
+      throws IllegalArgumentException {
     String val = null;
     for (Iterator<String> iter = args.iterator(); iter.hasNext(); ) {
       String cur = iter.next();
@@ -939,7 +941,7 @@ public class StringUtils {
       } else if (cur.equals(name)) {
         iter.remove();
         if (!iter.hasNext()) {
-          throw new RuntimeException("option " + name + " requires 1 " +
+          throw new IllegalArgumentException("option " + name + " requires 1 " +
               "argument.");
         }
         val = iter.next();
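
Note (illustrative, not part of the patch): callers now see a missing option argument as an IllegalArgumentException rather than a bare RuntimeException. A small sketch (the option name and values are hypothetical):

    // Pop "-replication 3" out of a mutable argument list.
    List<String> args = new LinkedList<String>(
        Arrays.asList("-replication", "3", "/some/path"));
    String replication = StringUtils.popOptionWithArgument("-replication", args);
    // replication == "3"; args now contains only "/some/path".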

+ 44 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c

@@ -364,6 +364,50 @@ JNIEnv *env, jclass clazz, jstring path)
   return fd;
 }
 
+#define SOCKETPAIR_ARRAY_LEN 2
+
+JNIEXPORT jarray JNICALL
+Java_org_apache_hadoop_net_unix_DomainSocket_socketpair0(
+JNIEnv *env, jclass clazz)
+{
+  jarray arr = NULL;
+  int idx, err, fds[SOCKETPAIR_ARRAY_LEN] = { -1, -1 };
+  jthrowable jthr = NULL;
+
+  arr = (*env)->NewIntArray(env, SOCKETPAIR_ARRAY_LEN);
+  jthr = (*env)->ExceptionOccurred(env);
+  if (jthr) {
+    (*env)->ExceptionClear(env);
+    goto done;
+  }
+  if (socketpair(PF_UNIX, SOCK_STREAM, 0, fds) < 0) {
+    err = errno;
+    jthr = newSocketException(env, err,
+            "socketpair(2) error: %s", terror(err));
+    goto done;
+  }
+  (*env)->SetIntArrayRegion(env, arr, 0, SOCKETPAIR_ARRAY_LEN, fds);
+  jthr = (*env)->ExceptionOccurred(env);
+  if (jthr) {
+    (*env)->ExceptionClear(env);
+    goto done;
+  }
+
+done:
+  if (jthr) {
+    (*env)->DeleteLocalRef(env, arr);
+    arr = NULL;
+    for (idx = 0; idx < SOCKETPAIR_ARRAY_LEN; idx++) {
+      if (fds[idx] >= 0) {
+        close(fds[idx]);
+        fds[idx] = -1;
+      }
+    }
+    (*env)->Throw(env, jthr);
+  }
+  return arr;
+}
+
 JNIEXPORT jint JNICALL
 Java_org_apache_hadoop_net_unix_DomainSocket_accept0(
 JNIEnv *env, jclass clazz, jint fd)

+ 11 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java

@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.List;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -160,6 +161,16 @@ public class TestKeyProviderFactory {
         provider.getCurrentKey("key4").getMaterial());
     assertArrayEquals(key3, provider.getCurrentKey("key3").getMaterial());
     assertEquals("key3@0", provider.getCurrentKey("key3").getVersionName());
+
+    List<String> keys = provider.getKeys();
+    assertTrue("Keys should have been returned.", keys.size() == 2);
+    assertTrue("Returned Keys should have included key3.", keys.contains("key3"));
+    assertTrue("Returned Keys should have included key4.", keys.contains("key4"));
+
+    List<KeyVersion> kvl = provider.getKeyVersions("key3");
+    assertTrue("KeyVersions should have been returned for key3.", kvl.size() == 1);
+    assertTrue("KeyVersions should have included key3@0.", kvl.get(0).getVersionName().equals("key3@0"));
+    assertArrayEquals(key3, kvl.get(0).getMaterial());
   }
 
   @Test

+ 87 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSourceAdapter.java

@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.metrics2.impl;
+
+import static org.junit.Assert.*;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.metrics2.MetricsSource;
+import org.apache.hadoop.metrics2.MetricsTag;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.MetricsAnnotations;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MetricsSourceBuilder;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.junit.Test;
+
+public class TestMetricsSourceAdapter {
+
+  @Test
+  public void testGetMetricsAndJmx() throws Exception {
+    // create test source with a single metric counter of value 0
+    TestSource source = new TestSource("test");
+    MetricsSourceBuilder sb = MetricsAnnotations.newSourceBuilder(source);
+    final MetricsSource s = sb.build();
+
+    List<MetricsTag> injectedTags = new ArrayList<MetricsTag>();
+    MetricsSourceAdapter sa = new MetricsSourceAdapter(
+        "test", "test", "test desc", s, injectedTags, null, null, 1, false);
+
+    // all metrics are initially assumed to have changed
+    MetricsCollectorImpl builder = new MetricsCollectorImpl();
+    Iterable<MetricsRecordImpl> metricsRecords = sa.getMetrics(builder, true);
+
+    // Validate getMetrics and JMX initial values
+    MetricsRecordImpl metricsRecord = metricsRecords.iterator().next();
+    assertEquals(0L,
+        metricsRecord.metrics().iterator().next().value().longValue());
+
+    Thread.sleep(100); // skip JMX cache TTL
+    assertEquals(0L, (Number)sa.getAttribute("C1"));
+
+    // change metric value
+    source.incrementCnt();
+
+    // validate getMetrics and JMX
+    builder = new MetricsCollectorImpl();
+    metricsRecords = sa.getMetrics(builder, true);
+    metricsRecord = metricsRecords.iterator().next();
+    assertTrue(metricsRecord.metrics().iterator().hasNext());
+    Thread.sleep(100); // skip JMX cache TTL
+    assertEquals(1L, (Number)sa.getAttribute("C1"));
+  }
+
+  @SuppressWarnings("unused")
+  @Metrics(context="test")
+  private static class TestSource {
+    @Metric("C1 desc") MutableCounterLong c1;
+    final MetricsRegistry registry;
+
+    TestSource(String recName) {
+      registry = new MetricsRegistry(recName);
+    }
+
+    public void incrementCnt() {
+      c1.incr();
+    }
+  }
+}

+ 32 - 8
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java

@@ -420,7 +420,8 @@ public class TestDomainSocket {
    * @throws IOException
    */
   void testClientServer1(final Class<? extends WriteStrategy> writeStrategyClass,
-      final Class<? extends ReadStrategy> readStrategyClass) throws Exception {
+      final Class<? extends ReadStrategy> readStrategyClass,
+      final DomainSocket preConnectedSockets[]) throws Exception {
     final String TEST_PATH = new File(sockDir.getDir(),
         "test_sock_client_server1").getAbsolutePath();
     final byte clientMsg1[] = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6 };
@@ -428,13 +429,15 @@ public class TestDomainSocket {
     final byte clientMsg2 = 0x45;
     final ArrayBlockingQueue<Throwable> threadResults =
         new ArrayBlockingQueue<Throwable>(2);
-    final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
+    final DomainSocket serv = (preConnectedSockets != null) ?
+      null : DomainSocket.bindAndListen(TEST_PATH);
     Thread serverThread = new Thread() {
       public void run(){
         // Run server
         DomainSocket conn = null;
         try {
-          conn = serv.accept();
+          conn = preConnectedSockets != null ?
+                    preConnectedSockets[0] : serv.accept();
           byte in1[] = new byte[clientMsg1.length];
           ReadStrategy reader = readStrategyClass.newInstance();
           reader.init(conn);
@@ -459,7 +462,8 @@ public class TestDomainSocket {
     Thread clientThread = new Thread() {
       public void run(){
         try {
-          DomainSocket client = DomainSocket.connect(TEST_PATH);
+          DomainSocket client = preConnectedSockets != null ?
+                preConnectedSockets[1] : DomainSocket.connect(TEST_PATH);
           WriteStrategy writer = writeStrategyClass.newInstance();
           writer.init(client);
           writer.write(clientMsg1);
@@ -487,25 +491,45 @@ public class TestDomainSocket {
     }
     serverThread.join(120000);
     clientThread.join(120000);
-    serv.close();
+    if (serv != null) {
+      serv.close();
+    }
   }
 
   @Test(timeout=180000)
   public void testClientServerOutStreamInStream() throws Exception {
     testClientServer1(OutputStreamWriteStrategy.class,
-        InputStreamReadStrategy.class);
+        InputStreamReadStrategy.class, null);
+  }
+
+  @Test(timeout=180000)
+  public void testClientServerOutStreamInStreamWithSocketpair() throws Exception {
+    testClientServer1(OutputStreamWriteStrategy.class,
+        InputStreamReadStrategy.class, DomainSocket.socketpair());
   }
 
   @Test(timeout=180000)
   public void testClientServerOutStreamInDbb() throws Exception {
     testClientServer1(OutputStreamWriteStrategy.class,
-        DirectByteBufferReadStrategy.class);
+        DirectByteBufferReadStrategy.class, null);
+  }
+
+  @Test(timeout=180000)
+  public void testClientServerOutStreamInDbbWithSocketpair() throws Exception {
+    testClientServer1(OutputStreamWriteStrategy.class,
+        DirectByteBufferReadStrategy.class, DomainSocket.socketpair());
   }
 
   @Test(timeout=180000)
   public void testClientServerOutStreamInAbb() throws Exception {
     testClientServer1(OutputStreamWriteStrategy.class,
-        ArrayBackedByteBufferReadStrategy.class);
+        ArrayBackedByteBufferReadStrategy.class, null);
+  }
+
+  @Test(timeout=180000)
+  public void testClientServerOutStreamInAbbWithSocketpair() throws Exception {
+    testClientServer1(OutputStreamWriteStrategy.class,
+        ArrayBackedByteBufferReadStrategy.class, DomainSocket.socketpair());
   }
 
   static private class PassedFile {

+ 24 - 3
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java

@@ -19,7 +19,6 @@ package org.apache.hadoop.security;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.ipc.TestSaslRPC;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.authentication.util.KerberosName;
@@ -40,9 +39,9 @@ import java.util.Collection;
 import java.util.LinkedHashSet;
 import java.util.Set;
 
+import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
 import static org.apache.hadoop.ipc.TestSaslRPC.*;
-import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier;
 import static org.apache.hadoop.test.MetricsAsserts.*;
 import static org.junit.Assert.*;
 import static org.mockito.Mockito.mock;
@@ -55,6 +54,8 @@ public class TestUserGroupInformation {
   final private static String GROUP3_NAME = "group3";
   final private static String[] GROUP_NAMES = 
     new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME};
+  // Rollover interval of percentile metrics (in seconds)
+  private static final int PERCENTILES_INTERVAL = 1;
   private static Configuration conf;
   
   /**
@@ -80,7 +81,8 @@ public class TestUserGroupInformation {
     // doesn't matter what it is, but getGroups needs it set...
     // use HADOOP_HOME environment variable to prevent interfering with logic
     // that finds winutils.exe
-    System.setProperty("hadoop.home.dir", System.getenv("HADOOP_HOME"));
+    String home = System.getenv("HADOOP_HOME");
+    System.setProperty("hadoop.home.dir", (home != null ? home : "."));
     // fake the realm is kerberos is enabled
     System.setProperty("java.security.krb5.kdc", "");
     System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
@@ -150,11 +152,15 @@ public class TestUserGroupInformation {
   /** Test login method */
   @Test (timeout = 30000)
   public void testLogin() throws Exception {
+    conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
+      String.valueOf(PERCENTILES_INTERVAL));
+    UserGroupInformation.setConfiguration(conf);
     // login from unix
     UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
     assertEquals(UserGroupInformation.getCurrentUser(),
                  UserGroupInformation.getLoginUser());
     assertTrue(ugi.getGroupNames().length >= 1);
+    verifyGroupMetrics(1);
 
     // ensure that doAs works correctly
     UserGroupInformation userGroupInfo = 
@@ -728,6 +734,21 @@ public class TestUserGroupInformation {
     }
   }
 
+  private static void verifyGroupMetrics(
+      long groups) throws InterruptedException {
+    MetricsRecordBuilder rb = getMetrics("UgiMetrics");
+    if (groups > 0) {
+      assertCounter("GetGroupsNumOps", groups, rb);
+      double avg = getDoubleGauge("GetGroupsAvgTime", rb);
+      assertTrue(avg >= 0.0);
+
+      // Sleep for an interval+slop to let the percentiles rollover
+      Thread.sleep((PERCENTILES_INTERVAL+1)*1000);
+      // Check that the percentiles were updated
+      assertQuantileGauges("GetGroups1s", rb);
+    }
+  }
+
   /**
    * Test for the case that UserGroupInformation.getCurrentUser()
    * is called when the AccessControlContext has a Subject associated

+ 171 - 132
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -13,10 +13,6 @@ Trunk (Unreleased)
 
     HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
 
-    HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
-    as a collection of storages (see breakdown of tasks below for features and
-    contributors).
-
   IMPROVEMENTS
 
     HDFS-4665. Move TestNetworkTopologyWithNodeGroup to common.
@@ -243,13 +239,18 @@ Trunk (Unreleased)
 
     HDFS-5636. Enforce a max TTL per cache pool. (awang via cmccabe)
 
+    HDFS-5651. Remove dfs.namenode.caching.enabled and improve CRM locking.
+    (cmccabe via wang)
+
   OPTIMIZATIONS
+
     HDFS-5349. DNA_CACHE and DNA_UNCACHE should be by blockId only. (cmccabe)
 
     HDFS-5665. Remove the unnecessary writeLock while initializing CacheManager
     in FsNameSystem Ctor. (Uma Maheswara Rao G via Andrew Wang)
 
   BUG FIXES
+
     HADOOP-9635 Fix potential Stack Overflow in DomainSocket.c (V. Karthik Kumar
                 via cmccabe)
 
@@ -444,139 +445,27 @@ Trunk (Unreleased)
 
     HDFS-5626. dfsadmin -report shows incorrect cache values. (cmccabe)
 
-    HDFS-5406. Send incremental block reports for all storages in a
-    single call. (Arpit Agarwal)
-
-    HDFS-5454. DataNode UUID should be assigned prior to FsDataset
-    initialization. (Arpit Agarwal)
-
     HDFS-5679. TestCacheDirectives should handle the case where native code
     is not available. (wang)
 
-  BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
-
-    HDFS-4985. Add storage type to the protocol and expose it in block report
-    and block locations. (Arpit Agarwal)
-
-    HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
-
-    HDFS-5000. DataNode configuration should allow specifying storage type.
-    (Arpit Agarwal)
-
-    HDFS-4987. Namenode changes to track multiple storages per datanode.
-    (szetszwo)
-
-    HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
-    (Junping Du via szetszwo)
+    HDFS-5701. Fix the CacheAdmin -addPool -maxTtl option name.
+    (Stephen Chu via wang)
 
-    HDFS-5009. Include storage information in the LocatedBlock.  (szetszwo)
+    HDFS-5708. The CacheManager throws a NPE in the DataNode logs when
+    processing cache reports that refer to a block not known to the
+    BlockManager. (cmccabe via wang)
 
-    HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
-    firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
-    fix a synchronization problem in DatanodeStorageInfo.  (szetszwo)
+    HDFS-5659. dfsadmin -report doesn't output cache information properly.
+    (wang)
 
-    HDFS-5157. Add StorageType to FsVolume.  (Junping Du via szetszwo)
+    HDFS-5705. TestSecondaryNameNodeUpgrade#testChangeNsIDFails may fail due
+    to ConcurrentModificationException. (Ted Yu via brandonli)
 
-    HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
-    datanodes.  (szetszwo)
+    HDFS-5719. FSImage#doRollback() should close prevState before return
+    (Ted Yu via brandonli)
 
-    HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
-
-    HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
-
-    HDFS-5222. Move block schedule information from DatanodeDescriptor to
-    DatanodeStorageInfo.  (szetszwo)
-
-    HDFS-4988. Datanode must support all the volumes as individual storages.
-    (Arpit Agarwal)
-
-    HDFS-5377. Heartbeats from Datandode should include one storage report
-    per storage directory. (Arpit Agarwal)
-
-    HDFS-5398. NameNode changes to process storage reports per storage
-    directory. (Arpit Agarwal)
-
-    HDFS-5390. Send one incremental block report per storage directory.
-    (Arpit Agarwal)
-
-    HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
-
-    HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities.  (szetszwo)
-
-    HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
-
-    HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
-    Arpit Agarwal)
-
-    HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
-    Agarwal)
-
-    HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
-
-    HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
-
-    HDFS-5448. Datanode should generate its ID on first registration. (Arpit
-    Agarwal)
-
-    HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
-    Agarwal)
-
-    HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
-
-    HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
-    (Contributed by szetszwo)
-
-    HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
-    by szetszwo)
-
-    HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
-    Agarwal)
-
-    HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
-    (Contributed by szetszwo)
-
-    HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
-    TestNNThroughputBenchmark (Contributed by szetszwo)
-
-    HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
-    Agarwal)
-
-    HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
-    by Junping Du)
-
-    HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
-
-    HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
-
-    HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
-
-    HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
-
-    HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
-
-    HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
-    Agarwal)
-
-    HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
-    (Junping Du via Arpit Agarwal)
- 
-    HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
-
-    HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
-    Agarwal)
-
-    HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
-    Agarwal)
-
-    HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
-    szetszwo)
-
-    HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
-
-    HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
-    not accurate. (Eric Sirianni via Arpit Agarwal)
-
-    HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
+    HDFS-5589. Namenode loops caching and uncaching when data should be
+    uncached (awang via cmccabe)
 
 Release 2.4.0 - UNRELEASED
 
@@ -608,6 +497,10 @@ Release 2.4.0 - UNRELEASED
 
     HDFS-5514. FSNamesystem's fsLock should allow custom implementation (daryn)
 
+    HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
+    as a collection of storages (see breakdown of tasks below for features and
+    contributors).
+
   IMPROVEMENTS
 
     HDFS-5267. Remove volatile from LightWeightHashSet. (Junping Du via llu)
@@ -618,9 +511,6 @@ Release 2.4.0 - UNRELEASED
     HDFS-5004. Add additional JMX bean for NameNode status data
     (Trevor Lorimer via cos)
 
-    HDFS-5068. Convert NNThroughputBenchmark to a Tool to allow generic options.
-    (shv)
-
     HDFS-4994. Audit log getContentSummary() calls. (Robert Parker via kihwal)
 
     HDFS-5144. Document time unit to NameNodeMetrics. (Akira Ajisaka via
@@ -768,6 +658,11 @@ Release 2.4.0 - UNRELEASED
     HDFS-2933. Improve DataNode Web UI Index Page. (Vivek Ganesan via
     Arpit Agarwal)
 
+    HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper.
+    (Haohui Mai via jing9)
+
+    HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe)
+
   OPTIMIZATIONS
 
     HDFS-5239.  Allow FSNamesystem lock fairness to be configurable (daryn)
@@ -833,6 +728,139 @@ Release 2.4.0 - UNRELEASED
     HDFS-5690. DataNode fails to start in secure mode when dfs.http.policy equals to 
     HTTP_ONLY. (Haohui Mai via jing9)
 
+  BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
+
+    HDFS-4985. Add storage type to the protocol and expose it in block report
+    and block locations. (Arpit Agarwal)
+
+    HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
+
+    HDFS-5000. DataNode configuration should allow specifying storage type.
+    (Arpit Agarwal)
+
+    HDFS-4987. Namenode changes to track multiple storages per datanode.
+    (szetszwo)
+
+    HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
+    (Junping Du via szetszwo)
+
+    HDFS-5009. Include storage information in the LocatedBlock.  (szetszwo)
+
+    HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
+    firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
+    fix a synchronization problem in DatanodeStorageInfo.  (szetszwo)
+
+    HDFS-5157. Add StorageType to FsVolume.  (Junping Du via szetszwo)
+
+    HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
+    datanodes.  (szetszwo)
+
+    HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
+
+    HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
+
+    HDFS-5222. Move block schedule information from DatanodeDescriptor to
+    DatanodeStorageInfo.  (szetszwo)
+
+    HDFS-4988. Datanode must support all the volumes as individual storages.
+    (Arpit Agarwal)
+
+    HDFS-5377. Heartbeats from Datandode should include one storage report
+    per storage directory. (Arpit Agarwal)
+
+    HDFS-5398. NameNode changes to process storage reports per storage
+    directory. (Arpit Agarwal)
+
+    HDFS-5390. Send one incremental block report per storage directory.
+    (Arpit Agarwal)
+
+    HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
+
+    HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities.  (szetszwo)
+
+    HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
+
+    HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
+    Arpit Agarwal)
+
+    HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
+    Agarwal)
+
+    HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
+
+    HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
+
+    HDFS-5448. Datanode should generate its ID on first registration. (Arpit
+    Agarwal)
+
+    HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
+    Agarwal)
+
+    HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
+
+    HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
+    (Contributed by szetszwo)
+
+    HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
+    by szetszwo)
+
+    HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
+    Agarwal)
+
+    HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
+    (Contributed by szetszwo)
+
+    HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
+    TestNNThroughputBenchmark (Contributed by szetszwo)
+
+    HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
+    Agarwal)
+
+    HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
+    by Junping Du)
+
+    HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
+
+    HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
+
+    HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
+
+    HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
+
+    HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
+
+    HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
+    Agarwal)
+
+    HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
+    (Junping Du via Arpit Agarwal)
+ 
+    HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
+
+    HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
+    Agarwal)
+
+    HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
+    Agarwal)
+
+    HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
+    szetszwo)
+
+    HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
+
+    HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
+    not accurate. (Eric Sirianni via Arpit Agarwal)
+
+    HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
+
+    HDFS-5406. Send incremental block reports for all storages in a
+    single call. (Arpit Agarwal)
+
+    HDFS-5454. DataNode UUID should be assigned prior to FsDataset
+    initialization. (Arpit Agarwal)
+
+    HDFS-5667. Include DatanodeStorage in StorageReport. (Arpit Agarwal)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -866,6 +894,12 @@ Release 2.3.0 - UNRELEASED
     HDFS-5662. Can't decommission a DataNode due to file's replication factor
     larger than the rest of the cluster size. (brandonli)
 
+    HDFS-5068. Convert NNThroughputBenchmark to a Tool to allow generic options.
+    (shv)
+
+    HDFS-5675. Add Mkdirs operation to NNThroughputBenchmark.
+    (Plamen Jeliazkov via shv)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -1016,6 +1050,11 @@ Release 2.3.0 - UNRELEASED
     HDFS-5661. Browsing FileSystem via web ui, should use datanode's fqdn instead of ip 
     address. (Benoy Antony via jing9)
 
+    HDFS-5582. hdfs getconf -excludeFile or -includeFile always failed (sathish
+    via cmccabe)
+
+    HDFS-5671. Fix socket leak in DFSInputStream#getBlockReader. (JamesLi via umamahesh) 
+
 Release 2.2.0 - 2013-10-13
 
   INCOMPATIBLE CHANGES

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -108,8 +108,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final long    DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0;
   public static final String  DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume";
   public static final int     DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_DEFAULT = 4;
-  public static final String  DFS_NAMENODE_CACHING_ENABLED_KEY = "dfs.namenode.caching.enabled";
-  public static final boolean DFS_NAMENODE_CACHING_ENABLED_DEFAULT = false;
+  public static final String  DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT =
+    "dfs.namenode.path.based.cache.block.map.allocation.percent";
+  public static final float    DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT = 0.25f;
 
   public static final String  DFS_NAMENODE_HTTP_PORT_KEY = "dfs.http.port";
   public static final int     DFS_NAMENODE_HTTP_PORT_DEFAULT = 50070;

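Editor's note: the new dfs.namenode.path.based.cache.block.map.allocation.percent key sizes the NameNode's cached-block GSet as a fraction of the heap, and the CacheManager hunk further down clamps it to a small minimum. Below is a minimal sketch of reading the key the same way; the wrapping class and the 0.001f floor are illustrative, mirroring the CacheManager change rather than quoting it.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class CacheBlockMapSizingSketch {
      // Illustrative floor; CacheManager defines its own MIN_CACHED_BLOCKS_PERCENT = 0.001f.
      private static final float MIN_PERCENT = 0.001f;

      public static void main(String[] args) {
        Configuration conf = new Configuration();
        float percent = conf.getFloat(
            DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
            DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT);
        if (percent < MIN_PERCENT) {
          // The real CacheManager logs this case and falls back to the minimum.
          percent = MIN_PERCENT;
        }
        System.out.println("cachedBlocks GSet sized at " + percent + " of the heap");
      }
    }
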
+ 15 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java

@@ -1188,11 +1188,21 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
     }
     // Try to create a new remote peer.
     Peer peer = newTcpPeer(dnAddr);
-    return BlockReaderFactory.newBlockReader(
-        dfsClient.getConf(), file, block, blockToken, startOffset,
-        len, verifyChecksum, clientName, peer, chosenNode, 
-        dsFactory, peerCache, fileInputStreamCache, false,
-        curCachingStrategy);
+    try {
+      reader = BlockReaderFactory.newBlockReader(dfsClient.getConf(), file,
+          block, blockToken, startOffset, len, verifyChecksum, clientName,
+          peer, chosenNode, dsFactory, peerCache, fileInputStreamCache, false,
+          curCachingStrategy);
+      return reader;
+    } catch (IOException ex) {
+      DFSClient.LOG.debug(
+          "Exception while getting block reader, closing stale " + peer, ex);
+      throw ex;
+    } finally {
+      if (reader == null) {
+        IOUtils.closeQuietly(peer);
+      }
+    }
   }
 
 

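Editor's note: the hunk above plugs the socket leak from HDFS-5671 by returning on success and closing the peer in a finally block only when no reader was handed out. A self-contained sketch of the same pattern with plain java.io types follows; newReader() is a hypothetical stand-in for BlockReaderFactory.newBlockReader().

    import java.io.Closeable;
    import java.io.IOException;

    public class CloseOnFailureSketch {
      // Hypothetical factory standing in for BlockReaderFactory.newBlockReader().
      static Closeable newReader(Closeable peer) throws IOException {
        throw new IOException("simulated failure while setting up the reader");
      }

      static Closeable openWithCleanup(final Closeable peer) throws IOException {
        Closeable reader = null;
        try {
          reader = newReader(peer);
          return reader;             // success: ownership of the peer passes to the reader
        } finally {
          if (reader == null) {
            peer.close();            // failure: close the peer so the socket is not leaked
          }
        }
      }

      public static void main(String[] args) {
        Closeable peer = new Closeable() {
          @Override
          public void close() {
            System.out.println("peer closed");
          }
        };
        try {
          openWithCleanup(peer);
        } catch (IOException expected) {
          System.out.println("reader creation failed: " + expected.getMessage());
        }
      }
    }
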
+ 19 - 24
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java

@@ -533,21 +533,7 @@ public class PBHelper {
   
   static public DatanodeInfoProto convertDatanodeInfo(DatanodeInfo di) {
     if (di == null) return null;
-    DatanodeInfoProto.Builder builder = DatanodeInfoProto.newBuilder();
-    if (di.getNetworkLocation() != null) {
-      builder.setLocation(di.getNetworkLocation());
-    }
-        
-    return builder.
-     setId(PBHelper.convert((DatanodeID) di)).
-     setCapacity(di.getCapacity()).
-     setDfsUsed(di.getDfsUsed()).
-     setRemaining(di.getRemaining()).
-     setBlockPoolUsed(di.getBlockPoolUsed()).
-     setLastUpdate(di.getLastUpdate()).
-     setXceiverCount(di.getXceiverCount()).
-     setAdminState(PBHelper.convert(di.getAdminState())).
-     build();     
+    return convert(di);
   }
   
   
@@ -591,15 +577,20 @@ public class PBHelper {
   
   public static DatanodeInfoProto convert(DatanodeInfo info) {
     DatanodeInfoProto.Builder builder = DatanodeInfoProto.newBuilder();
-    builder.setBlockPoolUsed(info.getBlockPoolUsed());
-    builder.setAdminState(PBHelper.convert(info.getAdminState()));
-    builder.setCapacity(info.getCapacity())
-        .setDfsUsed(info.getDfsUsed())
+    if (info.getNetworkLocation() != null) {
+      builder.setLocation(info.getNetworkLocation());
+    }
+    builder
         .setId(PBHelper.convert((DatanodeID)info))
-        .setLastUpdate(info.getLastUpdate())
-        .setLocation(info.getNetworkLocation())
+        .setCapacity(info.getCapacity())
+        .setDfsUsed(info.getDfsUsed())
         .setRemaining(info.getRemaining())
+        .setBlockPoolUsed(info.getBlockPoolUsed())
+        .setCacheCapacity(info.getCacheCapacity())
+        .setCacheUsed(info.getCacheUsed())
+        .setLastUpdate(info.getLastUpdate())
         .setXceiverCount(info.getXceiverCount())
+        .setAdminState(PBHelper.convert(info.getAdminState()))
         .build();
     return builder.build();
   }
@@ -1591,13 +1582,17 @@ public class PBHelper {
     StorageReportProto.Builder builder = StorageReportProto.newBuilder()
         .setBlockPoolUsed(r.getBlockPoolUsed()).setCapacity(r.getCapacity())
         .setDfsUsed(r.getDfsUsed()).setRemaining(r.getRemaining())
-        .setStorageUuid(r.getStorageID());
+        .setStorageUuid(r.getStorage().getStorageID())
+        .setStorage(convert(r.getStorage()));
     return builder.build();
   }
 
   public static StorageReport convert(StorageReportProto p) {
-    return new StorageReport(p.getStorageUuid(), p.getFailed(),
-        p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
+    return new StorageReport(
+        p.hasStorage() ?
+            convert(p.getStorage()) :
+            new DatanodeStorage(p.getStorageUuid()),
+        p.getFailed(), p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
         p.getBlockPoolUsed());
   }
 

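Editor's note: the new convert(StorageReportProto) keeps older senders working by preferring the embedded DatanodeStorage message and falling back to the legacy storageUuid string only when the optional field is absent. The sketch below illustrates that decision with hypothetical stand-in types; none of these classes are the real generated protobuf code.

    public class CompatDecodeSketch {
      // Hypothetical stand-ins for the generated StorageReportProto accessors;
      // only the shape of the decision is taken from the PBHelper change.
      static class ReportProto {
        final String legacyStorageUuid;   // old field, always present
        final StorageInfo storage;        // null when sent by an older peer

        ReportProto(String legacyStorageUuid, StorageInfo storage) {
          this.legacyStorageUuid = legacyStorageUuid;
          this.storage = storage;
        }
        boolean hasStorage() { return storage != null; }
      }

      static class StorageInfo {
        final String storageId;
        StorageInfo(String storageId) { this.storageId = storageId; }
      }

      // Prefer the richer embedded message when the sender supplied it,
      // otherwise wrap the legacy string field.
      static StorageInfo decode(ReportProto p) {
        return p.hasStorage()
            ? p.storage
            : new StorageInfo(p.legacyStorageUuid);
      }

      public static void main(String[] args) {
        System.out.println(decode(new ReportProto("DS-legacy", null)).storageId);
        System.out.println(decode(new ReportProto("DS-legacy", new StorageInfo("DS-new"))).storageId);
      }
    }
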
+ 254 - 137
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java

@@ -21,12 +21,14 @@ import static org.apache.hadoop.util.ExitUtil.terminate;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Date;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Random;
+import java.util.TreeMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.ReentrantLock;
@@ -76,7 +78,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
   /**
    * Pseudorandom number source
    */
-  private final Random random = new Random();
+  private static final Random random = new Random();
 
   /**
    * The interval at which we scan the namesystem for caching changes.
@@ -87,17 +89,17 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
    * The CacheReplicationMonitor (CRM) lock. Used to synchronize starting and
    * waiting for rescan operations.
    */
-  private final ReentrantLock lock = new ReentrantLock();
+  private final ReentrantLock lock;
 
   /**
    * Notifies the scan thread that an immediate rescan is needed.
    */
-  private final Condition doRescan = lock.newCondition();
+  private final Condition doRescan;
 
   /**
    * Notifies waiting threads that a rescan has finished.
    */
-  private final Condition scanFinished = lock.newCondition();
+  private final Condition scanFinished;
 
   /**
    * Whether there are pending CacheManager operations that necessitate a
@@ -121,11 +123,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
    */
   private boolean shutdown = false;
 
-  /**
-   * The monotonic time at which the current scan started.
-   */
-  private long startTimeMs;
-
   /**
    * Mark status of the current scan.
    */
@@ -142,24 +139,27 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
   private long scannedBlocks;
 
   public CacheReplicationMonitor(FSNamesystem namesystem,
-      CacheManager cacheManager, long intervalMs) {
+      CacheManager cacheManager, long intervalMs, ReentrantLock lock) {
     this.namesystem = namesystem;
     this.blockManager = namesystem.getBlockManager();
     this.cacheManager = cacheManager;
     this.cachedBlocks = cacheManager.getCachedBlocks();
     this.intervalMs = intervalMs;
+    this.lock = lock;
+    this.doRescan = this.lock.newCondition();
+    this.scanFinished = this.lock.newCondition();
   }
 
   @Override
   public void run() {
-    startTimeMs = 0;
+    long startTimeMs = 0;
+    Thread.currentThread().setName("CacheReplicationMonitor(" +
+        System.identityHashCode(this) + ")");
     LOG.info("Starting CacheReplicationMonitor with interval " +
              intervalMs + " milliseconds");
     try {
       long curTimeMs = Time.monotonicNow();
       while (true) {
-        // Not all of the variables accessed here need the CRM lock, but take
-        // it anyway for simplicity
         lock.lock();
         try {
           while (true) {
@@ -180,12 +180,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
             doRescan.await(delta, TimeUnit.MILLISECONDS);
             curTimeMs = Time.monotonicNow();
           }
-        } finally {
-          lock.unlock();
-        }
-        // Mark scan as started, clear needsRescan
-        lock.lock();
-        try {
           isScanning = true;
           needsRescan = false;
         } finally {
@@ -195,7 +189,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
         mark = !mark;
         rescan();
         curTimeMs = Time.monotonicNow();
-        // Retake the CRM lock to update synchronization-related variables
+        // Update synchronization-related variables.
         lock.lock();
         try {
           isScanning = false;
@@ -208,32 +202,15 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
             scannedBlocks + " block(s) in " + (curTimeMs - startTimeMs) + " " +
             "millisecond(s).");
       }
+    } catch (InterruptedException e) {
+      LOG.info("Shutting down CacheReplicationMonitor.");
+      return;
     } catch (Throwable t) {
       LOG.fatal("Thread exiting", t);
       terminate(1, t);
     }
   }
 
-  /**
-   * Similar to {@link CacheReplicationMonitor#waitForRescan()}, except it only
-   * waits if there are pending operations that necessitate a rescan as
-   * indicated by {@link #setNeedsRescan()}.
-   * <p>
-   * Note that this call may release the FSN lock, so operations before and
-   * after are not necessarily atomic.
-   */
-  public void waitForRescanIfNeeded() {
-    lock.lock();
-    try {
-      if (!needsRescan) {
-        return;
-      }
-    } finally {
-      lock.unlock();
-    }
-    waitForRescan();
-  }
-
   /**
    * Waits for a rescan to complete. This doesn't guarantee consistency with
    * pending operations, only relative recency, since it will not force a new
@@ -242,49 +219,27 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
    * Note that this call will release the FSN lock, so operations before and
    * after are not atomic.
    */
-  public void waitForRescan() {
-    // Drop the FSN lock temporarily and retake it after we finish waiting
-    // Need to handle both the read lock and the write lock
-    boolean retakeWriteLock = false;
-    if (namesystem.hasWriteLock()) {
-      namesystem.writeUnlock();
-      retakeWriteLock = true;
-    } else if (namesystem.hasReadLock()) {
-      namesystem.readUnlock();
-    } else {
-      // Expected to have at least one of the locks
-      Preconditions.checkState(false,
-          "Need to be holding either the read or write lock");
+  public void waitForRescanIfNeeded() {
+    Preconditions.checkArgument(!namesystem.hasWriteLock(),
+        "Must not hold the FSN write lock when waiting for a rescan.");
+    Preconditions.checkArgument(lock.isHeldByCurrentThread(),
+        "Must hold the CRM lock when waiting for a rescan.");
+    if (!needsRescan) {
+      return;
     }
-    // try/finally for retaking FSN lock
-    try {
-      lock.lock();
-      // try/finally for releasing CRM lock
+    // If no scan is already ongoing, mark the CRM as dirty and kick
+    if (!isScanning) {
+      doRescan.signal();
+    }
+    // Wait until the scan finishes and the count advances
+    final long startCount = scanCount;
+    while ((!shutdown) && (startCount >= scanCount)) {
       try {
-        // If no scan is already ongoing, mark the CRM as dirty and kick
-        if (!isScanning) {
-          needsRescan = true;
-          doRescan.signal();
-        }
-        // Wait until the scan finishes and the count advances
-        final long startCount = scanCount;
-        while (startCount >= scanCount) {
-          try {
-            scanFinished.await();
-          } catch (InterruptedException e) {
-            LOG.warn("Interrupted while waiting for CacheReplicationMonitor"
-                + " rescan", e);
-            break;
-          }
-        }
-      } finally {
-        lock.unlock();
-      }
-    } finally {
-      if (retakeWriteLock) {
-        namesystem.writeLock();
-      } else {
-        namesystem.readLock();
+        scanFinished.await();
+      } catch (InterruptedException e) {
+        LOG.warn("Interrupted while waiting for CacheReplicationMonitor"
+            + " rescan", e);
+        break;
       }
     }
   }
@@ -294,42 +249,43 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
    * changes that require a rescan.
    */
   public void setNeedsRescan() {
-    lock.lock();
-    try {
-      this.needsRescan = true;
-    } finally {
-      lock.unlock();
-    }
+    Preconditions.checkArgument(lock.isHeldByCurrentThread(),
+        "Must hold the CRM lock when setting the needsRescan bit.");
+    this.needsRescan = true;
   }
 
   /**
-   * Shut down and join the monitor thread.
+   * Shut down the monitor thread.
    */
   @Override
   public void close() throws IOException {
+    Preconditions.checkArgument(namesystem.hasWriteLock());
     lock.lock();
     try {
       if (shutdown) return;
+      // Since we hold both the FSN write lock and the CRM lock here,
+      // we know that the CRM thread cannot be currently modifying
+      // the cache manager state while we're closing it.
+      // Since the CRM thread checks the value of 'shutdown' after waiting
+      // for a lock, we know that the thread will not modify the cache
+      // manager state after this point.
       shutdown = true;
       doRescan.signalAll();
       scanFinished.signalAll();
     } finally {
       lock.unlock();
     }
-    try {
-      if (this.isAlive()) {
-        this.join(60000);
-      }
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-    }
   }
 
-  private void rescan() {
+  private void rescan() throws InterruptedException {
     scannedDirectives = 0;
     scannedBlocks = 0;
     namesystem.writeLock();
     try {
+      if (shutdown) {
+        throw new InterruptedException("CacheReplicationMonitor was " +
+            "shut down.");
+      }
       resetStatistics();
       rescanCacheDirectives();
       rescanCachedBlockMap();
@@ -356,8 +312,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     FSDirectory fsDir = namesystem.getFSDirectory();
     final long now = new Date().getTime();
     for (CacheDirective directive : cacheManager.getCacheDirectives()) {
-      // Reset the directive's statistics
-      directive.resetStatistics();
       // Skip processing this entry if it has expired
       if (LOG.isTraceEnabled()) {
         LOG.trace("Directive expiry is at " + directive.getExpiryTime());
@@ -460,14 +414,21 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
             directive.getReplication()) * blockInfo.getNumBytes();
         cachedTotal += cachedByBlock;
 
-        if (mark != ocblock.getMark()) {
-          // Mark hasn't been set in this scan, so update replication and mark.
+        if ((mark != ocblock.getMark()) ||
+            (ocblock.getReplication() < directive.getReplication())) {
+          //
+          // Overwrite the block's replication and mark in two cases:
+          //
+          // 1. If the mark on the CachedBlock is different from the mark for
+          // this scan, that means the block hasn't been updated during this
+          // scan, and we should overwrite whatever is there, since it is no
+          // longer valid.
+          //
+          // 2. If the replication in the CachedBlock is less than what the
+          // directive asks for, we want to increase the block's replication
+          // field to what the directive asks for.
+          //
           ocblock.setReplicationAndMark(directive.getReplication(), mark);
-        } else {
-          // Mark already set in this scan.  Set replication to highest value in
-          // any CacheDirective that covers this file.
-          ocblock.setReplicationAndMark((short)Math.max(
-              directive.getReplication(), ocblock.getReplication()), mark);
         }
       }
     }
@@ -483,6 +444,39 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     }
   }
 
+  private String findReasonForNotCaching(CachedBlock cblock, 
+          BlockInfo blockInfo) {
+    if (blockInfo == null) {
+      // Somehow, a cache report with the block arrived, but the block
+      // reports from the DataNode haven't (yet?) described such a block.
+      // Alternately, the NameNode might have invalidated the block, but the
+      // DataNode hasn't caught up.  In any case, we want to tell the DN
+      // to uncache this.
+      return "not tracked by the BlockManager";
+    } else if (!blockInfo.isComplete()) {
+      // When a cached block changes state from complete to some other state
+      // on the DataNode (perhaps because of append), it will begin the
+      // uncaching process.  However, the uncaching process is not
+      // instantaneous, especially if clients have pinned the block.  So
+      // there may be a period of time when incomplete blocks remain cached
+      // on the DataNodes.
+      return "not complete";
+    } else if (cblock.getReplication() == 0) {
+      // Since 0 is not a valid value for a cache directive's replication
+      // field, seeing a replication of 0 on a CacheBlock means that it
+      // has never been reached by any sweep.
+      return "not needed by any directives";
+    } else if (cblock.getMark() != mark) { 
+      // Although the block was needed in the past, we didn't reach it during
+      // the current sweep.  Therefore, it doesn't need to be cached any more.
+      // Need to set the replication to 0 so it doesn't flip back to cached
+      // when the mark flips on the next scan
+      cblock.setReplicationAndMark((short)0, mark);
+      return "no longer needed by any directives";
+    }
+    return null;
+  }
+
   /**
    * Scan through the cached block map.
    * Any blocks which are under-replicated should be assigned new Datanodes.
@@ -508,11 +502,17 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
           iter.remove();
         }
       }
-      // If the block's mark doesn't match with the mark of this scan, that
-      // means that this block couldn't be reached during this scan.  That means
-      // it doesn't need to be cached any more.
-      int neededCached = (cblock.getMark() != mark) ?
-          0 : cblock.getReplication();
+      BlockInfo blockInfo = blockManager.
+            getStoredBlock(new Block(cblock.getBlockId()));
+      String reason = findReasonForNotCaching(cblock, blockInfo);
+      int neededCached = 0;
+      if (reason != null) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("not caching " + cblock + " because it is " + reason);
+        }
+      } else {
+        neededCached = cblock.getReplication();
+      }
       int numCached = cached.size();
       if (numCached >= neededCached) {
         // If we have enough replicas, drop all pending cached.
@@ -566,9 +566,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
   private void addNewPendingUncached(int neededUncached,
       CachedBlock cachedBlock, List<DatanodeDescriptor> cached,
       List<DatanodeDescriptor> pendingUncached) {
-    if (!cacheManager.isActive()) {
-      return;
-    }
     // Figure out which replicas can be uncached.
     LinkedList<DatanodeDescriptor> possibilities =
         new LinkedList<DatanodeDescriptor>();
@@ -601,19 +598,18 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
    * @param pendingCached    A list of DataNodes that will soon cache the
    *                         block.
    */
-  private void addNewPendingCached(int neededCached,
+  private void addNewPendingCached(final int neededCached,
       CachedBlock cachedBlock, List<DatanodeDescriptor> cached,
       List<DatanodeDescriptor> pendingCached) {
-    if (!cacheManager.isActive()) {
-      return;
-    }
     // To figure out which replicas can be cached, we consult the
     // blocksMap.  We don't want to try to cache a corrupt replica, though.
     BlockInfo blockInfo = blockManager.
           getStoredBlock(new Block(cachedBlock.getBlockId()));
     if (blockInfo == null) {
-      LOG.debug("Not caching block " + cachedBlock + " because it " +
-          "was deleted from all DataNodes.");
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Not caching block " + cachedBlock + " because there " +
+            "is no record of it on the NameNode.");
+      }
       return;
     }
     if (!blockInfo.isComplete()) {
@@ -623,35 +619,156 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
       }
       return;
     }
-    List<DatanodeDescriptor> possibilities = new LinkedList<DatanodeDescriptor>();
+    // Filter the list of replicas to only the valid targets
+    List<DatanodeDescriptor> possibilities =
+        new LinkedList<DatanodeDescriptor>();
     int numReplicas = blockInfo.getCapacity();
     Collection<DatanodeDescriptor> corrupt =
         blockManager.getCorruptReplicas(blockInfo);
+    int outOfCapacity = 0;
     for (int i = 0; i < numReplicas; i++) {
       DatanodeDescriptor datanode = blockInfo.getDatanode(i);
-      if ((datanode != null) && 
-          ((!pendingCached.contains(datanode)) &&
-          ((corrupt == null) || (!corrupt.contains(datanode))))) {
-        possibilities.add(datanode);
+      if (datanode == null) {
+        continue;
       }
-    }
-    while (neededCached > 0) {
-      if (possibilities.isEmpty()) {
-        LOG.warn("We need " + neededCached + " more replica(s) than " +
-            "actually exist to provide a cache replication of " +
-            cachedBlock.getReplication() + " for " + cachedBlock);
-        return;
+      if (datanode.isDecommissioned() || datanode.isDecommissionInProgress()) {
+        continue;
       }
-      DatanodeDescriptor datanode =
-          possibilities.remove(random.nextInt(possibilities.size()));
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("AddNewPendingCached: datanode " + datanode + 
-            " will now cache block " + cachedBlock);
+      if (corrupt != null && corrupt.contains(datanode)) {
+        continue;
+      }
+      if (pendingCached.contains(datanode) || cached.contains(datanode)) {
+        continue;
+      }
+      long pendingCapacity = datanode.getCacheRemaining();
+      // Subtract pending cached blocks from effective capacity
+      Iterator<CachedBlock> it = datanode.getPendingCached().iterator();
+      while (it.hasNext()) {
+        CachedBlock cBlock = it.next();
+        BlockInfo info =
+            blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
+        if (info != null) {
+          pendingCapacity -= info.getNumBytes();
+        }
+      }
+      it = datanode.getPendingUncached().iterator();
+      // Add pending uncached blocks from effective capacity
+      while (it.hasNext()) {
+        CachedBlock cBlock = it.next();
+        BlockInfo info =
+            blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
+        if (info != null) {
+          pendingCapacity += info.getNumBytes();
+        }
       }
+      if (pendingCapacity < blockInfo.getNumBytes()) {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Datanode " + datanode + " is not a valid possibility for"
+              + " block " + blockInfo.getBlockId() + " of size "
+              + blockInfo.getNumBytes() + " bytes, only has "
+              + datanode.getCacheRemaining() + " bytes of cache remaining.");
+        }
+        outOfCapacity++;
+        continue;
+      }
+      possibilities.add(datanode);
+    }
+    List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities,
+        neededCached, blockManager.getDatanodeManager().getStaleInterval());
+    for (DatanodeDescriptor datanode : chosen) {
       pendingCached.add(datanode);
       boolean added = datanode.getPendingCached().add(cachedBlock);
       assert added;
-      neededCached--;
     }
+    // We were unable to satisfy the requested replication factor
+    if (neededCached > chosen.size()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(
+            "Only have " +
+            (cachedBlock.getReplication() - neededCached + chosen.size()) +
+            " of " + cachedBlock.getReplication() + " cached replicas for " +
+            cachedBlock + " (" + outOfCapacity + " nodes have insufficient " +
+            "capacity).");
+      }
+    }
+  }
+
+  /**
+   * Chooses datanode locations for caching from a list of valid possibilities.
+   * Non-stale nodes are chosen before stale nodes.
+   * 
+   * @param possibilities List of candidate datanodes
+   * @param neededCached Number of replicas needed
+   * @param staleInterval Age of a stale datanode
+   * @return A list of chosen datanodes
+   */
+  private static List<DatanodeDescriptor> chooseDatanodesForCaching(
+      final List<DatanodeDescriptor> possibilities, final int neededCached,
+      final long staleInterval) {
+    // Make a copy that we can modify
+    List<DatanodeDescriptor> targets =
+        new ArrayList<DatanodeDescriptor>(possibilities);
+    // Selected targets
+    List<DatanodeDescriptor> chosen = new LinkedList<DatanodeDescriptor>();
+
+    // Filter out stale datanodes
+    List<DatanodeDescriptor> stale = new LinkedList<DatanodeDescriptor>();
+    Iterator<DatanodeDescriptor> it = targets.iterator();
+    while (it.hasNext()) {
+      DatanodeDescriptor d = it.next();
+      if (d.isStale(staleInterval)) {
+        it.remove();
+        stale.add(d);
+      }
+    }
+    // Select targets
+    while (chosen.size() < neededCached) {
+      // Try to use stale nodes if we're out of non-stale nodes, else we're done
+      if (targets.isEmpty()) {
+        if (!stale.isEmpty()) {
+          targets = stale;
+        } else {
+          break;
+        }
+      }
+      // Select a random target
+      DatanodeDescriptor target =
+          chooseRandomDatanodeByRemainingCapacity(targets);
+      chosen.add(target);
+      targets.remove(target);
+    }
+    return chosen;
+  }
+
+  /**
+   * Choose a single datanode from the provided list of possible
+   * targets, weighted by the percentage of free space remaining on the node.
+   * 
+   * @return The chosen datanode
+   */
+  private static DatanodeDescriptor chooseRandomDatanodeByRemainingCapacity(
+      final List<DatanodeDescriptor> targets) {
+    // Use a weighted probability to choose the target datanode
+    float total = 0;
+    for (DatanodeDescriptor d : targets) {
+      total += d.getCacheRemainingPercent();
+    }
+    // Give each datanode a portion of keyspace equal to its relative weight
+    // [0, w1) selects d1, [w1, w2) selects d2, etc.
+    TreeMap<Integer, DatanodeDescriptor> lottery =
+        new TreeMap<Integer, DatanodeDescriptor>();
+    int offset = 0;
+    for (DatanodeDescriptor d : targets) {
+      // Since we're using floats, be paranoid about negative values
+      int weight =
+          Math.max(1, (int)((d.getCacheRemainingPercent() / total) * 1000000));
+      offset += weight;
+      lottery.put(offset, d);
+    }
+    // Choose a number from [0, offset), which is the total amount of weight,
+    // to select the winner
+    DatanodeDescriptor winner =
+        lottery.higherEntry(random.nextInt(offset)).getValue();
+    return winner;
   }
 }

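Editor's note: chooseRandomDatanodeByRemainingCapacity() above implements a weighted lottery. Each candidate receives a slice of an integer keyspace proportional to its remaining cache percentage, and TreeMap.higherEntry() maps a uniform random draw onto the owner of that slice. A stripped-down, self-contained sketch of the same technique follows; it uses generic types instead of DatanodeDescriptor, while the 1,000,000 scale and the Math.max(1, ...) guard mirror the patch.

    import java.util.Arrays;
    import java.util.List;
    import java.util.Random;
    import java.util.TreeMap;

    public class WeightedLotterySketch {
      private static final Random random = new Random();

      /**
       * Pick one candidate with probability proportional to its weight
       * (for the CRM, the weight is the datanode's cache-remaining percentage).
       */
      static <T> T pickWeighted(List<T> candidates, List<Float> weights) {
        float total = 0;
        for (float w : weights) {
          total += w;
        }
        // Give each candidate a slice of keyspace equal to its relative weight:
        // [0, w1) selects c1, [w1, w1+w2) selects c2, and so on.
        TreeMap<Integer, T> lottery = new TreeMap<Integer, T>();
        int offset = 0;
        for (int i = 0; i < candidates.size(); i++) {
          // Guard against zero or negative weights, as the patch does with Math.max(1, ...).
          int slice = Math.max(1, (int) ((weights.get(i) / total) * 1000000));
          offset += slice;
          lottery.put(offset, candidates.get(i));
        }
        // higherEntry() returns the first key strictly greater than the draw,
        // i.e. the candidate whose slice contains it.
        return lottery.higherEntry(random.nextInt(offset)).getValue();
      }

      public static void main(String[] args) {
        List<String> nodes = Arrays.asList("dn1", "dn2", "dn3");
        List<Float> cacheRemainingPercent = Arrays.asList(10f, 30f, 60f);
        System.out.println("chosen: " + pickWeighted(nodes, cacheRemainingPercent));
      }
    }
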
+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java

@@ -355,11 +355,11 @@ public class DatanodeDescriptor extends DatanodeInfo {
     setLastUpdate(Time.now());    
     this.volumeFailures = volFailures;
     for (StorageReport report : reports) {
-      DatanodeStorageInfo storage = storageMap.get(report.getStorageID());
+      DatanodeStorageInfo storage = storageMap.get(report.getStorage().getStorageID());
       if (storage == null) {
         // This is seen during cluster initialization when the heartbeat
         // is received before the initial block reports from each storage.
-        storage = updateStorage(new DatanodeStorage(report.getStorageID()));
+        storage = updateStorage(report.getStorage());
       }
       storage.receivedHeartbeat(report);
       totalCapacity += report.getCapacity();

+ 7 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -1443,6 +1443,13 @@ public class DatanodeManager {
     return getClass().getSimpleName() + ": " + host2DatanodeMap;
   }
 
+  public void clearPendingCachingCommands() {
+    for (DatanodeDescriptor dn : datanodeMap.values()) {
+      dn.getPendingCached().clear();
+      dn.getPendingUncached().clear();
+    }
+  }
+
   public void setShouldSendCachingCommands(boolean shouldSendCachingCommands) {
     this.shouldSendCachingCommands = shouldSendCachingCommands;
   }

+ 2 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java

@@ -121,7 +121,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
       reports = new StorageReport[volumes.volumes.size()];
       int i = 0;
       for (FsVolumeImpl volume : volumes.volumes) {
-        reports[i++] = new StorageReport(volume.getStorageID(),
+        reports[i++] = new StorageReport(volume.toDatanodeStorage(),
                                          false,
                                          volume.getCapacity(),
                                          volume.getDfsUsed(),
@@ -237,12 +237,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
     final List<FsVolumeImpl> volArray = new ArrayList<FsVolumeImpl>(
         storage.getNumStorageDirs());
     for (int idx = 0; idx < storage.getNumStorageDirs(); idx++) {
-      // TODO: getStorageTypeFromLocations() is only a temporary workaround and 
-      // should be replaced with getting storage type from DataStorage (missing 
-      // storage type now) directly.
       Storage.StorageDirectory sd = storage.getStorageDir(idx);
       final File dir = sd.getCurrentDir();
-      final StorageType storageType = getStorageTypeFromLocations(dataLocations, dir);
+      final StorageType storageType = getStorageTypeFromLocations(dataLocations, sd.getRoot());
       volArray.add(new FsVolumeImpl(this, sd.getStorageUuid(), dir, conf,
           storageType));
       LOG.info("Added volume - " + dir + ", StorageType: " + storageType);

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java

@@ -19,10 +19,10 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.HashMap;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Executor;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadFactory;
@@ -54,7 +54,7 @@ class FsVolumeImpl implements FsVolumeSpi {
   private final String storageID;
   private final StorageType storageType;
   private final Map<String, BlockPoolSlice> bpSlices
-      = new HashMap<String, BlockPoolSlice>();
+      = new ConcurrentHashMap<String, BlockPoolSlice>();
   private final File currentDir;    // <StorageDirectory>/current
   private final DF usage;           
   private final long reserved;

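Editor's note: the HashMap-to-ConcurrentHashMap switch matters presumably because bpSlices is read and iterated by multiple threads while block pools are added or removed; a plain HashMap can throw ConcurrentModificationException under that pattern, while ConcurrentHashMap provides thread-safe access and weakly consistent iterators. A small self-contained demonstration of that property, using plain strings instead of BlockPoolSlice:

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    public class ConcurrentMapIterationSketch {
      public static void main(String[] args) throws InterruptedException {
        final Map<String, String> bpSlices = new ConcurrentHashMap<String, String>();
        bpSlices.put("BP-1", "slice-1");

        // Writer thread mutates the map while the main thread iterates it.
        Thread writer = new Thread(new Runnable() {
          @Override
          public void run() {
            for (int i = 2; i < 1000; i++) {
              bpSlices.put("BP-" + i, "slice-" + i);
            }
          }
        });
        writer.start();

        // With a plain HashMap this loop could throw ConcurrentModificationException;
        // ConcurrentHashMap's iterator is weakly consistent and never does.
        int seen = 0;
        for (Map.Entry<String, String> e : bpSlices.entrySet()) {
          seen++;
        }
        writer.join();
        System.out.println("iterated " + seen + " entries without error");
      }
    }
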
+ 97 - 124
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java

@@ -17,8 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_DEFAULT;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES;
@@ -40,6 +40,7 @@ import java.util.List;
 import java.util.Map.Entry;
 import java.util.SortedMap;
 import java.util.TreeMap;
+import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.logging.Log;
@@ -62,7 +63,6 @@ import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
-import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
@@ -85,7 +85,7 @@ import com.google.common.annotations.VisibleForTesting;
 /**
  * The Cache Manager handles caching on DataNodes.
  *
- * This class is instantiated by the FSNamesystem when caching is enabled.
+ * This class is instantiated by the FSNamesystem.
  * It maintains the mapping of cached blocks to datanodes via processing
  * datanode cache reports. Based on these reports and addition and removal of
  * caching directives, we will schedule caching and uncaching work.
@@ -94,6 +94,8 @@ import com.google.common.annotations.VisibleForTesting;
 public final class CacheManager {
   public static final Log LOG = LogFactory.getLog(CacheManager.class);
 
+  private static final float MIN_CACHED_BLOCKS_PERCENT = 0.001f;
+
   // TODO: add pending / underCached / schedule cached blocks stats.
 
   /**
@@ -149,32 +151,14 @@ public final class CacheManager {
   private final long scanIntervalMs;
 
   /**
-   * Whether caching is enabled.
-   *
-   * If caching is disabled, we will not process cache reports or store
-   * information about what is cached where.  We also do not start the
-   * CacheReplicationMonitor thread.  This will save resources, but provide
-   * less functionality.
-   *     
-   * Even when caching is disabled, we still store path-based cache
-   * information.  This information is stored in the edit log and fsimage.  We
-   * don't want to lose it just because a configuration setting was turned off.
-   * However, we will not act on this information if caching is disabled.
-   */
-  private final boolean enabled;
-
-  /**
-   * Whether the CacheManager is active.
-   * 
-   * When the CacheManager is active, it tells the DataNodes what to cache
-   * and uncache.  The CacheManager cannot become active if enabled = false.
+   * All cached blocks.
    */
-  private boolean active = false;
+  private final GSet<CachedBlock, CachedBlock> cachedBlocks;
 
   /**
-   * All cached blocks.
+   * Lock which protects the CacheReplicationMonitor.
    */
-  private final GSet<CachedBlock, CachedBlock> cachedBlocks;
+  private final ReentrantLock crmLock = new ReentrantLock();
 
   /**
    * The CacheReplicationMonitor.
@@ -195,54 +179,51 @@ public final class CacheManager {
     scanIntervalMs = conf.getLong(
         DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
         DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
-    this.enabled = conf.getBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY,
-        DFS_NAMENODE_CACHING_ENABLED_DEFAULT);
-    this.cachedBlocks = !enabled ? null :
-        new LightWeightGSet<CachedBlock, CachedBlock>(
-            LightWeightGSet.computeCapacity(0.25, "cachedBlocks"));
+    float cachedBlocksPercent = conf.getFloat(
+          DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
+          DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT);
+    if (cachedBlocksPercent < MIN_CACHED_BLOCKS_PERCENT) {
+      LOG.info("Using minimum value " + MIN_CACHED_BLOCKS_PERCENT +
+        " for " + DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT);
+      cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT;
+    }
+    this.cachedBlocks = new LightWeightGSet<CachedBlock, CachedBlock>(
+          LightWeightGSet.computeCapacity(cachedBlocksPercent,
+              "cachedBlocks"));
+
   }
 
-  /**
-   * Activate the cache manager.
-   * 
-   * When the cache manager is active, tell the datanodes where to cache files.
-   */
-  public void activate() {
-    assert namesystem.hasWriteLock();
-    if (enabled && (!active)) {
-      LOG.info("Activating CacheManager.  " +
-          "Starting replication monitor thread...");
-      active = true;
-      monitor = new CacheReplicationMonitor(namesystem, this,
-         scanIntervalMs);
-      monitor.start();
+  public void startMonitorThread() {
+    crmLock.lock();
+    try {
+      if (this.monitor == null) {
+        this.monitor = new CacheReplicationMonitor(namesystem, this,
+            scanIntervalMs, crmLock);
+        this.monitor.start();
+      }
+    } finally {
+      crmLock.unlock();
     }
   }
 
-  /**
-   * Deactivate the cache manager.
-   * 
-   * When the cache manager is inactive, it does not tell the datanodes where to
-   * cache files.
-   */
-  public void deactivate() {
-    assert namesystem.hasWriteLock();
-    if (active) {
-      LOG.info("Deactivating CacheManager.  " +
-          "stopping CacheReplicationMonitor thread...");
-      active = false;
-      IOUtils.closeQuietly(monitor);
-      monitor = null;
-      LOG.info("CacheReplicationMonitor thread stopped and deactivated.");
+  public void stopMonitorThread() {
+    crmLock.lock();
+    try {
+      if (this.monitor != null) {
+        CacheReplicationMonitor prevMonitor = this.monitor;
+        this.monitor = null;
+        IOUtils.closeQuietly(prevMonitor);
+      }
+    } finally {
+      crmLock.unlock();
     }
   }
 
-  /**
-   * Return true only if the cache manager is active.
-   * Must be called under the FSN read or write lock.
-   */
-  public boolean isActive() {
-    return active;
+  public void clearDirectiveStats() {
+    assert namesystem.hasWriteLock();
+    for (CacheDirective directive : directivesById.values()) {
+      directive.resetStatistics();
+    }
   }
 
   /**
@@ -481,9 +462,7 @@ public final class CacheManager {
     directive.addBytesNeeded(stats.getBytesNeeded());
     directive.addFilesNeeded(directive.getFilesNeeded());
 
-    if (monitor != null) {
-      monitor.setNeedsRescan();
-    }
+    setNeedsRescan();
   }
 
   /**
@@ -515,10 +494,6 @@ public final class CacheManager {
       long expiryTime = validateExpiryTime(info, pool.getMaxRelativeExpiryMs());
       // Do quota validation if required
       if (!flags.contains(CacheFlag.FORCE)) {
-        // Can't kick and wait if caching is disabled
-        if (monitor != null) {
-          monitor.waitForRescan();
-        }
         checkLimit(pool, path, replication);
       }
       // All validation passed
@@ -623,9 +598,7 @@ public final class CacheManager {
       validateExpiryTime(infoWithDefaults, destPool.getMaxRelativeExpiryMs());
 
       // Indicate changes to the CRM
-      if (monitor != null) {
-        monitor.setNeedsRescan();
-      }
+      setNeedsRescan();
 
       // Validation passed
       removeInternal(prevEntry);
@@ -660,9 +633,7 @@ public final class CacheManager {
     pool.getDirectiveList().remove(directive);
     assert directive.getPool() == null;
 
-    if (monitor != null) {
-      monitor.setNeedsRescan();
-    }
+    setNeedsRescan();
   }
 
   public void removeDirective(long id, FSPermissionChecker pc)
@@ -695,9 +666,6 @@ public final class CacheManager {
     if (filter.getReplication() != null) {
       throw new IOException("Filtering by replication is unsupported.");
     }
-    if (monitor != null) {
-      monitor.waitForRescanIfNeeded();
-    }
     ArrayList<CacheDirectiveEntry> replies =
         new ArrayList<CacheDirectiveEntry>(NUM_PRE_ALLOCATED_ENTRIES);
     int numReplies = 0;
@@ -806,9 +774,7 @@ public final class CacheManager {
         bld.append(prefix).append("set limit to " + info.getLimit());
         prefix = "; ";
         // New limit changes stats, need to set needs refresh
-        if (monitor != null) {
-          monitor.setNeedsRescan();
-        }
+        setNeedsRescan();
       }
       if (info.getMaxRelativeExpiryMs() != null) {
         final Long maxRelativeExpiry = info.getMaxRelativeExpiryMs();
@@ -854,9 +820,7 @@ public final class CacheManager {
         directivesById.remove(directive.getId());
         iter.remove();
       }
-      if (monitor != null) {
-        monitor.setNeedsRescan();
-      }
+      setNeedsRescan();
     } catch (IOException e) {
       LOG.info("removeCachePool of " + poolName + " failed: ", e);
       throw e;
@@ -867,9 +831,6 @@ public final class CacheManager {
   public BatchedListEntries<CachePoolEntry>
       listCachePools(FSPermissionChecker pc, String prevKey) {
     assert namesystem.hasReadLock();
-    if (monitor != null) {
-      monitor.waitForRescanIfNeeded();
-    }
     final int NUM_PRE_ALLOCATED_ENTRIES = 16;
     ArrayList<CachePoolEntry> results = 
         new ArrayList<CachePoolEntry>(NUM_PRE_ALLOCATED_ENTRIES);
@@ -885,9 +846,6 @@ public final class CacheManager {
   }
 
   public void setCachedLocations(LocatedBlock block) {
-    if (!enabled) {
-      return;
-    }
     CachedBlock cachedBlock =
         new CachedBlock(block.getBlock().getBlockId(),
             (short)0, false);
@@ -903,12 +861,6 @@ public final class CacheManager {
 
   public final void processCacheReport(final DatanodeID datanodeID,
       final List<Long> blockIds) throws IOException {
-    if (!enabled) {
-      LOG.info("Ignoring cache report from " + datanodeID +
-          " because " + DFS_NAMENODE_CACHING_ENABLED_KEY + " = false. " +
-          "number of blocks: " + blockIds.size());
-      return;
-    }
     namesystem.writeLock();
     final long startTime = Time.monotonicNow();
     final long endTime;
@@ -940,39 +892,28 @@ public final class CacheManager {
       final List<Long> blockIds) {
     CachedBlocksList cached = datanode.getCached();
     cached.clear();
+    CachedBlocksList cachedList = datanode.getCached();
+    CachedBlocksList pendingCachedList = datanode.getPendingCached();
     for (Iterator<Long> iter = blockIds.iterator(); iter.hasNext(); ) {
-      Block block = new Block(iter.next());
-      BlockInfo blockInfo = blockManager.getStoredBlock(block);
-      if (!blockInfo.isComplete()) {
-        LOG.warn("Ignoring block id " + block.getBlockId() + ", because " +
-            "it is in not complete yet.  It is in state " + 
-            blockInfo.getBlockUCState());
-        continue;
-      }
-      Collection<DatanodeDescriptor> corruptReplicas =
-          blockManager.getCorruptReplicas(blockInfo);
-      if ((corruptReplicas != null) && corruptReplicas.contains(datanode)) {
-        // The NameNode will eventually remove or update the corrupt block.
-        // Until then, we pretend that it isn't cached.
-        LOG.warn("Ignoring cached replica on " + datanode + " of " + block +
-            " because it is corrupt.");
-        continue;
-      }
+      long blockId = iter.next();
       CachedBlock cachedBlock =
-          new CachedBlock(block.getBlockId(), (short)0, false);
+          new CachedBlock(blockId, (short)0, false);
       CachedBlock prevCachedBlock = cachedBlocks.get(cachedBlock);
-      // Use the existing CachedBlock if it's present; otherwise,
-      // insert a new one.
+      // Add the block ID from the cache report to the cachedBlocks map
+      // if it's not already there.
       if (prevCachedBlock != null) {
         cachedBlock = prevCachedBlock;
       } else {
         cachedBlocks.put(cachedBlock);
       }
-      if (!cachedBlock.isPresent(datanode.getCached())) {
-        datanode.getCached().add(cachedBlock);
+      // Add the block to the datanode's implicit cached block list
+      // if it's not already there.  Similarly, remove it from the pending
+      // cached block list if it exists there.
+      if (!cachedBlock.isPresent(cachedList)) {
+        cachedList.add(cachedBlock);
       }
-      if (cachedBlock.isPresent(datanode.getPendingCached())) {
-        datanode.getPendingCached().remove(cachedBlock);
+      if (cachedBlock.isPresent(pendingCachedList)) {
+        pendingCachedList.remove(cachedBlock);
       }
     }
   }
@@ -1097,4 +1038,36 @@ public final class CacheManager {
     }
     prog.endStep(Phase.LOADING_FSIMAGE, step);
   }
+
+  public void waitForRescanIfNeeded() {
+    crmLock.lock();
+    try {
+      if (monitor != null) {
+        monitor.waitForRescanIfNeeded();
+      }
+    } finally {
+      crmLock.unlock();
+    }
+  }
+
+  private void setNeedsRescan() {
+    crmLock.lock();
+    try {
+      if (monitor != null) {
+        monitor.setNeedsRescan();
+      }
+    } finally {
+      crmLock.unlock();
+    }
+  }
+
+  @VisibleForTesting
+  public Thread getCacheReplicationMonitor() {
+    crmLock.lock();
+    try {
+      return monitor;
+    } finally {
+      crmLock.unlock();
+    }
+  }
 }

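Editor's note: startMonitorThread()/stopMonitorThread() replace the old activate()/deactivate() pair. The new crmLock serializes lifecycle changes, the null check makes both calls idempotent, and clearing the field before closing keeps other callers from handing work to a monitor that is shutting down. Below is a minimal self-contained sketch of that lifecycle pattern; MonitorThread is a hypothetical stand-in for CacheReplicationMonitor.

    import java.util.concurrent.locks.ReentrantLock;

    public class MonitorLifecycleSketch {
      private final ReentrantLock crmLock = new ReentrantLock();
      private MonitorThread monitor;   // guarded by crmLock

      // Hypothetical stand-in for CacheReplicationMonitor.
      static class MonitorThread extends Thread {
        private volatile boolean shutdown = false;

        @Override
        public void run() {
          while (!shutdown) {
            try {
              Thread.sleep(100);       // placeholder for the rescan loop
            } catch (InterruptedException e) {
              return;
            }
          }
        }

        void close() {
          shutdown = true;
          interrupt();
        }
      }

      public void startMonitorThread() {
        crmLock.lock();
        try {
          if (monitor == null) {       // idempotent: at most one monitor at a time
            monitor = new MonitorThread();
            monitor.start();
          }
        } finally {
          crmLock.unlock();
        }
      }

      public void stopMonitorThread() {
        crmLock.lock();
        try {
          if (monitor != null) {
            MonitorThread prev = monitor;
            monitor = null;            // clear the field before closing, as the patch does
            prev.close();
          }
        } finally {
          crmLock.unlock();
        }
      }

      public static void main(String[] args) throws InterruptedException {
        MonitorLifecycleSketch mgr = new MonitorLifecycleSketch();
        mgr.startMonitorThread();
        Thread.sleep(250);
        mgr.stopMonitorThread();
      }
    }
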
+ 52 - 48
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -405,60 +405,64 @@ public class FSImage implements Closeable {
     // Directories that don't have previous state do not rollback
     boolean canRollback = false;
     FSImage prevState = new FSImage(conf);
-    prevState.getStorage().layoutVersion = HdfsConstants.LAYOUT_VERSION;
-    for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
-      StorageDirectory sd = it.next();
-      File prevDir = sd.getPreviousDir();
-      if (!prevDir.exists()) {  // use current directory then
-        LOG.info("Storage directory " + sd.getRoot()
-                 + " does not contain previous fs state.");
-        // read and verify consistency with other directories
-        storage.readProperties(sd);
-        continue;
-      }
+    try {
+      prevState.getStorage().layoutVersion = HdfsConstants.LAYOUT_VERSION;
+      for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+        StorageDirectory sd = it.next();
+        File prevDir = sd.getPreviousDir();
+        if (!prevDir.exists()) {  // use current directory then
+          LOG.info("Storage directory " + sd.getRoot()
+            + " does not contain previous fs state.");
+          // read and verify consistency with other directories
+          storage.readProperties(sd);
+          continue;
+        }
 
-      // read and verify consistency of the prev dir
-      prevState.getStorage().readPreviousVersionProperties(sd);
+        // read and verify consistency of the prev dir
+        prevState.getStorage().readPreviousVersionProperties(sd);
 
-      if (prevState.getLayoutVersion() != HdfsConstants.LAYOUT_VERSION) {
-        throw new IOException(
-          "Cannot rollback to storage version " +
-          prevState.getLayoutVersion() +
-          " using this version of the NameNode, which uses storage version " +
-          HdfsConstants.LAYOUT_VERSION + ". " +
-          "Please use the previous version of HDFS to perform the rollback.");
+        if (prevState.getLayoutVersion() != HdfsConstants.LAYOUT_VERSION) {
+          throw new IOException(
+            "Cannot rollback to storage version " +
+                prevState.getLayoutVersion() +
+                " using this version of the NameNode, which uses storage version " +
+                HdfsConstants.LAYOUT_VERSION + ". " +
+              "Please use the previous version of HDFS to perform the rollback.");
+        }
+        canRollback = true;
       }
-      canRollback = true;
-    }
-    if (!canRollback)
-      throw new IOException("Cannot rollback. None of the storage "
-                            + "directories contain previous fs state.");
+      if (!canRollback)
+        throw new IOException("Cannot rollback. None of the storage "
+            + "directories contain previous fs state.");
 
-    // Now that we know all directories are going to be consistent
-    // Do rollback for each directory containing previous state
-    for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
-      StorageDirectory sd = it.next();
-      File prevDir = sd.getPreviousDir();
-      if (!prevDir.exists())
-        continue;
+      // Now that we know all directories are going to be consistent
+      // Do rollback for each directory containing previous state
+      for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
+        StorageDirectory sd = it.next();
+        File prevDir = sd.getPreviousDir();
+        if (!prevDir.exists())
+          continue;
+
+        LOG.info("Rolling back storage directory " + sd.getRoot()
+          + ".\n   new LV = " + prevState.getStorage().getLayoutVersion()
+          + "; new CTime = " + prevState.getStorage().getCTime());
+        File tmpDir = sd.getRemovedTmp();
+        assert !tmpDir.exists() : "removed.tmp directory must not exist.";
+        // rename current to tmp
+        File curDir = sd.getCurrentDir();
+        assert curDir.exists() : "Current directory must exist.";
+        NNStorage.rename(curDir, tmpDir);
+        // rename previous to current
+        NNStorage.rename(prevDir, curDir);
 
-      LOG.info("Rolling back storage directory " + sd.getRoot()
-               + ".\n   new LV = " + prevState.getStorage().getLayoutVersion()
-               + "; new CTime = " + prevState.getStorage().getCTime());
-      File tmpDir = sd.getRemovedTmp();
-      assert !tmpDir.exists() : "removed.tmp directory must not exist.";
-      // rename current to tmp
-      File curDir = sd.getCurrentDir();
-      assert curDir.exists() : "Current directory must exist.";
-      NNStorage.rename(curDir, tmpDir);
-      // rename previous to current
-      NNStorage.rename(prevDir, curDir);
-
-      // delete tmp dir
-      NNStorage.deleteDir(tmpDir);
-      LOG.info("Rollback of " + sd.getRoot()+ " is complete.");
+        // delete tmp dir
+        NNStorage.deleteDir(tmpDir);
+        LOG.info("Rollback of " + sd.getRoot()+ " is complete.");
+      }
+      isUpgradeFinalized = true;
+    } finally {
+      prevState.close();
     }
-    isUpgradeFinalized = true;
   }
 
   private void doFinalize(StorageDirectory sd) throws IOException {

+ 12 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -931,7 +931,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     writeLock();
     try {
       if (blockManager != null) blockManager.close();
-      cacheManager.deactivate();
     } finally {
       writeUnlock();
     }
@@ -1001,7 +1000,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
           editLogRollerThreshold, editLogRollerInterval));
       nnEditLogRoller.start();
 
-      cacheManager.activate();
+      cacheManager.startMonitorThread();
       blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
     } finally {
       writeUnlock();
@@ -1052,7 +1051,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         // so that the tailer starts from the right spot.
         dir.fsImage.updateLastAppliedTxIdFromWritten();
       }
-      cacheManager.deactivate();
+      cacheManager.stopMonitorThread();
+      cacheManager.clearDirectiveStats();
+      blockManager.getDatanodeManager().clearPendingCachingCommands();
       blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
     } finally {
       writeUnlock();
@@ -7066,6 +7067,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       return (Long) cacheEntry.getPayload();
     }
     boolean success = false;
+    if (!flags.contains(CacheFlag.FORCE)) {
+      cacheManager.waitForRescanIfNeeded();
+    }
     writeLock();
     Long result = null;
     try {
@@ -7107,6 +7111,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     if (cacheEntry != null && cacheEntry.isSuccess()) {
       return;
     }
+    if (!flags.contains(CacheFlag.FORCE)) {
+      cacheManager.waitForRescanIfNeeded();
+    }
     writeLock();
     try {
       checkOperation(OperationCategory.WRITE);
@@ -7166,6 +7173,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     final FSPermissionChecker pc = isPermissionEnabled ?
         getPermissionChecker() : null;
     BatchedListEntries<CacheDirectiveEntry> results;
+    cacheManager.waitForRescanIfNeeded();
     readLock();
     boolean success = false;
     try {
@@ -7289,6 +7297,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     BatchedListEntries<CachePoolEntry> results;
     checkOperation(OperationCategory.READ);
     boolean success = false;
+    cacheManager.waitForRescanIfNeeded();
     readLock();
     try {
       checkOperation(OperationCategory.READ);

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -480,6 +480,14 @@ public class NameNode implements NameNodeStatusMXBean {
    * @param conf the configuration
    */
   protected void initialize(Configuration conf) throws IOException {
+    if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
+      String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
+      if (intervals != null) {
+        conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
+          intervals);
+      }
+    }
+
     UserGroupInformation.setConfiguration(conf);
     loginAsNameNodeUser(conf);
 

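The block above makes the NameNode reuse the HDFS percentile intervals for the hadoop user-group metrics whenever the common key is left unset, so an operator only has to configure one key. A hedged sketch of that single setting; the interval values are illustrative:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hdfs.DFSConfigKeys;
  import org.apache.hadoop.hdfs.HdfsConfiguration;

  class PercentileIntervalsConf {
    static Configuration build() {
      Configuration conf = new HdfsConfiguration();
      // Rollover windows for percentile metrics, in seconds.  After this patch
      // the same value is copied into the hadoop.user.group metrics key during
      // NameNode.initialize() if that key is not set explicitly.
      conf.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, "60,300,900");
      return conf;
    }
  }
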
+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/StorageReport.java

@@ -21,7 +21,7 @@ package org.apache.hadoop.hdfs.server.protocol;
  * Utilization report for a Datanode storage
  */
 public class StorageReport {
-  private final String storageID;
+  private final DatanodeStorage storage;
   private final boolean failed;
   private final long capacity;
   private final long dfsUsed;
@@ -30,9 +30,9 @@ public class StorageReport {
 
   public static final StorageReport[] EMPTY_ARRAY = {};
   
-  public StorageReport(String sid, boolean failed, long capacity, long dfsUsed,
-      long remaining, long bpUsed) {
-    this.storageID = sid;
+  public StorageReport(DatanodeStorage storage, boolean failed,
+      long capacity, long dfsUsed, long remaining, long bpUsed) {
+    this.storage = storage;
     this.failed = failed;
     this.capacity = capacity;
     this.dfsUsed = dfsUsed;
@@ -40,8 +40,8 @@ public class StorageReport {
     this.blockPoolUsed = bpUsed;
   }
 
-  public String getStorageID() {
-    return storageID;
+  public DatanodeStorage getStorage() {
+    return storage;
   }
 
   public boolean isFailed() {

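StorageReport now carries the full DatanodeStorage descriptor instead of a bare storage ID, which is what lets heartbeats expose storage type and state (see the protobuf and test changes below). A small sketch of the new constructor, reusing the capacity figures from the TestJspHelper update in this patch:

  import org.apache.hadoop.hdfs.StorageType;
  import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
  import org.apache.hadoop.hdfs.server.protocol.StorageReport;

  class StorageReportSample {
    static StorageReport build(String storageUuid) {
      DatanodeStorage storage = new DatanodeStorage(
          storageUuid, DatanodeStorage.State.NORMAL, StorageType.SSD);
      // failed, capacity, dfsUsed, remaining, blockPoolUsed (all in bytes)
      return new StorageReport(storage, false, 1024, 100, 924, 100);
    }
  }
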
+ 40 - 21
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java

@@ -84,7 +84,12 @@ public class CacheAdmin extends Configured implements Tool {
     for (int j = 1; j < args.length; j++) {
       argsList.add(args[j]);
     }
-    return command.run(getConf(), argsList);
+    try {
+      return command.run(getConf(), argsList);
+    } catch (IllegalArgumentException e) {
+      System.err.println(prettifyException(e));
+      return -1;
+    }
   }
 
   public static void main(String[] argsArray) throws IOException {
@@ -135,6 +140,20 @@ public class CacheAdmin extends Configured implements Tool {
     return maxTtl;
   }
 
+  private static Expiration parseExpirationString(String ttlString)
+      throws IOException {
+    Expiration ex = null;
+    if (ttlString != null) {
+      if (ttlString.equalsIgnoreCase("never")) {
+        ex = CacheDirectiveInfo.Expiration.NEVER;
+      } else {
+        long ttl = DFSUtil.parseRelativeTime(ttlString);
+        ex = CacheDirectiveInfo.Expiration.newRelative(ttl);
+      }
+    }
+    return ex;
+  }
+
   interface Command {
     String getName();
     String getShortUsage();
@@ -171,6 +190,7 @@ public class CacheAdmin extends Configured implements Tool {
       listing.addRow("<time-to-live>", "How long the directive is " +
           "valid. Can be specified in minutes, hours, and days, e.g. " +
           "30m, 4h, 2d. Valid units are [smhd]." +
+          " \"never\" indicates a directive that never expires." +
           " If unspecified, the directive never expires.");
       return getShortUsage() + "\n" +
         "Add a new cache directive.\n\n" +
@@ -203,15 +223,15 @@ public class CacheAdmin extends Configured implements Tool {
       }
 
       String ttlString = StringUtils.popOptionWithArgument("-ttl", args);
-      if (ttlString != null) {
-        try {
-          long ttl = DFSUtil.parseRelativeTime(ttlString);
-          builder.setExpiration(CacheDirectiveInfo.Expiration.newRelative(ttl));
-        } catch (IOException e) {
-          System.err.println(
-              "Error while parsing ttl value: " + e.getMessage());
-          return 1;
+      try {
+        Expiration ex = parseExpirationString(ttlString);
+        if (ex != null) {
+          builder.setExpiration(ex);
         }
+      } catch (IOException e) {
+        System.err.println(
+            "Error while parsing ttl value: " + e.getMessage());
+        return 1;
       }
 
       if (!args.isEmpty()) {
@@ -326,7 +346,7 @@ public class CacheAdmin extends Configured implements Tool {
       listing.addRow("<time-to-live>", "How long the directive is " +
           "valid. Can be specified in minutes, hours, and days, e.g. " +
           "30m, 4h, 2d. Valid units are [smhd]." +
-          " If unspecified, the directive never expires.");
+          " \"never\" indicates a directive that never expires.");
       return getShortUsage() + "\n" +
         "Modify a cache directive.\n\n" +
         listing.toString();
@@ -362,17 +382,16 @@ public class CacheAdmin extends Configured implements Tool {
         modified = true;
       }
       String ttlString = StringUtils.popOptionWithArgument("-ttl", args);
-      if (ttlString != null) {
-        long ttl;
-        try {
-          ttl = DFSUtil.parseRelativeTime(ttlString);
-        } catch (IOException e) {
-          System.err.println(
-              "Error while parsing ttl value: " + e.getMessage());
-          return 1;
+      try {
+        Expiration ex = parseExpirationString(ttlString);
+        if (ex != null) {
+          builder.setExpiration(ex);
+          modified = true;
         }
-        builder.setExpiration(CacheDirectiveInfo.Expiration.newRelative(ttl));
-        modified = true;
+      } catch (IOException e) {
+        System.err.println(
+            "Error while parsing ttl value: " + e.getMessage());
+        return 1;
       }
       if (!args.isEmpty()) {
         System.err.println("Can't understand argument: " + args.get(0));
@@ -578,7 +597,7 @@ public class CacheAdmin extends Configured implements Tool {
     public String getShortUsage() {
       return "[" + NAME + " <name> [-owner <owner>] " +
           "[-group <group>] [-mode <mode>] [-limit <limit>] " +
-          "[-maxttl <maxTtl>]\n";
+          "[-maxTtl <maxTtl>]\n";
     }
 
     @Override

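With parseExpirationString() in place, both -addDirective and -modifyDirective accept the literal "never" as a TTL in addition to relative forms such as 30m, 4h, or 2d (on the command line roughly hdfs cacheadmin -addDirective -path <path> -pool <pool> -ttl never; the exact launcher wiring may differ). A short sketch of the Expiration API the helper relies on:

  import java.io.IOException;
  import org.apache.hadoop.hdfs.DFSUtil;
  import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;

  class TtlParsing {
    static Expiration[] demo() throws IOException {
      // "never" maps straight to the sentinel expiration.
      Expiration never = Expiration.NEVER;
      // Relative strings go through DFSUtil.parseRelativeTime, which returns
      // a duration in milliseconds.
      Expiration thirtyMinutes =
          Expiration.newRelative(DFSUtil.parseRelativeTime("30m"));
      return new Expiration[] { never, thirtyMinutes };
    }
  }
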
+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java

@@ -29,6 +29,7 @@ import java.util.Map;
 import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
@@ -85,9 +86,9 @@ public class GetConf extends Configured implements Tool {
       map.put(BACKUP.getName().toLowerCase(), 
           new BackupNodesCommandHandler());
       map.put(INCLUDE_FILE.getName().toLowerCase(), 
-          new CommandHandler("DFSConfigKeys.DFS_HOSTS"));
+          new CommandHandler(DFSConfigKeys.DFS_HOSTS));
       map.put(EXCLUDE_FILE.getName().toLowerCase(),
-          new CommandHandler("DFSConfigKeys.DFS_HOSTS_EXCLUDE"));
+          new CommandHandler(DFSConfigKeys.DFS_HOSTS_EXCLUDE));
       map.put(NNRPCADDRESSES.getName().toLowerCase(),
           new NNRpcAddressesCommandHandler());
       map.put(CONFKEY.getName().toLowerCase(),

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto

@@ -196,12 +196,13 @@ message HeartbeatRequestProto {
 }
 
 message StorageReportProto {
-  required string storageUuid = 1;
+  required string storageUuid = 1 [ deprecated = true ];
   optional bool failed = 2 [ default = false ];
   optional uint64 capacity = 3 [ default = 0 ];
   optional uint64 dfsUsed = 4 [ default = 0 ];
   optional uint64 remaining = 5 [ default = 0 ];
   optional uint64 blockPoolUsed = 6 [ default = 0 ];
+  optional DatanodeStorageProto storage = 7; // supersedes StorageUuid
 }
 
 /**

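Because storageUuid is deprecated rather than removed, the NameNode still has to decode heartbeats from DataNodes that only populate the old field. A rough, hedged sketch of the fallback; the PBHelper.convert overload and the generated class name are assumptions for illustration, and the real logic lives in the PBHelper change listed in this commit:

  import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.StorageReportProto;
  import org.apache.hadoop.hdfs.protocolPB.PBHelper;
  import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
  import org.apache.hadoop.hdfs.server.protocol.StorageReport;

  class StorageReportDecodeSketch {
    static StorageReport fromProto(StorageReportProto p) {
      // Prefer the new DatanodeStorageProto field; fall back to the deprecated
      // storageUuid so reports from older DataNodes still decode.
      DatanodeStorage storage = p.hasStorage()
          ? PBHelper.convert(p.getStorage())          // assumed convert overload
          : new DatanodeStorage(p.getStorageUuid());
      return new StorageReport(storage, p.getFailed(), p.getCapacity(),
          p.getDfsUsed(), p.getRemaining(), p.getBlockPoolUsed());
    }
  }
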
+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -1476,13 +1476,13 @@
 </property>
 
 <property>
-  <name>dfs.namenode.caching.enabled</name>
-  <value>false</value>
+  <name>dfs.namenode.path.based.cache.block.map.allocation.percent</name>
+  <value>0.25</value>
   <description>
-    Set to true to enable block caching.  This flag enables the NameNode to
-    maintain a mapping of cached blocks to DataNodes via processing DataNode
-    cache reports.  Based on these reports and addition and removal of caching
-    directives, the NameNode will schedule caching and uncaching work.
+    The percentage of the Java heap which we will allocate to the cached blocks
+    map.  The cached blocks map is a hash map which uses chained hashing.
+    Smaller maps may be accessed more slowly if the number of cached blocks is
+    large; larger maps will consume more memory.
   </description>
 </property>
 

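As a rough sense of scale for the new default, the cached blocks map is sized from the maximum Java heap at 0.25 percent. A back-of-the-envelope sketch; the bytes-per-entry figure a caller would pass in is an assumption for illustration, not a constant from the code:

  class CachedBlocksMapSizing {
    // Approximate number of hash-table entries the cached blocks map could hold.
    static long approxEntries(float allocationPercent, long bytesPerEntry) {
      long maxHeap = Runtime.getRuntime().maxMemory();
      // e.g. a 4 GB heap at 0.25 percent gives roughly 10 MB for the table.
      long mapBytes = (long) (maxHeap * (allocationPercent / 100.0));
      return mapBytes / bytesPerEntry;
    }
  }
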
+ 7 - 6
hadoop-hdfs-project/hadoop-hdfs/src/site/apt/CentralizedCacheManagement.apt.vm

@@ -242,12 +242,6 @@ Centralized Cache Management in HDFS
 
   Be sure to configure the following:
 
-  * dfs.namenode.caching.enabled
-
-    This must be set to true to enable caching. If this is false, the NameNode
-    will ignore cache reports, and will not ask DataNodes to cache
-    blocks.
-
   * dfs.datanode.max.locked.memory
 
     The DataNode will treat this as the maximum amount of memory it can use for
@@ -281,6 +275,13 @@ Centralized Cache Management in HDFS
 
     By default, this parameter is set to 10000, which is 10 seconds.
 
+  * dfs.namenode.path.based.cache.block.map.allocation.percent
+
+    The percentage of the Java heap which we will allocate to the cached blocks
+    map.  The cached blocks map is a hash map which uses chained hashing.
+    Smaller maps may be accessed more slowly if the number of cached blocks is
+    large; larger maps will consume more memory.  The default is 0.25 percent.
+
 ** {OS Limits}
 
   If you get the error "Cannot start datanode because the configured max

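For reference, the settings called out in this documentation correspond to configuration keys that the tests in this patch also exercise. A minimal programmatic sketch with illustrative values:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.hdfs.DFSConfigKeys;
  import org.apache.hadoop.hdfs.HdfsConfiguration;

  class CentralizedCachingConf {
    static Configuration build() {
      Configuration conf = new HdfsConfiguration();
      // Upper bound on memory the DataNode may mlock for cached replicas (bytes).
      conf.setLong(DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY, 64 * 1024);
      // How often the NameNode rescans cache directives (milliseconds).
      conf.setLong(
          DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 10000);
      return conf;
    }
  }
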
+ 30 - 16
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java

@@ -140,6 +140,7 @@ public class MiniDFSCluster {
     private int nameNodeHttpPort = 0;
     private final Configuration conf;
     private int numDataNodes = 1;
+    private StorageType storageType = StorageType.DEFAULT;
     private boolean format = true;
     private boolean manageNameDfsDirs = true;
     private boolean manageNameDfsSharedDirs = true;
@@ -185,6 +186,14 @@ public class MiniDFSCluster {
       return this;
     }
 
+    /**
+     * Default: StorageType.DEFAULT
+     */
+    public Builder storageType(StorageType type) {
+      this.storageType = type;
+      return this;
+    }
+
     /**
      * Default: true
      */
@@ -341,6 +350,7 @@ public class MiniDFSCluster {
       
     initMiniDFSCluster(builder.conf,
                        builder.numDataNodes,
+                       builder.storageType,
                        builder.format,
                        builder.manageNameDfsDirs,
                        builder.manageNameDfsSharedDirs,
@@ -592,7 +602,7 @@ public class MiniDFSCluster {
                         String[] racks, String hosts[],
                         long[] simulatedCapacities) throws IOException {
     this.nameNodes = new NameNodeInfo[1]; // Single namenode in the cluster
-    initMiniDFSCluster(conf, numDataNodes, format,
+    initMiniDFSCluster(conf, numDataNodes, StorageType.DEFAULT, format,
         manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs,
         operation, racks, hosts,
         simulatedCapacities, null, true, false,
@@ -601,7 +611,7 @@ public class MiniDFSCluster {
 
   private void initMiniDFSCluster(
       Configuration conf,
-      int numDataNodes, boolean format, boolean manageNameDfsDirs,
+      int numDataNodes, StorageType storageType, boolean format, boolean manageNameDfsDirs,
       boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy,
       boolean manageDataDfsDirs, StartupOption operation, String[] racks,
       String[] hosts, long[] simulatedCapacities, String clusterId,
@@ -670,7 +680,7 @@ public class MiniDFSCluster {
     }
 
     // Start the DataNodes
-    startDataNodes(conf, numDataNodes, manageDataDfsDirs, operation, racks,
+    startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs, operation, racks,
         hosts, simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, checkDataNodeHostConfig);
     waitClusterUp();
     //make sure ProxyUsers uses the latest conf
@@ -990,6 +1000,19 @@ public class MiniDFSCluster {
     }
   }
 
+  String makeDataNodeDirs(int dnIndex, StorageType storageType) throws IOException {
+    StringBuilder sb = new StringBuilder();
+    for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
+      File dir = getInstanceStorageDir(dnIndex, j);
+      dir.mkdirs();
+      if (!dir.isDirectory()) {
+        throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
+      }
+      sb.append((j > 0 ? "," : "") + "[" + storageType + "]" + fileAsURI(dir));
+    }
+    return sb.toString();
+  }
+
   /**
    * Modify the config and start up additional DataNodes.  The info port for
    * DataNodes is guaranteed to use a free port.
@@ -1052,7 +1075,7 @@ public class MiniDFSCluster {
                              String[] racks, String[] hosts,
                              long[] simulatedCapacities,
                              boolean setupHostsFile) throws IOException {
-    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
+    startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
         simulatedCapacities, setupHostsFile, false, false);
   }
 
@@ -1066,7 +1089,7 @@ public class MiniDFSCluster {
       long[] simulatedCapacities,
       boolean setupHostsFile,
       boolean checkDataNodeAddrConfig) throws IOException {
-    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
+    startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
         simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, false);
   }
 
@@ -1098,7 +1121,7 @@ public class MiniDFSCluster {
    * @throws IllegalStateException if NameNode has been shutdown
    */
   public synchronized void startDataNodes(Configuration conf, int numDataNodes,
-      boolean manageDfsDirs, StartupOption operation, 
+      StorageType storageType, boolean manageDfsDirs, StartupOption operation,
       String[] racks, String[] hosts,
       long[] simulatedCapacities,
       boolean setupHostsFile,
@@ -1154,16 +1177,7 @@ public class MiniDFSCluster {
       // Set up datanode address
       setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
       if (manageDfsDirs) {
-        StringBuilder sb = new StringBuilder();
-        for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
-          File dir = getInstanceStorageDir(i, j);
-          dir.mkdirs();
-          if (!dir.isDirectory()) {
-            throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
-          }
-          sb.append((j > 0 ? "," : "") + fileAsURI(dir));
-        }
-        String dirs = sb.toString();
+        String dirs = makeDataNodeDirs(i, storageType);
         dnConf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
         conf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
       }

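The new storageType(...) builder option is exercised by the TestStorageReport class added later in this patch; the usage pattern, taken from that test, is simply:

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .storageType(StorageType.SSD)   // tag every provisioned volume with this type
      .build();
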
+ 5 - 13
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java

@@ -50,7 +50,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
   }
 
   public synchronized void startDataNodes(Configuration conf, int numDataNodes,
-      boolean manageDfsDirs, StartupOption operation, 
+      StorageType storageType, boolean manageDfsDirs, StartupOption operation,
       String[] racks, String[] nodeGroups, String[] hosts,
       long[] simulatedCapacities,
       boolean setupHostsFile,
@@ -112,15 +112,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
       // Set up datanode address
       setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
       if (manageDfsDirs) {
-        File dir1 = getInstanceStorageDir(i, 0);
-        File dir2 = getInstanceStorageDir(i, 1);
-        dir1.mkdirs();
-        dir2.mkdirs();
-        if (!dir1.isDirectory() || !dir2.isDirectory()) { 
-          throw new IOException("Mkdirs failed to create directory for DataNode "
-              + i + ": " + dir1 + " or " + dir2);
-        }
-        String dirs = fileAsURI(dir1) + "," + fileAsURI(dir2);
+        String dirs = makeDataNodeDirs(i, storageType);
         dnConf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
         conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
       }
@@ -198,7 +190,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
       String[] racks, String[] nodeGroups, String[] hosts,
       long[] simulatedCapacities,
       boolean setupHostsFile) throws IOException {
-    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, nodeGroups, 
+    startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, nodeGroups,
         hosts, simulatedCapacities, setupHostsFile, false, false);
   }
 
@@ -213,13 +205,13 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
   // This is for initialize from parent class.
   @Override
   public synchronized void startDataNodes(Configuration conf, int numDataNodes, 
-      boolean manageDfsDirs, StartupOption operation, 
+      StorageType storageType, boolean manageDfsDirs, StartupOption operation,
       String[] racks, String[] hosts,
       long[] simulatedCapacities,
       boolean setupHostsFile,
       boolean checkDataNodeAddrConfig,
       boolean checkDataNodeHostConfig) throws IOException {
-    startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, 
+    startDataNodes(conf, numDataNodes, storageType, manageDfsDirs, operation, racks,
         NODE_GROUPS, hosts, simulatedCapacities, setupHostsFile, 
         checkDataNodeAddrConfig, checkDataNodeHostConfig);
   }

+ 3 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java

@@ -257,8 +257,10 @@ public class BlockManagerTestUtil {
       DatanodeDescriptor dnd) {
     ArrayList<StorageReport> reports = new ArrayList<StorageReport>();
     for (DatanodeStorageInfo storage : dnd.getStorageInfos()) {
+      DatanodeStorage dns = new DatanodeStorage(
+          storage.getStorageID(), storage.getState(), storage.getStorageType());
       StorageReport report = new StorageReport(
-          storage.getStorageID(), false, storage.getCapacity(),
+          dns, false, storage.getCapacity(),
           storage.getDfsUsed(), storage.getRemaining(),
           storage.getBlockPoolUsed());
       reports.add(report);

+ 5 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/common/TestJspHelper.java

@@ -470,11 +470,14 @@ public class TestJspHelper {
     BlockManagerTestUtil.updateStorage(dnDesc1, new DatanodeStorage("dnStorage1"));
     BlockManagerTestUtil.updateStorage(dnDesc2, new DatanodeStorage("dnStorage2"));
 
+    DatanodeStorage dns1 = new DatanodeStorage("dnStorage1");
+    DatanodeStorage dns2 = new DatanodeStorage("dnStorage2");
+
     StorageReport[] report1 = new StorageReport[] {
-        new StorageReport("dnStorage1", false, 1024, 100, 924, 100)
+        new StorageReport(dns1, false, 1024, 100, 924, 100)
     };
     StorageReport[] report2 = new StorageReport[] {
-        new StorageReport("dnStorage2", false, 2500, 200, 1848, 200)
+        new StorageReport(dns2, false, 2500, 200, 1848, 200)
     };
     dnDesc1.updateHeartbeat(report1, 5l, 3l, 10, 2);
     dnDesc2.updateHeartbeat(report2, 10l, 2l, 20, 1);

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java

@@ -394,8 +394,9 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
     }
 
     synchronized StorageReport getStorageReport(String bpid) {
-      return new StorageReport(getStorageUuid(), false, getCapacity(),
-          getUsed(), getFree(), map.get(bpid).getUsed());
+      return new StorageReport(new DatanodeStorage(getStorageUuid()),
+          false, getCapacity(), getUsed(), getFree(),
+          map.get(bpid).getUsed());
     }
   }
   

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDiskError.java

@@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage;
 import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.util.DataChecksum;
 import org.junit.After;
@@ -186,9 +187,8 @@ public class TestDiskError {
     // Check permissions on directories in 'dfs.datanode.data.dir'
     FileSystem localFS = FileSystem.getLocal(conf);
     for (DataNode dn : cluster.getDataNodes()) {
-      String[] dataDirs =
-        dn.getConf().getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
-      for (String dir : dataDirs) {
+      for (FsVolumeSpi v : dn.getFSDataset().getVolumes()) {
+        String dir = v.getBasePath();
         Path dataDir = new Path(dir);
         FsPermission actual = localFS.getFileStatus(dataDir).getPermission();
           assertEquals("Permission for dir: " + dataDir + ", is " + actual +

+ 74 - 6
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCache.java

@@ -36,16 +36,20 @@ import java.util.Set;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.HdfsBlockLocation;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.LogVerificationAppender;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
+import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
@@ -82,7 +86,11 @@ public class TestFsDatasetCache {
 
   // Most Linux installs allow a default of 64KB locked memory
   private static final long CACHE_CAPACITY = 64 * 1024;
-  private static final long BLOCK_SIZE = 4096;
+  // mlock always locks the entire page. So we don't need to deal with this
+  // rounding, use the OS page size for the block size.
+  private static final long PAGE_SIZE =
+      NativeIO.POSIX.getCacheManipulator().getOperatingSystemPageSize();
+  private static final long BLOCK_SIZE = PAGE_SIZE;
 
   private static Configuration conf;
   private static MiniDFSCluster cluster = null;
@@ -104,14 +112,13 @@ public class TestFsDatasetCache {
   public void setUp() throws Exception {
     assumeTrue(!Path.WINDOWS);
     conf = new HdfsConfiguration();
-    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY, true);
-    conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS,
-        500);
+    conf.setLong(
+        DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 100);
+    conf.setLong(DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 500);
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     conf.setLong(DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY,
         CACHE_CAPACITY);
     conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
-    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY, true);
 
     prevCacheManipulator = NativeIO.POSIX.getCacheManipulator();
     NativeIO.POSIX.setCacheManipulator(new NoMlockCacheManipulator());
@@ -325,7 +332,7 @@ public class TestFsDatasetCache {
 
     // Create some test files that will exceed total cache capacity
     final int numFiles = 5;
-    final long fileSize = 15000;
+    final long fileSize = CACHE_CAPACITY / (numFiles-1);
 
     final Path[] testFiles = new Path[numFiles];
     final HdfsBlockLocation[][] fileLocs = new HdfsBlockLocation[numFiles][];
@@ -451,4 +458,65 @@ public class TestFsDatasetCache {
       }
     }, 100, 10000);
   }
+
+  @Test(timeout=60000)
+  public void testPageRounder() throws Exception {
+    // Write a small file
+    Path fileName = new Path("/testPageRounder");
+    final int smallBlocks = 512; // This should be smaller than the page size
+    assertTrue("Page size should be greater than smallBlocks!",
+        PAGE_SIZE > smallBlocks);
+    final int numBlocks = 5;
+    final int fileLen = smallBlocks * numBlocks;
+    FSDataOutputStream out =
+        fs.create(fileName, false, 4096, (short)1, smallBlocks);
+    out.write(new byte[fileLen]);
+    out.close();
+    HdfsBlockLocation[] locs = (HdfsBlockLocation[])fs.getFileBlockLocations(
+        fileName, 0, fileLen);
+    // Cache the file and check the sizes match the page size
+    setHeartbeatResponse(cacheBlocks(locs));
+    verifyExpectedCacheUsage(PAGE_SIZE * numBlocks, numBlocks);
+    // Uncache and check that it decrements by the page size too
+    setHeartbeatResponse(uncacheBlocks(locs));
+    verifyExpectedCacheUsage(0, 0);
+  }
+
+  @Test(timeout=60000)
+  public void testUncacheQuiesces() throws Exception {
+    // Create a file
+    Path fileName = new Path("/testUncacheQuiesces");
+    int fileLen = 4096;
+    DFSTestUtil.createFile(fs, fileName, fileLen, (short)1, 0xFDFD);
+    // Cache it
+    DistributedFileSystem dfs = cluster.getFileSystem();
+    dfs.addCachePool(new CachePoolInfo("pool"));
+    dfs.addCacheDirective(new CacheDirectiveInfo.Builder()
+        .setPool("pool").setPath(fileName).setReplication((short)3).build());
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
+        long blocksCached =
+            MetricsAsserts.getLongCounter("BlocksCached", dnMetrics);
+        return blocksCached > 0;
+      }
+    }, 1000, 30000);
+    // Uncache it
+    dfs.removeCacheDirective(1);
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
+        long blocksUncached =
+            MetricsAsserts.getLongCounter("BlocksUncached", dnMetrics);
+        return blocksUncached > 0;
+      }
+    }, 1000, 30000);
+    // Make sure that no additional messages were sent
+    Thread.sleep(10000);
+    MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
+    MetricsAsserts.assertCounter("BlocksCached", 1l, dnMetrics);
+    MetricsAsserts.assertCounter("BlocksUncached", 1l, dnMetrics);
+  }
 }

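The page-rounding test above makes the arithmetic explicit: because mlock pins whole pages, caching five 512-byte blocks on a typical 4 KiB page system consumes 5 x 4096 = 20480 bytes rather than 5 x 512 = 2560, which is why verifyExpectedCacheUsage is given PAGE_SIZE * numBlocks instead of the file length.
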
+ 113 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestStorageReport.java

@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.*;
+import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
+import org.apache.hadoop.hdfs.server.protocol.StorageReport;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.assertThat;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyInt;
+import static org.mockito.Matchers.anyLong;
+
+public class TestStorageReport {
+  public static final Log LOG = LogFactory.getLog(TestStorageReport.class);
+
+  private static short REPL_FACTOR = 1;
+  private static final StorageType storageType = StorageType.SSD; // pick non-default.
+
+  private static Configuration conf;
+  private MiniDFSCluster cluster;
+  private DistributedFileSystem fs;
+  static String bpid;
+
+  @Before
+  public void startUpCluster() throws IOException {
+    conf = new HdfsConfiguration();
+    cluster = new MiniDFSCluster.Builder(conf)
+        .numDataNodes(REPL_FACTOR)
+        .storageType(storageType)
+        .build();
+    fs = cluster.getFileSystem();
+    bpid = cluster.getNamesystem().getBlockPoolId();
+  }
+
+  @After
+  public void shutDownCluster() throws IOException {
+    if (cluster != null) {
+      fs.close();
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  /**
+   * Ensure that storage type and storage state are propagated
+   * in Storage Reports.
+   */
+  @Test
+  public void testStorageReportHasStorageTypeAndState() throws IOException {
+
+    // Make sure we are not testing with the default type, that would not
+    // be a very good test.
+    assertNotSame(storageType, StorageType.DEFAULT);
+    NameNode nn = cluster.getNameNode();
+    DataNode dn = cluster.getDataNodes().get(0);
+
+    // Insert a spy object for the NN RPC.
+    DatanodeProtocolClientSideTranslatorPB nnSpy =
+        DataNodeTestUtils.spyOnBposToNN(dn, nn);
+
+    // Trigger a heartbeat so there is an interaction with the spy
+    // object.
+    DataNodeTestUtils.triggerHeartbeat(dn);
+
+    // Verify that the callback passed in the expected parameters.
+    ArgumentCaptor<StorageReport[]> captor =
+        ArgumentCaptor.forClass(StorageReport[].class);
+
+    Mockito.verify(nnSpy).sendHeartbeat(
+        any(DatanodeRegistration.class),
+        captor.capture(),
+        anyLong(), anyLong(), anyInt(), anyInt(), anyInt());
+
+    StorageReport[] reports = captor.getValue();
+
+    for (StorageReport report: reports) {
+      assertThat(report.getStorage().getStorageType(), is(storageType));
+      assertThat(report.getStorage().getState(), is(DatanodeStorage.State.NORMAL));
+    }
+  }
+}

+ 109 - 9
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java

@@ -605,6 +605,98 @@ public class NNThroughputBenchmark implements Tool {
     }
   }
 
+  /**
+   * Directory creation statistics.
+   *
+   * Each thread creates the same (+ or -1) number of directories.
+   * Directory names are pre-generated during initialization.
+   */
+  class MkdirsStats extends OperationStatsBase {
+    // Operation types
+    static final String OP_MKDIRS_NAME = "mkdirs";
+    static final String OP_MKDIRS_USAGE = "-op mkdirs [-threads T] [-dirs N] " +
+        "[-dirsPerDir P]";
+
+    protected FileNameGenerator nameGenerator;
+    protected String[][] dirPaths;
+
+    MkdirsStats(List<String> args) {
+      super();
+      parseArguments(args);
+    }
+
+    @Override
+    String getOpName() {
+      return OP_MKDIRS_NAME;
+    }
+
+    @Override
+    void parseArguments(List<String> args) {
+      boolean ignoreUnrelatedOptions = verifyOpArgument(args);
+      int nrDirsPerDir = 2;
+      for (int i = 2; i < args.size(); i++) {       // parse command line
+        if(args.get(i).equals("-dirs")) {
+          if(i+1 == args.size())  printUsage();
+          numOpsRequired = Integer.parseInt(args.get(++i));
+        } else if(args.get(i).equals("-threads")) {
+          if(i+1 == args.size())  printUsage();
+          numThreads = Integer.parseInt(args.get(++i));
+        } else if(args.get(i).equals("-dirsPerDir")) {
+          if(i+1 == args.size())  printUsage();
+          nrDirsPerDir = Integer.parseInt(args.get(++i));
+        } else if(!ignoreUnrelatedOptions)
+          printUsage();
+      }
+      nameGenerator = new FileNameGenerator(getBaseDir(), nrDirsPerDir);
+    }
+
+    @Override
+    void generateInputs(int[] opsPerThread) throws IOException {
+      assert opsPerThread.length == numThreads : "Error opsPerThread.length";
+      nameNodeProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE,
+          false);
+      LOG.info("Generate " + numOpsRequired + " inputs for " + getOpName());
+      dirPaths = new String[numThreads][];
+      for(int idx=0; idx < numThreads; idx++) {
+        int threadOps = opsPerThread[idx];
+        dirPaths[idx] = new String[threadOps];
+        for(int jdx=0; jdx < threadOps; jdx++)
+          dirPaths[idx][jdx] = nameGenerator.
+              getNextFileName("ThroughputBench");
+      }
+    }
+
+    /**
+     * returns client name
+     */
+    @Override
+    String getExecutionArgument(int daemonId) {
+      return getClientName(daemonId);
+    }
+
+    /**
+     * Do mkdirs operation.
+     */
+    @Override
+    long executeOp(int daemonId, int inputIdx, String clientName)
+        throws IOException {
+      long start = Time.now();
+      nameNodeProto.mkdirs(dirPaths[daemonId][inputIdx],
+          FsPermission.getDefault(), true);
+      long end = Time.now();
+      return end-start;
+    }
+
+    @Override
+    void printResults() {
+      LOG.info("--- " + getOpName() + " inputs ---");
+      LOG.info("nrDirs = " + numOpsRequired);
+      LOG.info("nrThreads = " + numThreads);
+      LOG.info("nrDirsPerDir = " + nameGenerator.getFilesPerDirectory());
+      printStats();
+    }
+  }
+
   /**
    * Open file statistics.
    * 
@@ -846,7 +938,7 @@ public class NNThroughputBenchmark implements Tool {
       // register datanode
       dnRegistration = nameNodeProto.registerDatanode(dnRegistration);
       //first block reports
-      storage = new DatanodeStorage(dnRegistration.getDatanodeUuid());
+      storage = new DatanodeStorage(DatanodeStorage.generateUuid());
       final StorageBlockReport[] reports = {
           new StorageBlockReport(storage,
               new BlockListAsLongs(null, null).getBlockListAsLongs())
@@ -862,8 +954,8 @@ public class NNThroughputBenchmark implements Tool {
     void sendHeartbeat() throws IOException {
       // register datanode
       // TODO:FEDERATION currently a single block pool is supported
-      StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
-          false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
+      StorageReport[] rep = { new StorageReport(storage, false,
+          DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
       DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration, rep,
           0L, 0L, 0, 0, 0).getCommands();
       if(cmds != null) {
@@ -909,7 +1001,7 @@ public class NNThroughputBenchmark implements Tool {
     @SuppressWarnings("unused") // keep it for future blockReceived benchmark
     int replicateBlocks() throws IOException {
       // register datanode
-      StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
+      StorageReport[] rep = { new StorageReport(storage,
           false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
       DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration,
           rep, 0L, 0L, 0, 0, 0).getCommands();
@@ -918,7 +1010,8 @@ public class NNThroughputBenchmark implements Tool {
           if (cmd.getAction() == DatanodeProtocol.DNA_TRANSFER) {
             // Send a copy of a block to another datanode
             BlockCommand bcmd = (BlockCommand)cmd;
-            return transferBlocks(bcmd.getBlocks(), bcmd.getTargets());
+            return transferBlocks(bcmd.getBlocks(), bcmd.getTargets(),
+                                  bcmd.getTargetStorageIDs());
           }
         }
       }
@@ -931,12 +1024,14 @@ public class NNThroughputBenchmark implements Tool {
      * that the blocks have been received.
      */
     private int transferBlocks( Block blocks[], 
-                                DatanodeInfo xferTargets[][] 
+                                DatanodeInfo xferTargets[][],
+                                String targetStorageIDs[][]
                               ) throws IOException {
       for(int i = 0; i < blocks.length; i++) {
         DatanodeInfo blockTargets[] = xferTargets[i];
         for(int t = 0; t < blockTargets.length; t++) {
           DatanodeInfo dnInfo = blockTargets[t];
+          String targetStorageID = targetStorageIDs[i][t];
           DatanodeRegistration receivedDNReg;
           receivedDNReg = new DatanodeRegistration(dnInfo,
             new DataStorage(nsInfo),
@@ -946,7 +1041,7 @@ public class NNThroughputBenchmark implements Tool {
                   blocks[i], ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK,
                   null) };
           StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
-              receivedDNReg.getDatanodeUuid(), rdBlocks) };
+              targetStorageID, rdBlocks) };
           nameNodeProto.blockReceivedAndDeleted(receivedDNReg, nameNode
               .getNamesystem().getBlockPoolId(), report);
         }
@@ -1035,7 +1130,7 @@ public class NNThroughputBenchmark implements Tool {
       }
 
       // create files 
-      LOG.info("Creating " + nrFiles + " with " + blocksPerFile + " blocks each.");
+      LOG.info("Creating " + nrFiles + " files with " + blocksPerFile + " blocks each.");
       FileNameGenerator nameGenerator;
       nameGenerator = new FileNameGenerator(getBaseDir(), 100);
       String clientName = getClientName(007);
@@ -1069,7 +1164,7 @@ public class NNThroughputBenchmark implements Tool {
               loc.getBlock().getLocalBlock(),
               ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, null) };
           StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
-              datanodes[dnIdx].dnRegistration.getDatanodeUuid(), rdBlocks) };
+              datanodes[dnIdx].storage.getStorageID(), rdBlocks) };
           nameNodeProto.blockReceivedAndDeleted(datanodes[dnIdx].dnRegistration, loc
               .getBlock().getBlockPoolId(), report);
         }
@@ -1279,6 +1374,7 @@ public class NNThroughputBenchmark implements Tool {
     System.err.println("Usage: NNThroughputBenchmark"
         + "\n\t"    + OperationStatsBase.OP_ALL_USAGE
         + " | \n\t" + CreateFileStats.OP_CREATE_USAGE
+        + " | \n\t" + MkdirsStats.OP_MKDIRS_USAGE
         + " | \n\t" + OpenFileStats.OP_OPEN_USAGE
         + " | \n\t" + DeleteFileStats.OP_DELETE_USAGE
         + " | \n\t" + FileStatusStats.OP_FILE_STATUS_USAGE
@@ -1328,6 +1424,10 @@ public class NNThroughputBenchmark implements Tool {
         opStat = new CreateFileStats(args);
         ops.add(opStat);
       }
+      if(runAll || MkdirsStats.OP_MKDIRS_NAME.equals(type)) {
+        opStat = new MkdirsStats(args);
+        ops.add(opStat);
+      }
       if(runAll || OpenFileStats.OP_OPEN_NAME.equals(type)) {
         opStat = new OpenFileStats(args);
         ops.add(opStat);

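Per the OP_MKDIRS_USAGE string above, the new directory-creation benchmark is selected with -op mkdirs and tuned with -threads, -dirs, and -dirsPerDir, e.g. something along the lines of NNThroughputBenchmark -op mkdirs -threads 4 -dirs 10000 -dirsPerDir 16 (the exact launcher invocation depends on how the benchmark class is started in a given build).
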
+ 3 - 156
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java

@@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.namenode;
 
 import java.io.File;
 import java.io.IOException;
-import java.security.PrivilegedExceptionAction;
 import java.util.Iterator;
 
 import org.apache.commons.logging.Log;
@@ -29,25 +28,13 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileContext;
-import org.apache.hadoop.fs.Options.Rename;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hdfs.DFSClientAdapter;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
-import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
-import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.Util;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.token.Token;
 
 /**
  * OfflineEditsViewerHelper is a helper class for TestOfflineEditsViewer,
@@ -135,151 +122,11 @@ public class OfflineEditsViewerHelper {
    * OP_CLEAR_NS_QUOTA  (12)
    */
   private CheckpointSignature runOperations() throws IOException {
-
     LOG.info("Creating edits by performing fs operations");
     // no check, if it's not it throws an exception which is what we want
-    DistributedFileSystem dfs =
-      (DistributedFileSystem)cluster.getFileSystem();
-    FileContext fc = FileContext.getFileContext(cluster.getURI(0), config);
-    // OP_ADD 0
-    Path pathFileCreate = new Path("/file_create_u\1F431");
-    FSDataOutputStream s = dfs.create(pathFileCreate);
-    // OP_CLOSE 9
-    s.close();
-    // OP_RENAME_OLD 1
-    Path pathFileMoved = new Path("/file_moved");
-    dfs.rename(pathFileCreate, pathFileMoved);
-    // OP_DELETE 2
-    dfs.delete(pathFileMoved, false);
-    // OP_MKDIR 3
-    Path pathDirectoryMkdir = new Path("/directory_mkdir");
-    dfs.mkdirs(pathDirectoryMkdir);
-    // OP_ALLOW_SNAPSHOT 29
-    dfs.allowSnapshot(pathDirectoryMkdir);
-    // OP_DISALLOW_SNAPSHOT 30
-    dfs.disallowSnapshot(pathDirectoryMkdir);
-    // OP_CREATE_SNAPSHOT 26
-    String ssName = "snapshot1";
-    dfs.allowSnapshot(pathDirectoryMkdir);
-    dfs.createSnapshot(pathDirectoryMkdir, ssName);
-    // OP_RENAME_SNAPSHOT 28
-    String ssNewName = "snapshot2";
-    dfs.renameSnapshot(pathDirectoryMkdir, ssName, ssNewName);
-    // OP_DELETE_SNAPSHOT 27
-    dfs.deleteSnapshot(pathDirectoryMkdir, ssNewName);
-    // OP_SET_REPLICATION 4
-    s = dfs.create(pathFileCreate);
-    s.close();
-    dfs.setReplication(pathFileCreate, (short)1);
-    // OP_SET_PERMISSIONS 7
-    Short permission = 0777;
-    dfs.setPermission(pathFileCreate, new FsPermission(permission));
-    // OP_SET_OWNER 8
-    dfs.setOwner(pathFileCreate, new String("newOwner"), null);
-    // OP_CLOSE 9 see above
-    // OP_SET_GENSTAMP 10 see above
-    // OP_SET_NS_QUOTA 11 obsolete
-    // OP_CLEAR_NS_QUOTA 12 obsolete
-    // OP_TIMES 13
-    long mtime = 1285195527000L; // Wed, 22 Sep 2010 22:45:27 GMT
-    long atime = mtime;
-    dfs.setTimes(pathFileCreate, mtime, atime);
-    // OP_SET_QUOTA 14
-    dfs.setQuota(pathDirectoryMkdir, 1000L, HdfsConstants.QUOTA_DONT_SET);
-    // OP_RENAME 15
-    fc.rename(pathFileCreate, pathFileMoved, Rename.NONE);
-    // OP_CONCAT_DELETE 16
-    Path   pathConcatTarget = new Path("/file_concat_target");
-    Path[] pathConcatFiles  = new Path[2];
-    pathConcatFiles[0]      = new Path("/file_concat_0");
-    pathConcatFiles[1]      = new Path("/file_concat_1");
-
-    long  length      = blockSize * 3; // multiple of blocksize for concat
-    short replication = 1;
-    long  seed        = 1;
-
-    DFSTestUtil.createFile(dfs, pathConcatTarget, length, replication, seed);
-    DFSTestUtil.createFile(dfs, pathConcatFiles[0], length, replication, seed);
-    DFSTestUtil.createFile(dfs, pathConcatFiles[1], length, replication, seed);
-    dfs.concat(pathConcatTarget, pathConcatFiles);
-    // OP_SYMLINK 17
-    Path pathSymlink = new Path("/file_symlink");
-    fc.createSymlink(pathConcatTarget, pathSymlink, false);
-    // OP_GET_DELEGATION_TOKEN 18
-    // OP_RENEW_DELEGATION_TOKEN 19
-    // OP_CANCEL_DELEGATION_TOKEN 20
-    // see TestDelegationToken.java
-    // fake the user to renew token for
-    final Token<?>[] tokens = dfs.addDelegationTokens("JobTracker", null);
-    UserGroupInformation longUgi = UserGroupInformation.createRemoteUser(
-      "JobTracker/foo.com@FOO.COM");
-    try {
-      longUgi.doAs(new PrivilegedExceptionAction<Object>() {
-        @Override
-        public Object run() throws IOException, InterruptedException {
-          for (Token<?> token : tokens) {
-            token.renew(config);
-            token.cancel(config);
-          }
-          return null;
-        }
-      });
-    } catch(InterruptedException e) {
-      throw new IOException(
-        "renewDelegationToken threw InterruptedException", e);
-    }
-    // OP_UPDATE_MASTER_KEY 21
-    //   done by getDelegationTokenSecretManager().startThreads();
-
-    // OP_ADD_CACHE_POOL 35
-    final String pool = "poolparty";
-    dfs.addCachePool(new CachePoolInfo(pool));
-    // OP_MODIFY_CACHE_POOL 36
-    dfs.modifyCachePool(new CachePoolInfo(pool)
-        .setOwnerName("carlton")
-        .setGroupName("party")
-        .setMode(new FsPermission((short)0700))
-        .setLimit(1989l));
-    // OP_ADD_PATH_BASED_CACHE_DIRECTIVE 33
-    long id = dfs.addCacheDirective(
-        new CacheDirectiveInfo.Builder().
-            setPath(new Path("/bar")).
-            setReplication((short)1).
-            setPool(pool).
-            build());
-    // OP_MODIFY_PATH_BASED_CACHE_DIRECTIVE 38
-    dfs.modifyCacheDirective(
-        new CacheDirectiveInfo.Builder().
-            setId(id).
-            setPath(new Path("/bar2")).
-            build());
-    // OP_REMOVE_PATH_BASED_CACHE_DIRECTIVE 34
-    dfs.removeCacheDirective(id);
-    // OP_REMOVE_CACHE_POOL 37
-    dfs.removeCachePool(pool);
-    // sync to disk, otherwise we parse partial edits
-    cluster.getNameNode().getFSImage().getEditLog().logSync();
-    
-    // OP_REASSIGN_LEASE 22
-    String filePath = "/hard-lease-recovery-test";
-    byte[] bytes = "foo-bar-baz".getBytes();
-    DFSClientAdapter.stopLeaseRenewer(dfs);
-    FSDataOutputStream leaseRecoveryPath = dfs.create(new Path(filePath));
-    leaseRecoveryPath.write(bytes);
-    leaseRecoveryPath.hflush();
-    // Set the hard lease timeout to 1 second.
-    cluster.setLeasePeriod(60 * 1000, 1000);
-    // wait for lease recovery to complete
-    LocatedBlocks locatedBlocks;
-    do {
-      try {
-        Thread.sleep(1000);
-      } catch (InterruptedException e) {
-        LOG.info("Innocuous exception", e);
-      }
-      locatedBlocks = DFSClientAdapter.callGetBlockLocations(
-          cluster.getNameNodeRpc(), filePath, 0L, bytes.length);
-    } while (locatedBlocks.isUnderConstruction());
+    DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
+    DFSTestUtil.runOperations(cluster, dfs, cluster.getConfiguration(0),
+        dfs.getDefaultBlockSize(), 0);
 
     // Force a roll so we get an OP_END_LOG_SEGMENT txn
     return cluster.getNameNodeRpc().rollEditLog();

+ 81 - 65
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java

@@ -21,7 +21,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
-import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
 import static org.apache.hadoop.hdfs.protocol.CachePoolInfo.RELATIVE_EXPIRY_NEVER;
 import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
@@ -58,17 +57,21 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.LogVerificationAppender;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
+import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator;
 import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
 import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
 import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
-import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
 import org.apache.hadoop.hdfs.protocol.CachePoolStats;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
 import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 import org.apache.hadoop.io.nativeio.NativeIO;
 import org.apache.hadoop.io.nativeio.NativeIO.POSIX.CacheManipulator;
@@ -79,6 +82,7 @@ import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.util.GSet;
 import org.apache.log4j.Level;
 import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -104,7 +108,7 @@ public class TestCacheDirectives {
     EditLogFileOutputStream.setShouldSkipFsyncForTesting(false);
   }
 
-  private static final long BLOCK_SIZE = 512;
+  private static final long BLOCK_SIZE = 4096;
   private static final int NUM_DATANODES = 4;
   // Most Linux installs will allow non-root users to lock 64KB.
   // In this test though, we stub out mlock so this doesn't matter.
@@ -115,7 +119,6 @@ public class TestCacheDirectives {
     conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     conf.setLong(DFS_DATANODE_MAX_LOCKED_MEMORY_KEY, CACHE_CAPACITY);
     conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1);
-    conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, true);
     conf.setLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 1000);
     conf.setLong(DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1000);
     // set low limits here for testing purposes
@@ -602,8 +605,8 @@ public class TestCacheDirectives {
    * Wait for the NameNode to have an expected number of cached blocks
    * and replicas.
    * @param nn NameNode
-   * @param expectedCachedBlocks
-   * @param expectedCachedReplicas
+   * @param expectedCachedBlocks if -1, treat as wildcard
+   * @param expectedCachedReplicas if -1, treat as wildcard
    * @throws Exception
    */
   private static void waitForCachedBlocks(NameNode nn,
@@ -632,16 +635,18 @@ public class TestCacheDirectives {
         } finally {
           namesystem.readUnlock();
         }
-        if ((numCachedBlocks == expectedCachedBlocks) && 
-            (numCachedReplicas == expectedCachedReplicas)) {
-          return true;
-        } else {
-          LOG.info(logString + " cached blocks: have " + numCachedBlocks +
-              " / " + expectedCachedBlocks + ".  " +
-              "cached replicas: have " + numCachedReplicas +
-              " / " + expectedCachedReplicas);
-          return false;
+        if (expectedCachedBlocks == -1 ||
+            numCachedBlocks == expectedCachedBlocks) {
+          if (expectedCachedReplicas == -1 ||
+              numCachedReplicas == expectedCachedReplicas) {
+            return true;
+          }
         }
+        LOG.info(logString + " cached blocks: have " + numCachedBlocks +
+            " / " + expectedCachedBlocks + ".  " +
+            "cached replicas: have " + numCachedReplicas +
+            " / " + expectedCachedReplicas);
+        return false;
       }
     }, 500, 60000);
   }
@@ -796,7 +801,15 @@ public class TestCacheDirectives {
       }
     }, 500, 60000);
 
+    // Send a cache report referring to a bogus block.  It is important that
+    // the NameNode be robust against this.
     NamenodeProtocols nnRpc = namenode.getRpcServer();
+    DataNode dn0 = cluster.getDataNodes().get(0);
+    String bpid = cluster.getNamesystem().getBlockPoolId();
+    LinkedList<Long> bogusBlockIds = new LinkedList<Long> ();
+    bogusBlockIds.add(999999L);
+    nnRpc.cacheReport(dn0.getDNRegistrationForBP(bpid), bpid, bogusBlockIds);
+
     Path rootDir = helper.getDefaultWorkingDirectory(dfs);
     // Create the pool
     final String pool = "friendlyPool";
@@ -826,6 +839,24 @@ public class TestCacheDirectives {
       waitForCachedBlocks(namenode, expected, expected,
           "testWaitForCachedReplicas:1");
     }
+
+    // Check that the datanodes have the right cache values
+    DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
+    assertEquals("Unexpected number of live nodes", NUM_DATANODES, live.length);
+    long totalUsed = 0;
+    for (DatanodeInfo dn : live) {
+      final long cacheCapacity = dn.getCacheCapacity();
+      final long cacheUsed = dn.getCacheUsed();
+      final long cacheRemaining = dn.getCacheRemaining();
+      assertEquals("Unexpected cache capacity", CACHE_CAPACITY, cacheCapacity);
+      assertEquals("Capacity not equal to used + remaining",
+          cacheCapacity, cacheUsed + cacheRemaining);
+      assertEquals("Remaining not equal to capacity - used",
+          cacheCapacity - cacheUsed, cacheRemaining);
+      totalUsed += cacheUsed;
+    }
+    assertEquals(expected*BLOCK_SIZE, totalUsed);
+
     // Uncache and check each path in sequence
     RemoteIterator<CacheDirectiveEntry> entries =
       new CacheDirectiveIterator(nnRpc, null);
@@ -838,55 +869,6 @@ public class TestCacheDirectives {
     }
   }
 
-  @Test(timeout=120000)
-  public void testAddingCacheDirectiveInfosWhenCachingIsDisabled()
-      throws Exception {
-    cluster.shutdown();
-    HdfsConfiguration conf = createCachingConf();
-    conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, false);
-    MiniDFSCluster cluster =
-      new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
-
-    try {
-      cluster.waitActive();
-      DistributedFileSystem dfs = cluster.getFileSystem();
-      NameNode namenode = cluster.getNameNode();
-      // Create the pool
-      String pool = "pool1";
-      namenode.getRpcServer().addCachePool(new CachePoolInfo(pool));
-      // Create some test files
-      final int numFiles = 2;
-      final int numBlocksPerFile = 2;
-      final List<String> paths = new ArrayList<String>(numFiles);
-      for (int i=0; i<numFiles; i++) {
-        Path p = new Path("/testCachePaths-" + i);
-        FileSystemTestHelper.createFile(dfs, p, numBlocksPerFile,
-            (int)BLOCK_SIZE);
-        paths.add(p.toUri().getPath());
-      }
-      // Check the initial statistics at the namenode
-      waitForCachedBlocks(namenode, 0, 0,
-          "testAddingCacheDirectiveInfosWhenCachingIsDisabled:0");
-      // Cache and check each path in sequence
-      int expected = 0;
-      for (int i=0; i<numFiles; i++) {
-        CacheDirectiveInfo directive =
-            new CacheDirectiveInfo.Builder().
-              setPath(new Path(paths.get(i))).
-              setPool(pool).
-              build();
-        dfs.addCacheDirective(directive);
-        waitForCachedBlocks(namenode, expected, 0,
-          "testAddingCacheDirectiveInfosWhenCachingIsDisabled:1");
-      }
-      Thread.sleep(20000);
-      waitForCachedBlocks(namenode, expected, 0,
-          "testAddingCacheDirectiveInfosWhenCachingIsDisabled:2");
-    } finally {
-      cluster.shutdown();
-    }
-  }
-
   @Test(timeout=120000)
   public void testWaitForCachedReplicasInDirectory() throws Exception {
     // Create the pool
@@ -965,7 +947,6 @@ public class TestCacheDirectives {
         (4+3) * numBlocksPerFile * BLOCK_SIZE,
         3, 2,
         poolInfo, "testWaitForCachedReplicasInDirectory:2:pool");
-
     // remove and watch numCached go to 0
     dfs.removeCacheDirective(id);
     dfs.removeCacheDirective(id2);
@@ -1374,4 +1355,39 @@ public class TestCacheDirectives {
         .setExpiration(Expiration.newRelative(RELATIVE_EXPIRY_NEVER - 1))
         .build());
   }
+
+  @Test(timeout=60000)
+  public void testExceedsCapacity() throws Exception {
+    // Create a giant file
+    final Path fileName = new Path("/exceeds");
+    final long fileLen = CACHE_CAPACITY * (NUM_DATANODES*2);
+    int numCachedReplicas = (int) ((CACHE_CAPACITY*NUM_DATANODES)/BLOCK_SIZE);
+    DFSTestUtil.createFile(dfs, fileName, fileLen, (short) NUM_DATANODES,
+        0xFADED);
+    // Set up a log appender watcher
+    final LogVerificationAppender appender = new LogVerificationAppender();
+    final Logger logger = Logger.getRootLogger();
+    logger.addAppender(appender);
+    dfs.addCachePool(new CachePoolInfo("pool"));
+    dfs.addCacheDirective(new CacheDirectiveInfo.Builder().setPool("pool")
+        .setPath(fileName).setReplication((short) 1).build());
+    waitForCachedBlocks(namenode, -1, numCachedReplicas,
+        "testExceeds:1");
+    // Check that no DNs saw an excess CACHE message
+    int lines = appender.countLinesWithMessage(
+        "more bytes in the cache: " +
+        DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
+    assertEquals("Namenode should not send extra CACHE commands", 0, lines);
+    // Try creating a file with giant-sized blocks that exceed cache capacity
+    dfs.delete(fileName, false);
+    DFSTestUtil.createFile(dfs, fileName, 4096, fileLen, CACHE_CAPACITY * 2,
+        (short) 1, 0xFADED);
+    // Nothing will get cached, so just force sleep for a bit
+    Thread.sleep(4000);
+    // Still should not see any excess commands
+    lines = appender.countLinesWithMessage(
+        "more bytes in the cache: " +
+        DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
+    assertEquals("Namenode should not send extra CACHE commands", 0, lines);
+  }
 }

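The assertions added in this hunk reduce to a simple per-datanode invariant: cache capacity must equal cache used plus cache remaining, and the used bytes summed across live datanodes must match the cached data. A minimal standalone sketch of that check follows; the class name, method name, and the choice of IllegalStateException are illustrative, and an open DistributedFileSystem handle is assumed.

import java.io.IOException;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;

final class CacheAccountingSketch {
  // Returns total cache bytes in use, verifying the per-node invariant first.
  static long verifyCacheAccounting(DistributedFileSystem dfs) throws IOException {
    long totalUsed = 0;
    for (DatanodeInfo dn : dfs.getDataNodeStats(DatanodeReportType.LIVE)) {
      long capacity = dn.getCacheCapacity();
      long used = dn.getCacheUsed();
      long remaining = dn.getCacheRemaining();
      if (capacity != used + remaining) {
        throw new IllegalStateException("Cache accounting broken on " + dn);
      }
      totalUsed += used;
    }
    return totalUsed;
  }
}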
+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java

@@ -140,8 +140,9 @@ public class TestDeadDatanode {
 
     // Ensure heartbeat from dead datanode is rejected with a command
     // that asks datanode to register again
-    StorageReport[] rep = { new StorageReport(reg.getDatanodeUuid(), false, 0, 0,
-        0, 0) };
+    StorageReport[] rep = { new StorageReport(
+        new DatanodeStorage(reg.getDatanodeUuid()),
+        false, 0, 0, 0, 0) };
     DatanodeCommand[] cmd = dnp.sendHeartbeat(reg, rep, 0L, 0L, 0, 0, 0)
         .getCommands();
     assertEquals(1, cmd.length);

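The only change in this hunk is the StorageReport constructor: the storage is now identified by a DatanodeStorage object rather than a bare UUID string. A minimal sketch of the new form, where the class name, helper name, and the inline argument labels are illustrative and the zeros mirror the test's dummy values.

import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;

final class StorageReportSketch {
  // Old form: new StorageReport(datanodeUuid, false, 0, 0, 0, 0)
  static StorageReport emptyReport(String datanodeUuid) {
    return new StorageReport(new DatanodeStorage(datanodeUuid),
        false /* failed */, 0 /* capacity */, 0 /* dfsUsed */,
        0 /* remaining */, 0 /* blockPoolUsed */);
  }
}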
+ 25 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java

@@ -27,6 +27,7 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.net.URI;
+import java.util.LinkedList;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import org.apache.commons.logging.Log;
@@ -59,6 +60,8 @@ import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;
 
+import com.google.common.util.concurrent.Uninterruptibles;
+
 /**
  * Tests state transition from active->standby, and manual failover
  * and failback between two namenodes.
@@ -124,6 +127,17 @@ public class TestHAStateTransitions {
     }
   }
 
+  private void addCrmThreads(MiniDFSCluster cluster,
+      LinkedList<Thread> crmThreads) {
+    for (int nn = 0; nn <= 1; nn++) {
+      Thread thread = cluster.getNameNode(nn).getNamesystem().
+          getCacheManager().getCacheReplicationMonitor();
+      if (thread != null) {
+        crmThreads.add(thread);
+      }
+    }
+  }
+
   /**
    * Test that transitioning a service to the state that it is already
    * in is a nop, specifically, an exception is not thrown.
@@ -131,19 +145,30 @@ public class TestHAStateTransitions {
   @Test
   public void testTransitionToCurrentStateIsANop() throws Exception {
     Configuration conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1L);
     MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
       .nnTopology(MiniDFSNNTopology.simpleHATopology())
       .numDataNodes(1)
       .build();
+    LinkedList<Thread> crmThreads = new LinkedList<Thread>();
     try {
       cluster.waitActive();
+      addCrmThreads(cluster, crmThreads);
       cluster.transitionToActive(0);
+      addCrmThreads(cluster, crmThreads);
       cluster.transitionToActive(0);
+      addCrmThreads(cluster, crmThreads);
       cluster.transitionToStandby(0);
+      addCrmThreads(cluster, crmThreads);
       cluster.transitionToStandby(0);
+      addCrmThreads(cluster, crmThreads);
     } finally {
       cluster.shutdown();
     }
+    // Verify that all cacheReplicationMonitor threads shut down
+    for (Thread thread : crmThreads) {
+      Uninterruptibles.joinUninterruptibly(thread);
+    }
   }
 
   /**

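The test above gathers the CacheReplicationMonitor thread from each namenode at every transition and, after cluster shutdown, joins them to prove they terminate. A sketch of the same check with a bounded join, so a leaked monitor thread fails fast instead of hanging the test; the 60-second budget and the class/method names are assumptions, not part of the hunk.

import java.util.List;
import java.util.concurrent.TimeUnit;
import com.google.common.util.concurrent.Uninterruptibles;

final class ThreadShutdownSketch {
  static void assertAllTerminated(List<Thread> threads) {
    for (Thread t : threads) {
      // Wait up to 60s without propagating InterruptedException.
      Uninterruptibles.joinUninterruptibly(t, 60, TimeUnit.SECONDS);
      if (t.isAlive()) {
        throw new AssertionError("Thread still running: " + t.getName());
      }
    }
  }
}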
+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java

@@ -47,6 +47,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
+import org.apache.hadoop.metrics2.MetricsSource;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.test.MetricsAsserts;
 import org.apache.hadoop.util.Time;
 import org.apache.log4j.Level;
@@ -108,6 +110,12 @@ public class TestNameNodeMetrics {
   
   @After
   public void tearDown() throws Exception {
+    MetricsSource source = DefaultMetricsSystem.instance().getSource("UgiMetrics");
+    if (source != null) {
+      // Run only once since the UGI metrics is cleaned up during teardown
+      MetricsRecordBuilder rb = getMetrics(source);
+      assertQuantileGauges("GetGroups1s", rb);
+    }
     cluster.shutdown();
   }
   

+ 72 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java

@@ -33,10 +33,15 @@ import java.io.PrintStream;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.StringTokenizer;
 
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -55,7 +60,7 @@ public class TestGetConf {
   enum TestType {
     NAMENODE, BACKUP, SECONDARY, NNRPCADDRESSES
   }
-  
+  FileSystem localFileSys; 
   /** Setup federation nameServiceIds in the configuration */
   private void setupNameServices(HdfsConfiguration conf, int nameServiceIdCount) {
     StringBuilder nsList = new StringBuilder();
@@ -379,4 +384,70 @@ public class TestGetConf {
       }
     }
   }
+  @Test
+  public void TestGetConfExcludeCommand() throws Exception{
+  	HdfsConfiguration conf = new HdfsConfiguration();
+    // Set up the hosts/exclude files.
+    localFileSys = FileSystem.getLocal(conf);
+    Path workingDir = localFileSys.getWorkingDirectory();
+    Path dir = new Path(workingDir, System.getProperty("test.build.data", "target/test/data") + "/Getconf/");
+    Path hostsFile = new Path(dir, "hosts");
+    Path excludeFile = new Path(dir, "exclude");
+    
+    // Setup conf
+    conf.set(DFSConfigKeys.DFS_HOSTS, hostsFile.toUri().getPath());
+    conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
+    writeConfigFile(hostsFile, null);
+    writeConfigFile(excludeFile, null);    
+    String[] args = {"-excludeFile"};
+    String ret = runTool(conf, args, true);
+    assertEquals(excludeFile.toUri().getPath(),ret.trim());
+    cleanupFile(localFileSys, excludeFile.getParent());
+  }
+  
+  @Test
+  public void TestGetConfIncludeCommand() throws Exception{
+  	HdfsConfiguration conf = new HdfsConfiguration();
+    // Set up the hosts/exclude files.
+    localFileSys = FileSystem.getLocal(conf);
+    Path workingDir = localFileSys.getWorkingDirectory();
+    Path dir = new Path(workingDir, System.getProperty("test.build.data", "target/test/data") + "/Getconf/");
+    Path hostsFile = new Path(dir, "hosts");
+    Path excludeFile = new Path(dir, "exclude");
+    
+    // Setup conf
+    conf.set(DFSConfigKeys.DFS_HOSTS, hostsFile.toUri().getPath());
+    conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
+    writeConfigFile(hostsFile, null);
+    writeConfigFile(excludeFile, null);    
+    String[] args = {"-includeFile"};
+    String ret = runTool(conf, args, true);
+    assertEquals(hostsFile.toUri().getPath(),ret.trim());
+    cleanupFile(localFileSys, excludeFile.getParent());
+  }
+  
+  private void writeConfigFile(Path name, ArrayList<String> nodes) 
+      throws IOException {
+      // delete if it already exists
+      if (localFileSys.exists(name)) {
+        localFileSys.delete(name, true);
+      }
+
+      FSDataOutputStream stm = localFileSys.create(name);
+      
+      if (nodes != null) {
+        for (Iterator<String> it = nodes.iterator(); it.hasNext();) {
+          String node = it.next();
+          stm.writeBytes(node);
+          stm.writeBytes("\n");
+        }
+      }
+      stm.close();
+    }
+  
+  private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
+    assertTrue(fileSys.exists(name));
+    fileSys.delete(name, true);
+    assertTrue(!fileSys.exists(name));
+  }
 }

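Both new tests point the include and exclude host lists at empty files and expect getconf to echo the configured paths back. A minimal sketch of the configuration side; the class name, method name, and file paths are illustrative.

import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

final class HostFileConfSketch {
  static HdfsConfiguration withHostFiles(String includePath, String excludePath) {
    HdfsConfiguration conf = new HdfsConfiguration();
    // dfs.hosts lists datanodes allowed to connect; dfs.hosts.exclude lists
    // datanodes to be decommissioned.
    conf.set(DFSConfigKeys.DFS_HOSTS, includePath);
    conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludePath);
    return conf;
  }
}

The two flags under test, -includeFile and -excludeFile, simply print these configured values back to the caller.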
+ 93 - 124
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/TestOfflineEditsViewer.java

@@ -26,8 +26,6 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
-import java.util.HashMap;
-import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -36,168 +34,142 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
 import org.apache.hadoop.hdfs.server.namenode.OfflineEditsViewerHelper;
 import org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer.Flags;
 import org.apache.hadoop.test.PathUtils;
+import org.junit.After;
 import org.junit.Before;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.TemporaryFolder;
 
-public class TestOfflineEditsViewer {
-  private static final Log LOG = LogFactory.getLog(TestOfflineEditsViewer.class);
-
-  private static final Map<FSEditLogOpCodes, Boolean> obsoleteOpCodes =
-    new HashMap<FSEditLogOpCodes, Boolean>();
-
-  private static final Map<FSEditLogOpCodes, Boolean> missingOpCodes =
-      new HashMap<FSEditLogOpCodes, Boolean>();
+import com.google.common.collect.ImmutableSet;
 
-  static {
-    initializeObsoleteOpCodes();
-    initializeMissingOpCodes();
-  }
-
-  private static String buildDir =
-    PathUtils.getTestDirName(TestOfflineEditsViewer.class);
+public class TestOfflineEditsViewer {
+  private static final Log LOG = LogFactory
+      .getLog(TestOfflineEditsViewer.class);
 
-  private static String cacheDir =
-    System.getProperty("test.cache.data", "build/test/cache");
+  private static String buildDir = PathUtils
+      .getTestDirName(TestOfflineEditsViewer.class);
 
   // to create edits and get edits filename
-  private static final OfflineEditsViewerHelper nnHelper 
-    = new OfflineEditsViewerHelper();
+  private static final OfflineEditsViewerHelper nnHelper = new OfflineEditsViewerHelper();
+  private static final ImmutableSet<FSEditLogOpCodes> skippedOps = skippedOps();
 
-  /**
-   * Initialize obsoleteOpCodes
-   *
-   * Reason for suppressing "deprecation" warnings:
-   *
-   * These are the opcodes that are not used anymore, some
-   * are marked deprecated, we need to include them here to make
-   * sure we exclude them when checking for completeness of testing,
-   * that's why the "deprecation" warnings are suppressed.
-   */
   @SuppressWarnings("deprecation")
-  private static void initializeObsoleteOpCodes() {
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_ADD, true);
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_REMOVE, true);
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_NS_QUOTA, true);
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA, true);
+  private static ImmutableSet<FSEditLogOpCodes> skippedOps() {
+    ImmutableSet.Builder<FSEditLogOpCodes> b = ImmutableSet
+        .<FSEditLogOpCodes> builder();
+
+    // Deprecated opcodes
+    b.add(FSEditLogOpCodes.OP_DATANODE_ADD)
+        .add(FSEditLogOpCodes.OP_DATANODE_REMOVE)
+        .add(FSEditLogOpCodes.OP_SET_NS_QUOTA)
+        .add(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA)
+        .add(FSEditLogOpCodes.OP_SET_GENSTAMP_V1);
+
+    // Cannot test delegation token related code in insecure set up
+    b.add(FSEditLogOpCodes.OP_GET_DELEGATION_TOKEN)
+        .add(FSEditLogOpCodes.OP_RENEW_DELEGATION_TOKEN)
+        .add(FSEditLogOpCodes.OP_CANCEL_DELEGATION_TOKEN);
+
+    // Skip invalid opcode
+    b.add(FSEditLogOpCodes.OP_INVALID);
+    return b.build();
   }
 
-  /**
-   * Initialize missingOpcodes
-   *
-   *  Opcodes that are not available except after uprade from
-   *  an older version. We don't test these here.
-   */
-  private static void initializeMissingOpCodes() {
-    obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_GENSTAMP_V1, true);
-  }
+  @Rule
+  public final TemporaryFolder folder = new TemporaryFolder();
 
   @Before
-  public void setup() {
-    new File(cacheDir).mkdirs();
+  public void setUp() throws IOException {
+    nnHelper.startCluster(buildDir + "/dfs/");
+  }
+
+  @After
+  public void tearDown() throws IOException {
+    nnHelper.shutdownCluster();
   }
-  
+
   /**
    * Test the OfflineEditsViewer
    */
   @Test
   public void testGenerated() throws IOException {
-
-    LOG.info("START - testing with generated edits");
-
-    nnHelper.startCluster(buildDir + "/dfs/");
-
     // edits generated by nnHelper (MiniDFSCluster), should have all op codes
     // binary, XML, reparsed binary
-    String edits          = nnHelper.generateEdits();
-    String editsParsedXml = cacheDir + "/editsParsed.xml";
-    String editsReparsed  = cacheDir + "/editsReparsed";
+    String edits = nnHelper.generateEdits();
+    String editsParsedXml = folder.newFile("editsParsed.xml").getAbsolutePath();
+    String editsReparsed = folder.newFile("editsParsed").getAbsolutePath();
 
     // parse to XML then back to binary
     assertEquals(0, runOev(edits, editsParsedXml, "xml", false));
     assertEquals(0, runOev(editsParsedXml, editsReparsed, "binary", false));
 
     // judgment time
+    assertTrue("Edits " + edits + " should have all op codes",
+        hasAllOpCodes(edits));
+    LOG.info("Comparing generated file " + editsReparsed
+        + " with reference file " + edits);
     assertTrue(
-      "Edits " + edits + " should have all op codes",
-      hasAllOpCodes(edits));
-    LOG.info("Comparing generated file " + editsReparsed +
-             " with reference file " + edits);
-    assertTrue(
-      "Generated edits and reparsed (bin to XML to bin) should be same",
-      filesEqualIgnoreTrailingZeros(edits, editsReparsed));
-
-    // removes edits so do this at the end
-    nnHelper.shutdownCluster();
-
-    LOG.info("END");
+        "Generated edits and reparsed (bin to XML to bin) should be same",
+        filesEqualIgnoreTrailingZeros(edits, editsReparsed));
   }
 
   @Test
   public void testRecoveryMode() throws IOException {
-    LOG.info("START - testing with generated edits");
-
-    nnHelper.startCluster(buildDir + "/dfs/");
-
     // edits generated by nnHelper (MiniDFSCluster), should have all op codes
     // binary, XML, reparsed binary
-    String edits          = nnHelper.generateEdits();
-    
+    String edits = nnHelper.generateEdits();
+    FileOutputStream os = new FileOutputStream(edits, true);
     // Corrupt the file by truncating the end
-    FileChannel editsFile = new FileOutputStream(edits, true).getChannel();
+    FileChannel editsFile = os.getChannel();
     editsFile.truncate(editsFile.size() - 5);
-    
-    String editsParsedXml = cacheDir + "/editsRecoveredParsed.xml";
-    String editsReparsed  = cacheDir + "/editsRecoveredReparsed";
-    String editsParsedXml2 = cacheDir + "/editsRecoveredParsed2.xml";
+
+    String editsParsedXml = folder.newFile("editsRecoveredParsed.xml")
+        .getAbsolutePath();
+    String editsReparsed = folder.newFile("editsRecoveredReparsed")
+        .getAbsolutePath();
+    String editsParsedXml2 = folder.newFile("editsRecoveredParsed2.xml")
+        .getAbsolutePath();
 
     // Can't read the corrupted file without recovery mode
     assertEquals(-1, runOev(edits, editsParsedXml, "xml", false));
-    
+
     // parse to XML then back to binary
     assertEquals(0, runOev(edits, editsParsedXml, "xml", true));
-    assertEquals(0, runOev(editsParsedXml, editsReparsed,  "binary", false));
+    assertEquals(0, runOev(editsParsedXml, editsReparsed, "binary", false));
     assertEquals(0, runOev(editsReparsed, editsParsedXml2, "xml", false));
 
     // judgment time
     assertTrue("Test round trip",
-      filesEqualIgnoreTrailingZeros(editsParsedXml, editsParsedXml2));
+        filesEqualIgnoreTrailingZeros(editsParsedXml, editsParsedXml2));
 
-    // removes edits so do this at the end
-    nnHelper.shutdownCluster();
-
-    LOG.info("END");
+    os.close();
   }
 
   @Test
   public void testStored() throws IOException {
-
-    LOG.info("START - testing with stored reference edits");
-
     // reference edits stored with source code (see build.xml)
+    final String cacheDir = System.getProperty("test.cache.data",
+        "build/test/cache");
     // binary, XML, reparsed binary
-    String editsStored             = cacheDir + "/editsStored";
-    String editsStoredParsedXml    = cacheDir + "/editsStoredParsed.xml";
-    String editsStoredReparsed     = cacheDir + "/editsStoredReparsed";
+    String editsStored = cacheDir + "/editsStored";
+    String editsStoredParsedXml = cacheDir + "/editsStoredParsed.xml";
+    String editsStoredReparsed = cacheDir + "/editsStoredReparsed";
     // reference XML version of editsStored (see build.xml)
-    String editsStoredXml          = cacheDir + "/editsStored.xml";
-      
+    String editsStoredXml = cacheDir + "/editsStored.xml";
+
     // parse to XML then back to binary
     assertEquals(0, runOev(editsStored, editsStoredParsedXml, "xml", false));
-    assertEquals(0, runOev(editsStoredParsedXml, editsStoredReparsed,
-        "binary", false));
+    assertEquals(0,
+        runOev(editsStoredParsedXml, editsStoredReparsed, "binary", false));
 
     // judgement time
+    assertTrue("Edits " + editsStored + " should have all op codes",
+        hasAllOpCodes(editsStored));
+    assertTrue("Reference XML edits and parsed to XML should be same",
+        filesEqual(editsStoredXml, editsStoredParsedXml));
     assertTrue(
-      "Edits " + editsStored + " should have all op codes",
-      hasAllOpCodes(editsStored));
-    assertTrue(
-      "Reference XML edits and parsed to XML should be same",
-      filesEqual(editsStoredXml, editsStoredParsedXml));
-    assertTrue(
-      "Reference edits and reparsed (bin to XML to bin) should be same",
-      filesEqualIgnoreTrailingZeros(editsStored, editsStoredReparsed));
-
-    LOG.info("END");
+        "Reference edits and reparsed (bin to XML to bin) should be same",
+        filesEqualIgnoreTrailingZeros(editsStored, editsStoredReparsed));
   }
 
   /**
@@ -233,22 +205,17 @@ public class TestOfflineEditsViewer {
     OfflineEditsViewer oev = new OfflineEditsViewer();
     if (oev.go(inFilename, outFilename, "stats", new Flags(), visitor) != 0)
       return false;
-    LOG.info("Statistics for " + inFilename + "\n" +
-      visitor.getStatisticsString());
-    
+    LOG.info("Statistics for " + inFilename + "\n"
+        + visitor.getStatisticsString());
+
     boolean hasAllOpCodes = true;
-    for(FSEditLogOpCodes opCode : FSEditLogOpCodes.values()) {
+    for (FSEditLogOpCodes opCode : FSEditLogOpCodes.values()) {
       // don't need to test obsolete opCodes
-      if(obsoleteOpCodes.containsKey(opCode)) {
+      if (skippedOps.contains(opCode))
         continue;
-      } else if (missingOpCodes.containsKey(opCode)) {
-        continue;
-      } else if (opCode == FSEditLogOpCodes.OP_INVALID) {
-        continue;
-      }
 
       Long count = visitor.getStatistics().get(opCode);
-      if((count == null) || (count == 0)) {
+      if ((count == null) || (count == 0)) {
         hasAllOpCodes = false;
         LOG.info("Opcode " + opCode + " not tested in " + inFilename);
       }
@@ -257,9 +224,9 @@ public class TestOfflineEditsViewer {
   }
 
   /**
-   * Compare two files, ignore trailing zeros at the end,
-   * for edits log the trailing zeros do not make any difference,
-   * throw exception is the files are not same
+   * Compare two files, ignoring trailing zeros at the end; for an edits log
+   * the trailing zeros do not make any difference. Throws an exception if the
+   * files are not the same.
    *
    * @param filenameSmall first file to compare (doesn't have to be smaller)
    * @param filenameLarge second file to compare (doesn't have to be larger)
@@ -271,7 +238,7 @@ public class TestOfflineEditsViewer {
     ByteBuffer large = ByteBuffer.wrap(DFSTestUtil.loadFile(filenameLarge));
 
     // now correct if it's otherwise
-    if(small.capacity() > large.capacity()) {
+    if (small.capacity() > large.capacity()) {
       ByteBuffer tmpByteBuffer = small;
       small = large;
       large = tmpByteBuffer;
@@ -288,13 +255,15 @@ public class TestOfflineEditsViewer {
     large.limit(small.capacity());
 
     // compares position to limit
-    if(!small.equals(large)) { return false; }
+    if (!small.equals(large)) {
+      return false;
+    }
 
     // everything after limit should be 0xFF
     int i = large.limit();
     large.clear();
-    for(; i < large.capacity(); i++) {
-      if(large.get(i) != FSEditLogOpCodes.OP_INVALID.getOpCode()) {
+    for (; i < large.capacity(); i++) {
+      if (large.get(i) != FSEditLogOpCodes.OP_INVALID.getOpCode()) {
         return false;
       }
     }

BIN
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored


+ 90 - 74
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml

@@ -13,8 +13,8 @@
       <TXID>2</TXID>
       <DELEGATION_KEY>
         <KEY_ID>1</KEY_ID>
-        <EXPIRY_DATE>1388171826188</EXPIRY_DATE>
-        <KEY>c7d869c22c8afce1</KEY>
+        <EXPIRY_DATE>1389121087930</EXPIRY_DATE>
+        <KEY>d48b4b3e6a43707b</KEY>
       </DELEGATION_KEY>
     </DATA>
   </RECORD>
@@ -24,8 +24,8 @@
       <TXID>3</TXID>
       <DELEGATION_KEY>
         <KEY_ID>2</KEY_ID>
-        <EXPIRY_DATE>1388171826191</EXPIRY_DATE>
-        <KEY>a3c41446507dfca9</KEY>
+        <EXPIRY_DATE>1389121087937</EXPIRY_DATE>
+        <KEY>62b6fae6bff918a9</KEY>
       </DELEGATION_KEY>
     </DATA>
   </RECORD>
@@ -37,17 +37,17 @@
       <INODEID>16386</INODEID>
       <PATH>/file_create_u\0001;F431</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480626844</MTIME>
-      <ATIME>1387480626844</ATIME>
+      <MTIME>1388429889312</MTIME>
+      <ATIME>1388429889312</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
-      <CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
+      <CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
         <MODE>420</MODE>
       </PERMISSION_STATUS>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>7</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -59,8 +59,8 @@
       <INODEID>0</INODEID>
       <PATH>/file_create_u\0001;F431</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480626885</MTIME>
-      <ATIME>1387480626844</ATIME>
+      <MTIME>1388429889328</MTIME>
+      <ATIME>1388429889312</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
@@ -78,8 +78,8 @@
       <LENGTH>0</LENGTH>
       <SRC>/file_create_u\0001;F431</SRC>
       <DST>/file_moved</DST>
-      <TIMESTAMP>1387480626894</TIMESTAMP>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <TIMESTAMP>1388429889336</TIMESTAMP>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>9</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -89,8 +89,8 @@
       <TXID>7</TXID>
       <LENGTH>0</LENGTH>
       <PATH>/file_moved</PATH>
-      <TIMESTAMP>1387480626905</TIMESTAMP>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <TIMESTAMP>1388429889346</TIMESTAMP>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>10</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -101,7 +101,7 @@
       <LENGTH>0</LENGTH>
       <INODEID>16387</INODEID>
       <PATH>/directory_mkdir</PATH>
-      <TIMESTAMP>1387480626917</TIMESTAMP>
+      <TIMESTAMP>1388429889357</TIMESTAMP>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
@@ -136,7 +136,7 @@
       <TXID>12</TXID>
       <SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
       <SNAPSHOTNAME>snapshot1</SNAPSHOTNAME>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>15</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -147,7 +147,7 @@
       <SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
       <SNAPSHOTOLDNAME>snapshot1</SNAPSHOTOLDNAME>
       <SNAPSHOTNEWNAME>snapshot2</SNAPSHOTNEWNAME>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>16</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -157,7 +157,7 @@
       <TXID>14</TXID>
       <SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
       <SNAPSHOTNAME>snapshot2</SNAPSHOTNAME>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>17</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -169,17 +169,17 @@
       <INODEID>16388</INODEID>
       <PATH>/file_create_u\0001;F431</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480626978</MTIME>
-      <ATIME>1387480626978</ATIME>
+      <MTIME>1388429889412</MTIME>
+      <ATIME>1388429889412</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
-      <CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
+      <CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
         <MODE>420</MODE>
       </PERMISSION_STATUS>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>18</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -191,8 +191,8 @@
       <INODEID>0</INODEID>
       <PATH>/file_create_u\0001;F431</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480626985</MTIME>
-      <ATIME>1387480626978</ATIME>
+      <MTIME>1388429889420</MTIME>
+      <ATIME>1388429889412</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
@@ -253,9 +253,9 @@
       <LENGTH>0</LENGTH>
       <SRC>/file_create_u\0001;F431</SRC>
       <DST>/file_moved</DST>
-      <TIMESTAMP>1387480627035</TIMESTAMP>
+      <TIMESTAMP>1388429889495</TIMESTAMP>
       <OPTIONS>NONE</OPTIONS>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>25</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -267,17 +267,17 @@
       <INODEID>16389</INODEID>
       <PATH>/file_concat_target</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480627043</MTIME>
-      <ATIME>1387480627043</ATIME>
+      <MTIME>1388429889511</MTIME>
+      <ATIME>1388429889511</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
-      <CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
+      <CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
         <MODE>420</MODE>
       </PERMISSION_STATUS>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>27</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -388,8 +388,8 @@
       <INODEID>0</INODEID>
       <PATH>/file_concat_target</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480627148</MTIME>
-      <ATIME>1387480627043</ATIME>
+      <MTIME>1388429889812</MTIME>
+      <ATIME>1388429889511</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
@@ -423,17 +423,17 @@
       <INODEID>16390</INODEID>
       <PATH>/file_concat_0</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480627155</MTIME>
-      <ATIME>1387480627155</ATIME>
+      <MTIME>1388429889825</MTIME>
+      <ATIME>1388429889825</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
-      <CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
+      <CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
         <MODE>420</MODE>
       </PERMISSION_STATUS>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>40</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -544,8 +544,8 @@
       <INODEID>0</INODEID>
       <PATH>/file_concat_0</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480627193</MTIME>
-      <ATIME>1387480627155</ATIME>
+      <MTIME>1388429889909</MTIME>
+      <ATIME>1388429889825</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
@@ -579,17 +579,17 @@
       <INODEID>16391</INODEID>
       <PATH>/file_concat_1</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480627200</MTIME>
-      <ATIME>1387480627200</ATIME>
+      <MTIME>1388429889920</MTIME>
+      <ATIME>1388429889920</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
-      <CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
+      <CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
         <MODE>420</MODE>
       </PERMISSION_STATUS>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>52</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -700,8 +700,8 @@
       <INODEID>0</INODEID>
       <PATH>/file_concat_1</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480627238</MTIME>
-      <ATIME>1387480627200</ATIME>
+      <MTIME>1388429890016</MTIME>
+      <ATIME>1388429889920</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
@@ -733,12 +733,12 @@
       <TXID>56</TXID>
       <LENGTH>0</LENGTH>
       <TRG>/file_concat_target</TRG>
-      <TIMESTAMP>1387480627246</TIMESTAMP>
+      <TIMESTAMP>1388429890031</TIMESTAMP>
       <SOURCES>
         <SOURCE1>/file_concat_0</SOURCE1>
         <SOURCE2>/file_concat_1</SOURCE2>
       </SOURCES>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>63</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -750,14 +750,14 @@
       <INODEID>16392</INODEID>
       <PATH>/file_symlink</PATH>
       <VALUE>/file_concat_target</VALUE>
-      <MTIME>1387480627255</MTIME>
-      <ATIME>1387480627255</ATIME>
+      <MTIME>1388429890046</MTIME>
+      <ATIME>1388429890046</ATIME>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
         <MODE>511</MODE>
       </PERMISSION_STATUS>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>64</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -771,11 +771,11 @@
         <OWNER>andrew</OWNER>
         <RENEWER>JobTracker</RENEWER>
         <REALUSER></REALUSER>
-        <ISSUE_DATE>1387480627262</ISSUE_DATE>
-        <MAX_DATE>1388085427262</MAX_DATE>
+        <ISSUE_DATE>1388429890059</ISSUE_DATE>
+        <MAX_DATE>1389034690059</MAX_DATE>
         <MASTER_KEY_ID>2</MASTER_KEY_ID>
       </DELEGATION_TOKEN_IDENTIFIER>
-      <EXPIRY_TIME>1387567027262</EXPIRY_TIME>
+      <EXPIRY_TIME>1388516290059</EXPIRY_TIME>
     </DATA>
   </RECORD>
   <RECORD>
@@ -788,11 +788,11 @@
         <OWNER>andrew</OWNER>
         <RENEWER>JobTracker</RENEWER>
         <REALUSER></REALUSER>
-        <ISSUE_DATE>1387480627262</ISSUE_DATE>
-        <MAX_DATE>1388085427262</MAX_DATE>
+        <ISSUE_DATE>1388429890059</ISSUE_DATE>
+        <MAX_DATE>1389034690059</MAX_DATE>
         <MASTER_KEY_ID>2</MASTER_KEY_ID>
       </DELEGATION_TOKEN_IDENTIFIER>
-      <EXPIRY_TIME>1387567027281</EXPIRY_TIME>
+      <EXPIRY_TIME>1388516290109</EXPIRY_TIME>
     </DATA>
   </RECORD>
   <RECORD>
@@ -805,8 +805,8 @@
         <OWNER>andrew</OWNER>
         <RENEWER>JobTracker</RENEWER>
         <REALUSER></REALUSER>
-        <ISSUE_DATE>1387480627262</ISSUE_DATE>
-        <MAX_DATE>1388085427262</MAX_DATE>
+        <ISSUE_DATE>1388429890059</ISSUE_DATE>
+        <MAX_DATE>1389034690059</MAX_DATE>
         <MASTER_KEY_ID>2</MASTER_KEY_ID>
       </DELEGATION_TOKEN_IDENTIFIER>
     </DATA>
@@ -821,7 +821,7 @@
       <MODE>493</MODE>
       <LIMIT>9223372036854775807</LIMIT>
       <MAXRELATIVEEXPIRY>2305843009213693951</MAXRELATIVEEXPIRY>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>68</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -834,7 +834,7 @@
       <GROUPNAME>party</GROUPNAME>
       <MODE>448</MODE>
       <LIMIT>1989</LIMIT>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>69</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -846,8 +846,8 @@
       <PATH>/bar</PATH>
       <REPLICATION>1</REPLICATION>
       <POOL>poolparty</POOL>
-      <EXPIRATION>2305844396694321272</EXPIRATION>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <EXPIRATION>2305844397643584141</EXPIRATION>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>70</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -857,7 +857,7 @@
       <TXID>64</TXID>
       <ID>1</ID>
       <PATH>/bar2</PATH>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>71</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -866,7 +866,7 @@
     <DATA>
       <TXID>65</TXID>
       <ID>1</ID>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>72</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -875,7 +875,7 @@
     <DATA>
       <TXID>66</TXID>
       <POOLNAME>poolparty</POOLNAME>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>73</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -887,17 +887,17 @@
       <INODEID>16393</INODEID>
       <PATH>/hard-lease-recovery-test</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480627356</MTIME>
-      <ATIME>1387480627356</ATIME>
+      <MTIME>1388429890261</MTIME>
+      <ATIME>1388429890261</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
-      <CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
+      <CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
       <CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
         <GROUPNAME>supergroup</GROUPNAME>
         <MODE>420</MODE>
       </PERMISSION_STATUS>
-      <RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
+      <RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
       <RPC_CALLID>74</RPC_CALLID>
     </DATA>
   </RECORD>
@@ -954,28 +954,44 @@
     <OPCODE>OP_REASSIGN_LEASE</OPCODE>
     <DATA>
       <TXID>73</TXID>
-      <LEASEHOLDER>DFSClient_NONMAPREDUCE_1147796111_1</LEASEHOLDER>
+      <LEASEHOLDER>DFSClient_NONMAPREDUCE_-1396063717_1</LEASEHOLDER>
       <PATH>/hard-lease-recovery-test</PATH>
       <NEWHOLDER>HDFS_NameNode</NEWHOLDER>
     </DATA>
   </RECORD>
   <RECORD>
-    <OPCODE>OP_CLOSE</OPCODE>
+    <OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
     <DATA>
       <TXID>74</TXID>
+      <GENSTAMPV2>1012</GENSTAMPV2>
+    </DATA>
+  </RECORD>
+  <RECORD>
+    <OPCODE>OP_REASSIGN_LEASE</OPCODE>
+    <DATA>
+      <TXID>75</TXID>
+      <LEASEHOLDER>HDFS_NameNode</LEASEHOLDER>
+      <PATH>/hard-lease-recovery-test</PATH>
+      <NEWHOLDER>HDFS_NameNode</NEWHOLDER>
+    </DATA>
+  </RECORD>
+  <RECORD>
+    <OPCODE>OP_CLOSE</OPCODE>
+    <DATA>
+      <TXID>76</TXID>
       <LENGTH>0</LENGTH>
       <INODEID>0</INODEID>
       <PATH>/hard-lease-recovery-test</PATH>
       <REPLICATION>1</REPLICATION>
-      <MTIME>1387480629729</MTIME>
-      <ATIME>1387480627356</ATIME>
+      <MTIME>1388429895216</MTIME>
+      <ATIME>1388429890261</ATIME>
       <BLOCKSIZE>512</BLOCKSIZE>
       <CLIENT_NAME></CLIENT_NAME>
       <CLIENT_MACHINE></CLIENT_MACHINE>
       <BLOCK>
         <BLOCK_ID>1073741834</BLOCK_ID>
         <NUM_BYTES>11</NUM_BYTES>
-        <GENSTAMP>1011</GENSTAMP>
+        <GENSTAMP>1012</GENSTAMP>
       </BLOCK>
       <PERMISSION_STATUS>
         <USERNAME>andrew</USERNAME>
@@ -987,7 +1003,7 @@
   <RECORD>
     <OPCODE>OP_END_LOG_SEGMENT</OPCODE>
     <DATA>
-      <TXID>75</TXID>
+      <TXID>77</TXID>
     </DATA>
   </RECORD>
 </EDITS>

+ 14 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -77,6 +77,9 @@ Trunk (Unreleased)
     MAPREDUCE-5189. Add policies and wiring to respond to preemption requests
     from YARN. (Carlo Curino via cdouglas)
 
+    MAPREDUCE-5196. Add bookkeeping for managing checkpoints of task state.
+    (Carlo Curino via cdouglas)
+
   BUG FIXES
 
     MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant.
@@ -193,6 +196,8 @@ Release 2.4.0 - UNRELEASED
     MAPREDUCE-5550. Task Status message (reporter.setStatus) not shown in UI
     with Hadoop 2.0 (Gera Shegalov via Sandy Ryza)
 
+    MAPREDUCE-3310. Custom grouping comparator cannot be set for Combiners (tucu)
+
   OPTIMIZATIONS
 
     MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza)
@@ -258,6 +263,15 @@ Release 2.4.0 - UNRELEASED
     MAPREDUCE-5687. Fixed failure in TestYARNRunner caused by YARN-1446. (Jian He
     via vinodkv)
 
+    MAPREDUCE-5694. Fixed MR AppMaster to shutdown the LogManager so as to avoid
+    losing syslog in some conditions. (Mohammad Kamrul Islam via vinodkv)
+
+    MAPREDUCE-5685. Fixed a bug with JobContext getCacheFiles API inside the
+    WrappedReducer class. (Yi Song via vinodkv)
+
+    MAPREDUCE-5689. MRAppMaster does not preempt reducers when scheduled maps 
+    cannot be fulfilled. (lohit via kasha)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 66 - 10
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java

@@ -36,7 +36,9 @@ import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.mapred.SortedRanges.Range;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TypeConverter;
+import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
 import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
 import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener;
 import org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler;
@@ -45,8 +47,8 @@ import org.apache.hadoop.mapreduce.v2.app.job.Task;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
-import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus;
+import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
 import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
 import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
 import org.apache.hadoop.mapreduce.v2.app.security.authorize.MRAMPolicyProvider;
@@ -228,6 +230,22 @@ public class TaskAttemptListenerImpl extends CompositeService
             TaskAttemptEventType.TA_COMMIT_PENDING));
   }
 
+  @Override
+  public void preempted(TaskAttemptID taskAttemptID, TaskStatus taskStatus)
+          throws IOException, InterruptedException {
+    LOG.info("Preempted state update from " + taskAttemptID.toString());
+    // An attempt is telling us that it got preempted.
+    org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
+        TypeConverter.toYarn(taskAttemptID);
+
+    preemptionPolicy.reportSuccessfulPreemption(attemptID);
+    taskHeartbeatHandler.progressing(attemptID);
+
+    context.getEventHandler().handle(
+        new TaskAttemptEvent(attemptID,
+            TaskAttemptEventType.TA_PREEMPTED));
+  }
+
   @Override
   public void done(TaskAttemptID taskAttemptID) throws IOException {
     LOG.info("Done acknowledgement from " + taskAttemptID.toString());
@@ -250,6 +268,10 @@ public class TaskAttemptListenerImpl extends CompositeService
 
     org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
         TypeConverter.toYarn(taskAttemptID);
+
+    // handling checkpoints
+    preemptionPolicy.handleFailedContainer(attemptID);
+
     context.getEventHandler().handle(
         new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
   }
@@ -264,6 +286,10 @@ public class TaskAttemptListenerImpl extends CompositeService
 
     org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
         TypeConverter.toYarn(taskAttemptID);
+
+    // handling checkpoints
+    preemptionPolicy.handleFailedContainer(attemptID);
+
     context.getEventHandler().handle(
         new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
   }
@@ -293,12 +319,6 @@ public class TaskAttemptListenerImpl extends CompositeService
     return new MapTaskCompletionEventsUpdate(events, shouldReset);
   }
 
-  @Override
-  public boolean ping(TaskAttemptID taskAttemptID) throws IOException {
-    LOG.info("Ping from " + taskAttemptID.toString());
-    return true;
-  }
-
   @Override
   public void reportDiagnosticInfo(TaskAttemptID taskAttemptID, String diagnosticInfo)
  throws IOException {
@@ -321,11 +341,33 @@ public class TaskAttemptListenerImpl extends CompositeService
   }
 
   @Override
-  public boolean statusUpdate(TaskAttemptID taskAttemptID,
+  public AMFeedback statusUpdate(TaskAttemptID taskAttemptID,
       TaskStatus taskStatus) throws IOException, InterruptedException {
-    LOG.info("Status update from " + taskAttemptID.toString());
+
     org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptID =
         TypeConverter.toYarn(taskAttemptID);
+
+    AMFeedback feedback = new AMFeedback();
+    feedback.setTaskFound(true);
+
+    // Propagating preemption to the task if TASK_PREEMPTION is enabled
+    if (getConfig().getBoolean(MRJobConfig.TASK_PREEMPTION, false)
+        && preemptionPolicy.isPreempted(yarnAttemptID)) {
+      feedback.setPreemption(true);
+      LOG.info("Setting preemption bit for task: "+ yarnAttemptID
+          + " of type " + yarnAttemptID.getTaskId().getTaskType());
+    }
+
+    if (taskStatus == null) {
+      //We are using statusUpdate only as a simple ping
+      LOG.info("Ping from " + taskAttemptID.toString());
+      taskHeartbeatHandler.progressing(yarnAttemptID);
+      return feedback;
+    }
+
+    // if we are here there is an actual status update to be processed
+    LOG.info("Status update from " + taskAttemptID.toString());
+
     taskHeartbeatHandler.progressing(yarnAttemptID);
     TaskAttemptStatus taskAttemptStatus =
         new TaskAttemptStatus();
@@ -386,7 +428,7 @@ public class TaskAttemptListenerImpl extends CompositeService
     context.getEventHandler().handle(
         new TaskAttemptStatusUpdateEvent(taskAttemptStatus.id,
             taskAttemptStatus));
-    return true;
+    return feedback;
   }
 
   @Override
@@ -494,4 +536,18 @@ public class TaskAttemptListenerImpl extends CompositeService
     return ProtocolSignature.getProtocolSignature(this, 
         protocol, clientVersion, clientMethodsHash);
   }
+
+  // task checkpoint bookkeeping
+  @Override
+  public TaskCheckpointID getCheckpointID(TaskID taskId) {
+    TaskId tid = TypeConverter.toYarn(taskId);
+    return preemptionPolicy.getCheckpointID(tid);
+  }
+
+  @Override
+  public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
+    TaskId tid = TypeConverter.toYarn(taskId);
+    preemptionPolicy.setCheckpointID(tid, cid);
+  }
+
 }

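With this change statusUpdate doubles as the ping: a null TaskStatus is treated as a plain heartbeat, and the returned AMFeedback carries both liveness and the preemption bit. A sketch of how the task side could react, assuming AMFeedback lives alongside the umbilical in org.apache.hadoop.mapred and exposes getters matching the setTaskFound/setPreemption calls above; the class and method names are illustrative.

import java.io.IOException;
import org.apache.hadoop.mapred.AMFeedback;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;

final class HeartbeatSketch {
  // Returns true if the caller should begin checkpoint-and-stop.
  static boolean pingAndCheckPreemption(TaskUmbilicalProtocol umbilical,
      TaskAttemptID id) throws IOException, InterruptedException {
    // A null status is interpreted by the AM as a simple ping.
    AMFeedback feedback = umbilical.statusUpdate(id, null);
    if (!feedback.getTaskFound()) {          // getter name assumed from setTaskFound
      throw new IOException("AM no longer knows attempt " + id);
    }
    return feedback.getPreemption();         // getter name assumed from setPreemption
  }
}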
+ 3 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java

@@ -139,6 +139,7 @@ import org.apache.hadoop.yarn.security.client.ClientToAMTokenSecretManager;
 import org.apache.hadoop.yarn.util.Clock;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.hadoop.yarn.util.SystemClock;
+import org.apache.log4j.LogManager;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -1395,6 +1396,8 @@ public class MRAppMaster extends CompositeService {
     } catch (Throwable t) {
       LOG.fatal("Error starting MRAppMaster", t);
       System.exit(1);
+    } finally {
+      LogManager.shutdown();
     }
   }
 

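The one-line fix above addresses the MAPREDUCE-5694 symptom noted in CHANGES.txt: without an explicit log4j shutdown, buffered syslog output can be lost when the AM exits. The pattern in isolation, with illustrative class and method names.

import org.apache.log4j.LogManager;

public final class LogFlushSketch {
  public static void main(String[] args) {
    try {
      runApplication(args);
    } finally {
      // Flush and close every log4j appender before the JVM exits, so the
      // tail of the log is not lost.
      LogManager.shutdown();
    }
  }

  private static void runApplication(String[] args) {
    // application body (omitted)
  }
}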
+ 1 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java

@@ -47,6 +47,7 @@ public enum TaskAttemptEventType {
   TA_FAILMSG,
   TA_UPDATE,
   TA_TIMED_OUT,
+  TA_PREEMPTED,
 
   //Producer:TaskCleaner
   TA_CLEANUP_DONE,

+ 25 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java

@@ -304,6 +304,9 @@ public abstract class TaskAttemptImpl implements
      .addTransition(TaskAttemptStateInternal.RUNNING,
          TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP, TaskAttemptEventType.TA_KILL,
          CLEANUP_CONTAINER_TRANSITION)
+     .addTransition(TaskAttemptStateInternal.RUNNING,
+         TaskAttemptStateInternal.KILLED,
+         TaskAttemptEventType.TA_PREEMPTED, new PreemptedTransition())
 
      // Transitions from COMMIT_PENDING state
      .addTransition(TaskAttemptStateInternal.COMMIT_PENDING,
@@ -437,6 +440,7 @@ public abstract class TaskAttemptImpl implements
              TaskAttemptEventType.TA_DONE,
              TaskAttemptEventType.TA_FAILMSG,
              TaskAttemptEventType.TA_CONTAINER_CLEANED,
+             TaskAttemptEventType.TA_PREEMPTED,
              // Container launch events can arrive late
              TaskAttemptEventType.TA_CONTAINER_LAUNCHED,
              TaskAttemptEventType.TA_CONTAINER_LAUNCH_FAILED))
@@ -1874,6 +1878,27 @@ public abstract class TaskAttemptImpl implements
     }
   }
 
+  private static class PreemptedTransition implements
+      SingleArcTransition<TaskAttemptImpl,TaskAttemptEvent> {
+    @SuppressWarnings("unchecked")
+    @Override
+    public void transition(TaskAttemptImpl taskAttempt,
+        TaskAttemptEvent event) {
+      taskAttempt.setFinishTime();
+      taskAttempt.taskAttemptListener.unregister(
+          taskAttempt.attemptId, taskAttempt.jvmID);
+      taskAttempt.eventHandler.handle(new ContainerLauncherEvent(
+          taskAttempt.attemptId,
+          taskAttempt.getAssignedContainerID(), taskAttempt.getAssignedContainerMgrAddress(),
+          taskAttempt.container.getContainerToken(),
+          ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP));
+      taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
+          taskAttempt.attemptId,
+          TaskEventType.T_ATTEMPT_KILLED));
+
+    }
+  }
+
   private static class CleanupContainerTransition implements
        SingleArcTransition<TaskAttemptImpl, TaskAttemptEvent> {
     @SuppressWarnings("unchecked")

+ 3 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java

@@ -229,7 +229,8 @@ public class RMContainerAllocator extends RMContainerRequestor
 
     int completedMaps = getJob().getCompletedMaps();
     int completedTasks = completedMaps + getJob().getCompletedReduces();
-    if (lastCompletedTasks != completedTasks) {
+    if ((lastCompletedTasks != completedTasks) ||
+          (scheduledRequests.maps.size() > 0)) {
       lastCompletedTasks = completedTasks;
       recalculateReduceSchedule = true;
     }
@@ -347,7 +348,7 @@ public class RMContainerAllocator extends RMContainerRequestor
       }
       
     } else if (
-        event.getType() == ContainerAllocator.EventType.CONTAINER_DEALLOCATE) {
+      event.getType() == ContainerAllocator.EventType.CONTAINER_DEALLOCATE) {
   
       LOG.info("Processing the event " + event.toString());
 

+ 6 - 7
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java

@@ -19,10 +19,9 @@ package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
 
 import java.util.List;
 
-import org.apache.hadoop.mapred.TaskAttemptID;
-import org.apache.hadoop.mapred.TaskID;
 import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -81,7 +80,7 @@ public interface AMPreemptionPolicy {
    * successfully preempted (for bookeeping, counters, etc..)
    * @param attemptID Task attempt that preempted
    */
-  public void reportSuccessfulPreemption(TaskAttemptID attemptID);
+  public void reportSuccessfulPreemption(TaskAttemptId attemptID);
 
   /**
    * Callback informing the policy of containers exiting with a failure. This
@@ -98,20 +97,20 @@ public interface AMPreemptionPolicy {
   public void handleCompletedContainer(TaskAttemptId attemptID);
 
   /**
-   * Method to retrieve the latest checkpoint for a given {@link TaskID}
+   * Method to retrieve the latest checkpoint for a given {@link TaskId}
    * @param taskId TaskID
    * @return CheckpointID associated with this task or null
    */
-  public TaskCheckpointID getCheckpointID(TaskID taskId);
+  public TaskCheckpointID getCheckpointID(TaskId taskId);
 
   /**
    * Method to store the latest {@link
    * org.apache.hadoop.mapreduce.checkpoint.CheckpointID} for a given {@link
-   * TaskID}. Assigning a null is akin to remove all previous checkpoints for
+   * TaskId}. Assigning a null is akin to remove all previous checkpoints for
    * this task.
    * @param taskId TaskID
    * @param cid Checkpoint to assign or <tt>null</tt> to remove it.
    */
-  public void setCheckpointID(TaskID taskId, TaskCheckpointID cid);
+  public void setCheckpointID(TaskId taskId, TaskCheckpointID cid);
 
 }

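The interface now keys checkpoints and preemption reports by the YARN-side TaskId/TaskAttemptId records instead of the legacy org.apache.hadoop.mapred identifiers, so callers holding the old IDs convert first, exactly as the TaskAttemptListenerImpl hunk earlier in this commit does with TypeConverter. A minimal sketch of that translation; the class and method names are illustrative.

import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;

final class IdTranslationSketch {
  static TaskId toPolicyKey(org.apache.hadoop.mapred.TaskID legacyTaskId) {
    return TypeConverter.toYarn(legacyTaskId);
  }

  static TaskAttemptId toPolicyKey(org.apache.hadoop.mapred.TaskAttemptID legacyAttemptId) {
    return TypeConverter.toYarn(legacyAttemptId);
  }
}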
+ 290 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/CheckpointAMPreemptionPolicy.java

@@ -0,0 +1,290 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapreduce.JobCounter;
+import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
+import org.apache.hadoop.mapreduce.v2.app.AppContext;
+import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.PreemptionContainer;
+import org.apache.hadoop.yarn.api.records.PreemptionContract;
+import org.apache.hadoop.yarn.api.records.PreemptionMessage;
+import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
+import org.apache.hadoop.yarn.event.EventHandler;
+
+/**
+ * This policy works in combination with an implementation of task
+ * checkpointing. It computes the tasks to be preempted in response to the RM
+ * request for preemption. For strict requests, it maps containers to
+ * corresponding tasks; for fungible requests, it attempts to pick the best
+ * containers to preempt (reducers in reverse allocation order). The
+ * TaskAttemptListener will interrogate this policy when handling a task
+ * heartbeat to check whether the task should be preempted or not. When handling
+ * fungible requests, the policy discounts the RM ask by the amount of currently
+ * in-flight preemptions (i.e., tasks that are checkpointing).
+ *
+ * This class is also used to maintain the list of checkpoints for existing
+ * tasks. Centralizing this functionality here allows us to have visibility on
+ * preemption and checkpoints in a single location, thus coordinating preemption
+ * and checkpoint management decisions in a single policy.
+ */
+public class CheckpointAMPreemptionPolicy implements AMPreemptionPolicy {
+
+  // task attempts flagged for preemption
+  private final Set<TaskAttemptId> toBePreempted;
+
+  private final Set<TaskAttemptId> countedPreemptions;
+
+  private final Map<TaskId,TaskCheckpointID> checkpoints;
+
+  private final Map<TaskAttemptId,Resource> pendingFlexiblePreemptions;
+
+  @SuppressWarnings("rawtypes")
+  private EventHandler eventHandler;
+
+  static final Log LOG = LogFactory
+      .getLog(CheckpointAMPreemptionPolicy.class);
+
+  public CheckpointAMPreemptionPolicy() {
+    this(Collections.synchronizedSet(new HashSet<TaskAttemptId>()),
+         Collections.synchronizedSet(new HashSet<TaskAttemptId>()),
+         Collections.synchronizedMap(new HashMap<TaskId,TaskCheckpointID>()),
+         Collections.synchronizedMap(new HashMap<TaskAttemptId,Resource>()));
+  }
+
+  CheckpointAMPreemptionPolicy(Set<TaskAttemptId> toBePreempted,
+      Set<TaskAttemptId> countedPreemptions,
+      Map<TaskId,TaskCheckpointID> checkpoints,
+      Map<TaskAttemptId,Resource> pendingFlexiblePreemptions) {
+    this.toBePreempted = toBePreempted;
+    this.countedPreemptions = countedPreemptions;
+    this.checkpoints = checkpoints;
+    this.pendingFlexiblePreemptions = pendingFlexiblePreemptions;
+  }
+
+  @Override
+  public void init(AppContext context) {
+    this.eventHandler = context.getEventHandler();
+  }
+
+  @Override
+  public void preempt(Context ctxt, PreemptionMessage preemptionRequests) {
+
+    if (preemptionRequests != null) {
+
+      // handling non-negotiable preemption
+
+      StrictPreemptionContract cStrict = preemptionRequests.getStrictContract();
+      if (cStrict != null
+          && cStrict.getContainers() != null
+          && cStrict.getContainers().size() > 0) {
+        LOG.info("strict preemption :" +
+            preemptionRequests.getStrictContract().getContainers().size() +
+            " containers to kill");
+
+        // handle strict preemptions. These containers are non-negotiable
+        for (PreemptionContainer c :
+            preemptionRequests.getStrictContract().getContainers()) {
+          ContainerId reqCont = c.getId();
+          TaskAttemptId reqTask = ctxt.getTaskAttempt(reqCont);
+          if (reqTask != null) {
+            // ignore requests for preempting containers running maps
+            if (org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE
+                .equals(reqTask.getTaskId().getTaskType())) {
+              toBePreempted.add(reqTask);
+              LOG.info("preempting " + reqCont + " running task:" + reqTask);
+            } else {
+              LOG.info("NOT preempting " + reqCont + " running task:" + reqTask);
+            }
+          }
+        }
+      }
+
+      // handling negotiable preemption
+      PreemptionContract cNegot = preemptionRequests.getContract();
+      if (cNegot != null
+          && cNegot.getResourceRequest() != null
+          && cNegot.getResourceRequest().size() > 0
+          && cNegot.getContainers() != null
+          && cNegot.getContainers().size() > 0) {
+
+        LOG.info("negotiable preemption :" +
+            preemptionRequests.getContract().getResourceRequest().size() +
+            " resourceReq, " +
+            preemptionRequests.getContract().getContainers().size() +
+            " containers");
+        // handle fungible preemption. Here we only look at the total amount of
+        // resources to be preempted and pick enough of our containers to
+        // satisfy that. We only support checkpointing for reducers for now.
+        List<PreemptionResourceRequest> reqResources =
+          preemptionRequests.getContract().getResourceRequest();
+
+        // compute the total amount of pending preemptions (to be discounted
+        // from current request)
+        int pendingPreemptionRam = 0;
+        int pendingPreemptionCores = 0;
+        for (Resource r : pendingFlexiblePreemptions.values()) {
+          pendingPreemptionRam += r.getMemory();
+          pendingPreemptionCores += r.getVirtualCores();
+        }
+
+        // discount preemption request based on currently pending preemption
+        for (PreemptionResourceRequest rr : reqResources) {
+          ResourceRequest reqRsrc = rr.getResourceRequest();
+          if (!ResourceRequest.ANY.equals(reqRsrc.getResourceName())) {
+            // For now, only respond to aggregate requests and ignore locality
+            continue;
+          }
+
+          LOG.info("ResourceRequest:" + reqRsrc);
+          int reqCont = reqRsrc.getNumContainers();
+          int reqMem = reqRsrc.getCapability().getMemory();
+          int totalMemoryToRelease = reqCont * reqMem;
+          int reqCores = reqRsrc.getCapability().getVirtualCores();
+          int totalCoresToRelease = reqCont * reqCores;
+
+          // discount the ask by preemptions already in flight
+          if (pendingPreemptionRam > 0) {
+            // if this goes negative we simply exit
+            totalMemoryToRelease -= pendingPreemptionRam;
+            // decrement pending resources; if zero or negative we will
+            // ignore it while processing the next PreemptionResourceRequest
+            pendingPreemptionRam -= totalMemoryToRelease;
+          }
+          if (pendingPreemptionCores > 0) {
+            totalCoresToRelease -= pendingPreemptionCores;
+            pendingPreemptionCores -= totalCoresToRelease;
+          }
+
+          // reverse order of allocation (for now)
+          List<Container> listOfCont = ctxt.getContainers(TaskType.REDUCE);
+          Collections.sort(listOfCont, new Comparator<Container>() {
+            @Override
+            public int compare(final Container o1, final Container o2) {
+              return o2.getId().getId() - o1.getId().getId();
+            }
+          });
+
+          // preempt reducers first
+          for (Container cont : listOfCont) {
+            if (totalMemoryToRelease <= 0 && totalCoresToRelease <= 0) {
+              break;
+            }
+            TaskAttemptId reduceId = ctxt.getTaskAttempt(cont.getId());
+            int cMem = cont.getResource().getMemory();
+            int cCores = cont.getResource().getVirtualCores();
+
+            if (!toBePreempted.contains(reduceId)) {
+              totalMemoryToRelease -= cMem;
+              totalCoresToRelease -= cCores;
+              toBePreempted.add(reduceId);
+              pendingFlexiblePreemptions.put(reduceId, cont.getResource());
+            }
+            LOG.info("ResourceRequest:" + reqRsrc + " satisfied preempting "
+                + reduceId);
+          }
+          // if maps were preemptable we would add them to toBePreempted here
+        }
+      }
+    }
+  }
+
+  @Override
+  public void handleFailedContainer(TaskAttemptId attemptID) {
+    toBePreempted.remove(attemptID);
+    checkpoints.remove(attemptID.getTaskId());
+  }
+
+  @Override
+  public void handleCompletedContainer(TaskAttemptId attemptID){
+    LOG.info(" task completed:" + attemptID);
+    toBePreempted.remove(attemptID);
+    pendingFlexiblePreemptions.remove(attemptID);
+  }
+
+  @Override
+  public boolean isPreempted(TaskAttemptId yarnAttemptID) {
+    if (toBePreempted.contains(yarnAttemptID)) {
+      updatePreemptionCounters(yarnAttemptID);
+      return true;
+    }
+    return false;
+  }
+
+  @Override
+  public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
+    // ignore
+  }
+
+  @Override
+  public TaskCheckpointID getCheckpointID(TaskId taskId) {
+    return checkpoints.get(taskId);
+  }
+
+  @Override
+  public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
+    checkpoints.put(taskId, cid);
+    if (cid != null) {
+      updateCheckpointCounters(taskId, cid);
+    }
+  }
+
+  @SuppressWarnings({ "unchecked" })
+  private void updateCheckpointCounters(TaskId taskId, TaskCheckpointID cid) {
+    JobCounterUpdateEvent jce = new JobCounterUpdateEvent(taskId.getJobId());
+    jce.addCounterUpdate(JobCounter.CHECKPOINTS, 1);
+    eventHandler.handle(jce);
+    jce = new JobCounterUpdateEvent(taskId.getJobId());
+    jce.addCounterUpdate(JobCounter.CHECKPOINT_BYTES, cid.getCheckpointBytes());
+    eventHandler.handle(jce);
+    jce = new JobCounterUpdateEvent(taskId.getJobId());
+    jce.addCounterUpdate(JobCounter.CHECKPOINT_TIME, cid.getCheckpointTime());
+    eventHandler.handle(jce);
+
+  }
+
+  @SuppressWarnings({ "unchecked" })
+  private void updatePreemptionCounters(TaskAttemptId yarnAttemptID) {
+    if (!countedPreemptions.contains(yarnAttemptID)) {
+      countedPreemptions.add(yarnAttemptID);
+      JobCounterUpdateEvent jce = new JobCounterUpdateEvent(yarnAttemptID
+          .getTaskId().getJobId());
+      jce.addCounterUpdate(JobCounter.TASKS_REQ_PREEMPT, 1);
+      eventHandler.handle(jce);
+    }
+  }
+
+}
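
A minimal, standalone sketch of the discounting done in preempt() above for fungible requests (not part of this patch; the class name and all numbers are invented for illustration): the RM ask is first reduced by preemptions already in flight, and only the remainder drives how many additional reducers get flagged.

public class PreemptionDiscountSketch {
  public static void main(String[] args) {
    int reqContainers = 4;            // RM asks for 4 containers back
    int reqMemPerContainer = 1024;    // each of 1024 MB
    int pendingPreemptionRam = 2048;  // two reducers are already checkpointing

    int totalMemoryToRelease = reqContainers * reqMemPerContainer;   // 4096 MB
    if (pendingPreemptionRam > 0) {
      totalMemoryToRelease -= pendingPreemptionRam;                  // 2048 MB left
    }

    int reducerMem = 2048;            // size of each running reducer container
    int newlyPreempted = 0;
    while (totalMemoryToRelease > 0) {
      // the real policy walks reducers in reverse allocation order
      totalMemoryToRelease -= reducerMem;
      newlyPreempted++;
    }
    System.out.println("newly flagged reducers: " + newlyPreempted); // prints 1
  }
}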

+ 4 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/KillAMPreemptionPolicy.java

@@ -19,11 +19,10 @@ package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.mapred.TaskAttemptID;
-import org.apache.hadoop.mapred.TaskID;
 import org.apache.hadoop.mapreduce.JobCounter;
 import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
 import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
@@ -89,17 +88,17 @@ public class KillAMPreemptionPolicy implements AMPreemptionPolicy {
   }
 
   @Override
-  public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) {
+  public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
     // ignore
   }
 
   @Override
-  public TaskCheckpointID getCheckpointID(TaskID taskId) {
+  public TaskCheckpointID getCheckpointID(TaskId taskId) {
     return null;
   }
 
   @Override
-  public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
+  public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
     // ignore
   }
 

+ 4 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/NoopAMPreemptionPolicy.java

@@ -17,10 +17,9 @@
 */
 package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
 
-import org.apache.hadoop.mapred.TaskAttemptID;
-import org.apache.hadoop.mapred.TaskID;
 import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
 import org.apache.hadoop.yarn.api.records.PreemptionMessage;
 
@@ -50,17 +49,17 @@ public class NoopAMPreemptionPolicy implements AMPreemptionPolicy {
   }
 
   @Override
-  public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) {
+  public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
     // ignore
   }
 
   @Override
-  public TaskCheckpointID getCheckpointID(TaskID taskId) {
+  public TaskCheckpointID getCheckpointID(TaskId taskId) {
     return null;
   }
 
   @Override
-  public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
+  public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
     // ignore
   }
 

+ 134 - 23
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java

@@ -17,26 +17,23 @@
 */
 package org.apache.hadoop.mapred;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.any;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.never;
-import static org.mockito.Mockito.times;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.Counters.Counter;
+import org.apache.hadoop.mapreduce.checkpoint.EnumCounter;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
-
-import junit.framework.Assert;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.hadoop.mapreduce.TypeConverter;
+import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
+import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
+import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
 import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent;
@@ -46,21 +43,31 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
 import org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler;
 import org.apache.hadoop.mapreduce.v2.app.job.Job;
+import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
+import org.apache.hadoop.mapreduce.v2.app.rm.preemption.CheckpointAMPreemptionPolicy;
 import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
 import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.util.SystemClock;
+
 import org.junit.Test;
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.*;
 
 public class TestTaskAttemptListenerImpl {
-  public static class MockTaskAttemptListenerImpl extends TaskAttemptListenerImpl {
+  public static class MockTaskAttemptListenerImpl
+      extends TaskAttemptListenerImpl {
 
     public MockTaskAttemptListenerImpl(AppContext context,
         JobTokenSecretManager jobTokenSecretManager,
         RMHeartbeatHandler rmHeartbeatHandler,
-        TaskHeartbeatHandler hbHandler) {
-      super(context, jobTokenSecretManager, rmHeartbeatHandler, null);
+        TaskHeartbeatHandler hbHandler,
+        AMPreemptionPolicy policy) {
+
+      super(context, jobTokenSecretManager, rmHeartbeatHandler, policy);
       this.taskHeartbeatHandler = hbHandler;
     }
     
@@ -87,9 +94,16 @@ public class TestTaskAttemptListenerImpl {
     RMHeartbeatHandler rmHeartbeatHandler =
         mock(RMHeartbeatHandler.class);
     TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
+    Dispatcher dispatcher = mock(Dispatcher.class);
+    EventHandler ea = mock(EventHandler.class);
+    when(dispatcher.getEventHandler()).thenReturn(ea);
+
+    when(appCtx.getEventHandler()).thenReturn(ea);
+    CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
+    policy.init(appCtx);
     MockTaskAttemptListenerImpl listener = 
       new MockTaskAttemptListenerImpl(appCtx, secret,
-          rmHeartbeatHandler, hbHandler);
+          rmHeartbeatHandler, hbHandler, policy);
     Configuration conf = new Configuration();
     listener.init(conf);
     listener.start();
@@ -144,7 +158,7 @@ public class TestTaskAttemptListenerImpl {
     assertNotNull(jvmid);
     try {
       JVMId.forName("jvm_001_002_m_004_006");
-      Assert.fail();
+      fail();
     } catch (IllegalArgumentException e) {
       assertEquals(e.getMessage(),
           "TaskId string : jvm_001_002_m_004_006 is not properly formed");
@@ -190,8 +204,14 @@ public class TestTaskAttemptListenerImpl {
     RMHeartbeatHandler rmHeartbeatHandler =
         mock(RMHeartbeatHandler.class);
     final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
-    TaskAttemptListenerImpl listener =
-        new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) {
+    Dispatcher dispatcher = mock(Dispatcher.class);
+    EventHandler ea = mock(EventHandler.class);
+    when(dispatcher.getEventHandler()).thenReturn(ea);
+    when(appCtx.getEventHandler()).thenReturn(ea);
+    CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
+    policy.init(appCtx);
+    TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
+        appCtx, secret, rmHeartbeatHandler, policy) {
       @Override
       protected void registerHeartbeatHandler(Configuration conf) {
         taskHeartbeatHandler = hbHandler;
@@ -219,7 +239,8 @@ public class TestTaskAttemptListenerImpl {
         isMap ? org.apache.hadoop.mapreduce.v2.api.records.TaskType.MAP
             : org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE);
     TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(tid, 0);
-    RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
+    RecordFactory recordFactory =
+      RecordFactoryProvider.getRecordFactory(null);
     TaskAttemptCompletionEvent tce = recordFactory
         .newRecordInstance(TaskAttemptCompletionEvent.class);
     tce.setEventId(eventId);
@@ -244,8 +265,14 @@ public class TestTaskAttemptListenerImpl {
     RMHeartbeatHandler rmHeartbeatHandler =
         mock(RMHeartbeatHandler.class);
     final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
-    TaskAttemptListenerImpl listener =
-        new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) {
+    Dispatcher dispatcher = mock(Dispatcher.class);
+    EventHandler ea = mock(EventHandler.class);
+    when(dispatcher.getEventHandler()).thenReturn(ea);
+    when(appCtx.getEventHandler()).thenReturn(ea);
+    CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
+    policy.init(appCtx);
+    TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
+        appCtx, secret, rmHeartbeatHandler, policy) {
       @Override
       protected void registerHeartbeatHandler(Configuration conf) {
         taskHeartbeatHandler = hbHandler;
@@ -270,4 +297,88 @@ public class TestTaskAttemptListenerImpl {
 
     listener.stop();
   }
+
+  @Test
+  public void testCheckpointIDTracking()
+    throws IOException, InterruptedException{
+
+    SystemClock clock = new SystemClock();
+
+    org.apache.hadoop.mapreduce.v2.app.job.Task mockTask =
+        mock(org.apache.hadoop.mapreduce.v2.app.job.Task.class);
+    when(mockTask.canCommit(any(TaskAttemptId.class))).thenReturn(true);
+    Job mockJob = mock(Job.class);
+    when(mockJob.getTask(any(TaskId.class))).thenReturn(mockTask);
+
+    Dispatcher dispatcher = mock(Dispatcher.class);
+    EventHandler ea = mock(EventHandler.class);
+    when(dispatcher.getEventHandler()).thenReturn(ea);
+
+    RMHeartbeatHandler rmHeartbeatHandler =
+        mock(RMHeartbeatHandler.class);
+
+    AppContext appCtx = mock(AppContext.class);
+    when(appCtx.getJob(any(JobId.class))).thenReturn(mockJob);
+    when(appCtx.getClock()).thenReturn(clock);
+    when(appCtx.getEventHandler()).thenReturn(ea);
+    JobTokenSecretManager secret = mock(JobTokenSecretManager.class);
+    final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
+    when(appCtx.getEventHandler()).thenReturn(ea);
+    CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
+    policy.init(appCtx);
+    TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
+        appCtx, secret, rmHeartbeatHandler, policy) {
+      @Override
+      protected void registerHeartbeatHandler(Configuration conf) {
+        taskHeartbeatHandler = hbHandler;
+      }
+    };
+
+    Configuration conf = new Configuration();
+    conf.setBoolean(MRJobConfig.TASK_PREEMPTION, true);
+    //conf.setBoolean("preemption.reduce", true);
+
+    listener.init(conf);
+    listener.start();
+
+    TaskAttemptID tid = new TaskAttemptID("12345", 1, TaskType.REDUCE, 1, 0);
+
+    List<Path> partialOut = new ArrayList<Path>();
+    partialOut.add(new Path("/prev1"));
+    partialOut.add(new Path("/prev2"));
+
+    Counters counters = mock(Counters.class);
+    final long CBYTES = 64L * 1024 * 1024;
+    final long CTIME = 4344L;
+    final Path CLOC = new Path("/test/1");
+    Counter cbytes = mock(Counter.class);
+    when(cbytes.getValue()).thenReturn(CBYTES);
+    Counter ctime = mock(Counter.class);
+    when(ctime.getValue()).thenReturn(CTIME);
+    when(counters.findCounter(eq(EnumCounter.CHECKPOINT_BYTES)))
+        .thenReturn(cbytes);
+    when(counters.findCounter(eq(EnumCounter.CHECKPOINT_MS)))
+        .thenReturn(ctime);
+
+    // propagating a taskstatus that contains a checkpoint id
+    TaskCheckpointID incid = new TaskCheckpointID(new FSCheckpointID(
+          CLOC), partialOut, counters);
+    listener.setCheckpointID(
+        org.apache.hadoop.mapred.TaskID.downgrade(tid.getTaskID()), incid);
+
+    // and try to get it back
+    CheckpointID outcid = listener.getCheckpointID(tid.getTaskID());
+    TaskCheckpointID tcid = (TaskCheckpointID) outcid;
+    assertEquals(CBYTES, tcid.getCheckpointBytes());
+    assertEquals(CTIME, tcid.getCheckpointTime());
+    assertTrue(partialOut.containsAll(tcid.getPartialCommittedOutput()));
+    assertTrue(tcid.getPartialCommittedOutput().containsAll(partialOut));
+
+    //assert it worked
+    assert outcid == incid;
+
+    listener.stop();
+
+  }
+
 }

+ 329 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestCheckpointPreemptionPolicy.java

@@ -0,0 +1,329 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapreduce.v2.app;
+
+import org.apache.hadoop.yarn.api.records.PreemptionContract;
+import org.apache.hadoop.yarn.api.records.PreemptionMessage;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.util.resource.Resources;
+
+import static org.junit.Assert.*;
+import static org.mockito.Mockito.*;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.mapred.TaskAttemptListenerImpl;
+import org.apache.hadoop.mapreduce.v2.api.records.JobId;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
+import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
+import org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext;
+import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator;
+import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
+import org.apache.hadoop.mapreduce.v2.app.rm.preemption.CheckpointAMPreemptionPolicy;
+import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
+import org.apache.hadoop.yarn.api.records.PreemptionContainer;
+import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest;
+import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestCheckpointPreemptionPolicy {
+
+  TaskAttemptListenerImpl pel= null;
+  RMContainerAllocator r;
+  JobId jid;
+  RunningAppContext mActxt;
+  Set<ContainerId> preemptedContainers = new HashSet<ContainerId>();
+  Map<ContainerId,TaskAttemptId> assignedContainers =
+      new HashMap<ContainerId, TaskAttemptId>();
+  private final RecordFactory recordFactory =
+        RecordFactoryProvider.getRecordFactory(null);
+  HashMap<ContainerId,Resource> contToResourceMap =
+    new HashMap<ContainerId, Resource>();
+
+  private int minAlloc = 1024;
+
+  @Before
+  @SuppressWarnings("rawtypes") // mocked generics
+  public void setup() {
+    ApplicationId appId = ApplicationId.newInstance(200, 1);
+    ApplicationAttemptId appAttemptId =
+        ApplicationAttemptId.newInstance(appId, 1);
+    jid = MRBuilderUtils.newJobId(appId, 1);
+
+    mActxt = mock(RunningAppContext.class);
+    EventHandler ea = mock(EventHandler.class);
+    when(mActxt.getEventHandler()).thenReturn(ea);
+    for (int i = 0; i < 40; ++i) {
+      ContainerId cId = ContainerId.newInstance(appAttemptId, i);
+      if (0 == i % 7) {
+        preemptedContainers.add(cId);
+      }
+      TaskId tId = 0 == i % 2
+          ? MRBuilderUtils.newTaskId(jid, i / 2, TaskType.MAP)
+          : MRBuilderUtils.newTaskId(jid, i / 2 + 1, TaskType.REDUCE);
+      assignedContainers.put(cId, MRBuilderUtils.newTaskAttemptId(tId, 0));
+      contToResourceMap.put(cId, Resource.newInstance(2 * minAlloc, 2));
+    }
+
+    for (Map.Entry<ContainerId,TaskAttemptId> ent :
+         assignedContainers.entrySet()) {
+      System.out.println("cont:" + ent.getKey().getId() +
+          " type:" + ent.getValue().getTaskId().getTaskType() +
+          " res:" + contToResourceMap.get(ent.getKey()).getMemory() + "MB" );
+    }
+  }
+
+  @Test
+  public void testStrictPreemptionContract() {
+
+    final Map<ContainerId,TaskAttemptId> containers = assignedContainers;
+    AMPreemptionPolicy.Context mPctxt = new AMPreemptionPolicy.Context() {
+      @Override
+      public TaskAttemptId getTaskAttempt(ContainerId cId) {
+        return containers.get(cId);
+      }
+      @Override
+      public List<Container> getContainers(TaskType t) {
+        List<Container> p = new ArrayList<Container>();
+        for (Map.Entry<ContainerId,TaskAttemptId> ent :
+            assignedContainers.entrySet()) {
+          if (ent.getValue().getTaskId().getTaskType().equals(t)) {
+            p.add(Container.newInstance(ent.getKey(), null, null,
+                contToResourceMap.get(ent.getKey()),
+                Priority.newInstance(0), null));
+          }
+        }
+        return p;
+      }
+    };
+
+    PreemptionMessage pM = generatePreemptionMessage(preemptedContainers,
+        contToResourceMap, Resource.newInstance(1024, 1), true);
+
+    CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
+    policy.init(mActxt);
+    policy.preempt(mPctxt, pM);
+
+
+    for (ContainerId c : preemptedContainers) {
+      TaskAttemptId t = assignedContainers.get(c);
+      if (TaskType.MAP.equals(t.getTaskId().getTaskType())) {
+        assert policy.isPreempted(t) == false;
+      } else {
+        assert policy.isPreempted(t);
+      }
+    }
+  }
+
+
+  @Test
+  public void testPreemptionContract() {
+    final Map<ContainerId,TaskAttemptId> containers = assignedContainers;
+    AMPreemptionPolicy.Context mPctxt = new AMPreemptionPolicy.Context() {
+      @Override
+      public TaskAttemptId getTaskAttempt(ContainerId cId) {
+        return containers.get(cId);
+      }
+
+      @Override
+      public List<Container> getContainers(TaskType t) {
+        List<Container> p = new ArrayList<Container>();
+        for (Map.Entry<ContainerId,TaskAttemptId> ent :
+            assignedContainers.entrySet()){
+          if(ent.getValue().getTaskId().getTaskType().equals(t)){
+            p.add(Container.newInstance(ent.getKey(), null, null,
+                contToResourceMap.get(ent.getKey()),
+                Priority.newInstance(0), null));
+          }
+        }
+        return p;
+      }
+    };
+
+    PreemptionMessage pM = generatePreemptionMessage(preemptedContainers,
+        contToResourceMap, Resource.newInstance(minAlloc, 1), false);
+
+    CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
+    policy.init(mActxt);
+
+    int supposedMemPreemption = pM.getContract().getResourceRequest()
+        .get(0).getResourceRequest().getCapability().getMemory()
+        * pM.getContract().getResourceRequest().get(0).getResourceRequest()
+        .getNumContainers();
+
+    // first round of preemption
+    policy.preempt(mPctxt, pM);
+    List<TaskAttemptId> preempting =
+      validatePreemption(pM, policy, supposedMemPreemption);
+
+    // redundant message
+    policy.preempt(mPctxt, pM);
+    List<TaskAttemptId> preempting2 =
+      validatePreemption(pM, policy, supposedMemPreemption);
+
+    // check that nothing got added
+    assert preempting2.equals(preempting);
+
+    // simulate 2 task completions/successful preemption
+    policy.handleCompletedContainer(preempting.get(0));
+    policy.handleCompletedContainer(preempting.get(1));
+
+    // remove from assignedContainers
+    Iterator<Map.Entry<ContainerId,TaskAttemptId>> it =
+      assignedContainers.entrySet().iterator();
+    while (it.hasNext()) {
+      Map.Entry<ContainerId,TaskAttemptId> ent = it.next();
+      if (ent.getValue().equals(preempting.get(0)) ||
+        ent.getValue().equals(preempting.get(1)))
+        it.remove();
+    }
+
+    // one more message asking for preemption
+    policy.preempt(mPctxt, pM);
+
+    // triggers preemption of 2 more containers (i.e., the preemption set changes)
+    List<TaskAttemptId> preempting3 =
+      validatePreemption(pM, policy, supposedMemPreemption);
+    assert preempting3.equals(preempting2) == false;
+  }
+
+  private List<TaskAttemptId> validatePreemption(PreemptionMessage pM,
+    CheckpointAMPreemptionPolicy policy, int supposedMemPreemption) {
+    Resource effectivelyPreempted = Resource.newInstance(0, 0);
+
+    List<TaskAttemptId> preempting = new ArrayList<TaskAttemptId>();
+
+    for (Map.Entry<ContainerId, TaskAttemptId> ent :
+        assignedContainers.entrySet()) {
+      if (policy.isPreempted(ent.getValue())) {
+        Resources.addTo(effectivelyPreempted,contToResourceMap.get(ent.getKey()));
+        // preempt only reducers
+        if (policy.isPreempted(ent.getValue())){
+          assertEquals(TaskType.REDUCE, ent.getValue().getTaskId().getTaskType());
+          preempting.add(ent.getValue());
+        }
+      }
+    }
+
+    // preempt enough
+    assert (effectivelyPreempted.getMemory() >= supposedMemPreemption)
+      : " preempted: " + effectivelyPreempted.getMemory();
+
+    // but do not preempt too much
+    assert effectivelyPreempted.getMemory() <= supposedMemPreemption + minAlloc;
+    return preempting;
+  }
+
+  private PreemptionMessage generatePreemptionMessage(
+      Set<ContainerId> containerToPreempt,
+      HashMap<ContainerId, Resource> resPerCont,
+      Resource minimumAllocation, boolean strict) {
+
+    Set<ContainerId> currentContPreemption = Collections.unmodifiableSet(
+        new HashSet<ContainerId>(containerToPreempt));
+    containerToPreempt.clear();
+    Resource tot = Resource.newInstance(0, 0);
+    for(ContainerId c : currentContPreemption){
+      Resources.addTo(tot,
+          resPerCont.get(c));
+    }
+    int numCont = (int) Math.ceil(tot.getMemory() /
+              (double) minimumAllocation.getMemory());
+    ResourceRequest rr = ResourceRequest.newInstance(
+        Priority.newInstance(0), ResourceRequest.ANY,
+        minimumAllocation, numCont);
+    if (strict) {
+      return generatePreemptionMessage(new Allocation(null, null,
+                  currentContPreemption, null, null));
+    }
+    return generatePreemptionMessage(new Allocation(null, null,
+                          null, currentContPreemption,
+                          Collections.singletonList(rr)));
+  }
+
+
+  private PreemptionMessage generatePreemptionMessage(Allocation allocation) {
+    PreemptionMessage pMsg = null;
+    // assemble strict preemption request
+    if (allocation.getStrictContainerPreemptions() != null) {
+       pMsg = recordFactory.newRecordInstance(PreemptionMessage.class);
+      StrictPreemptionContract pStrict =
+          recordFactory.newRecordInstance(StrictPreemptionContract.class);
+      Set<PreemptionContainer> pCont = new HashSet<PreemptionContainer>();
+      for (ContainerId cId : allocation.getStrictContainerPreemptions()) {
+        PreemptionContainer pc =
+            recordFactory.newRecordInstance(PreemptionContainer.class);
+        pc.setId(cId);
+        pCont.add(pc);
+      }
+      pStrict.setContainers(pCont);
+      pMsg.setStrictContract(pStrict);
+    }
+
+    // assemble negotiable preemption request
+    if (allocation.getResourcePreemptions() != null &&
+        allocation.getResourcePreemptions().size() > 0 &&
+        allocation.getContainerPreemptions() != null &&
+        allocation.getContainerPreemptions().size() > 0) {
+      if (pMsg == null) {
+        pMsg = recordFactory.newRecordInstance(PreemptionMessage.class);
+      }
+      PreemptionContract contract =
+          recordFactory.newRecordInstance(PreemptionContract.class);
+      Set<PreemptionContainer> pCont = new HashSet<PreemptionContainer>();
+      for (ContainerId cId : allocation.getContainerPreemptions()) {
+        PreemptionContainer pc =
+            recordFactory.newRecordInstance(PreemptionContainer.class);
+        pc.setId(cId);
+        pCont.add(pc);
+      }
+      List<PreemptionResourceRequest> pRes =
+        new ArrayList<PreemptionResourceRequest>();
+      for (ResourceRequest crr : allocation.getResourcePreemptions()) {
+        PreemptionResourceRequest prr =
+            recordFactory.newRecordInstance(PreemptionResourceRequest.class);
+        prr.setResourceRequest(crr);
+        pRes.add(prr);
+      }
+      contract.setContainers(pCont);
+      contract.setResourceRequest(pRes);
+      pMsg.setContract(contract);
+    }
+    return pMsg;
+  }
+
+}

+ 15 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java

@@ -1604,6 +1604,21 @@ public class TestRMContainerAllocator {
         numPendingReduces, 
         maxReduceRampupLimit, reduceSlowStart);
     verify(allocator).rampDownReduces(anyInt());
+
+    // Test reduce ramp-down for when there are scheduled maps
+    // Since we have two scheduled Maps, rampDownReduces
+    // should be invoked twice.
+    scheduledMaps = 2;
+    assignedReduces = 2;
+    doReturn(10 * 1024).when(allocator).getMemLimit();
+    allocator.scheduleReduces(
+        totalMaps, succeededMaps, 
+        scheduledMaps, scheduledReduces, 
+        assignedMaps, assignedReduces, 
+        mapResourceReqt, reduceResourceReqt, 
+        numPendingReduces, 
+        maxReduceRampupLimit, reduceSlowStart);
+    verify(allocator, times(2)).rampDownReduces(anyInt());
   }
 
   private static class RecalculateContainerAllocator extends MyContainerAllocator {

+ 35 - 7
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java

@@ -53,6 +53,7 @@ import org.apache.hadoop.mapreduce.QueueInfo;
 import org.apache.hadoop.mapreduce.TaskCompletionEvent;
 import org.apache.hadoop.mapreduce.TaskTrackerInfo;
 import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
 import org.apache.hadoop.mapreduce.protocol.ClientProtocol;
 import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
@@ -575,10 +576,17 @@ public class LocalJobRunner implements ClientProtocol {
 
     // TaskUmbilicalProtocol methods
 
+    @Override
     public JvmTask getTask(JvmContext context) { return null; }
     
-    public synchronized boolean statusUpdate(TaskAttemptID taskId,
+    @Override
+    public synchronized AMFeedback statusUpdate(TaskAttemptID taskId,
         TaskStatus taskStatus) throws IOException, InterruptedException {
+      AMFeedback feedback = new AMFeedback();
+      feedback.setTaskFound(true);
+      if (null == taskStatus) {
+        return feedback;
+      }
       // Serialize as we would if distributed in order to make deep copy
       ByteArrayOutputStream baos = new ByteArrayOutputStream();
       DataOutputStream dos = new DataOutputStream(baos);
@@ -618,7 +626,7 @@ public class LocalJobRunner implements ClientProtocol {
       }
 
       // ignore phase
-      return true;
+      return feedback;
     }
 
     /** Return the current values of the counters for this job,
@@ -654,24 +662,24 @@ public class LocalJobRunner implements ClientProtocol {
       statusUpdate(taskid, taskStatus);
     }
 
+    @Override
     public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) {
       // Ignore for now
     }
     
+    @Override
     public void reportNextRecordRange(TaskAttemptID taskid, 
         SortedRanges.Range range) throws IOException {
       LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
     }
 
-    public boolean ping(TaskAttemptID taskid) throws IOException {
-      return true;
-    }
-    
+    @Override
     public boolean canCommit(TaskAttemptID taskid) 
     throws IOException {
       return true;
     }
     
+    @Override
     public void done(TaskAttemptID taskId) throws IOException {
       int taskIndex = mapIds.indexOf(taskId);
       if (taskIndex >= 0) {                       // mapping
@@ -681,11 +689,13 @@ public class LocalJobRunner implements ClientProtocol {
       }
     }
 
+    @Override
     public synchronized void fsError(TaskAttemptID taskId, String message) 
     throws IOException {
       LOG.fatal("FSError: "+ message + "from task: " + taskId);
     }
 
+    @Override
     public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
       LOG.fatal("shuffleError: "+ message + "from task: " + taskId);
     }
@@ -695,12 +705,30 @@ public class LocalJobRunner implements ClientProtocol {
       LOG.fatal("Fatal: "+ msg + "from task: " + taskId);
     }
     
+    @Override
     public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId, 
         int fromEventId, int maxLocs, TaskAttemptID id) throws IOException {
       return new MapTaskCompletionEventsUpdate(
         org.apache.hadoop.mapred.TaskCompletionEvent.EMPTY_ARRAY, false);
     }
-    
+
+    @Override
+    public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
+        throws IOException, InterruptedException {
+      // ignore
+    }
+
+    @Override
+    public TaskCheckpointID getCheckpointID(TaskID taskId) {
+      // ignore
+      return null;
+    }
+
+    @Override
+    public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
+      // ignore
+    }
+
   }
 
   public LocalJobRunner(Configuration conf) throws IOException {

+ 33 - 7
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java

@@ -44,6 +44,8 @@ import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.TaskInputOutputContext;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
 import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
@@ -82,12 +84,11 @@ public class TestMRWithDistributedCache extends TestCase {
 
   private static final Log LOG =
     LogFactory.getLog(TestMRWithDistributedCache.class);
+  
+  private static class DistributedCacheChecker {
 
-  public static class DistributedCacheChecker extends
-      Mapper<LongWritable, Text, NullWritable, NullWritable> {
-
-    @Override
-    public void setup(Context context) throws IOException {
+    public void setup(TaskInputOutputContext<?, ?, ?, ?> context)
+        throws IOException {
       Configuration conf = context.getConfiguration();
       Path[] localFiles = context.getLocalCacheFiles();
       URI[] files = context.getCacheFiles();
@@ -101,6 +102,10 @@ public class TestMRWithDistributedCache extends TestCase {
       TestCase.assertEquals(2, files.length);
       TestCase.assertEquals(2, archives.length);
 
+      // Check the file name
+      TestCase.assertTrue(files[0].getPath().endsWith("distributed.first"));
+      TestCase.assertTrue(files[1].getPath().endsWith("distributed.second.jar"));
+      
       // Check lengths of the files
       TestCase.assertEquals(1, fs.getFileStatus(localFiles[0]).getLen());
       TestCase.assertTrue(fs.getFileStatus(localFiles[1]).getLen() > 1);
@@ -130,8 +135,28 @@ public class TestMRWithDistributedCache extends TestCase {
       TestCase.assertTrue("second file should be symlinked too",
           expectedAbsentSymlinkFile.exists());
     }
+
   }
-  
+
+  public static class DistributedCacheCheckerMapper extends
+      Mapper<LongWritable, Text, NullWritable, NullWritable> {
+
+    @Override
+    protected void setup(Context context) throws IOException,
+        InterruptedException {
+      new DistributedCacheChecker().setup(context);
+    }
+  }
+
+  public static class DistributedCacheCheckerReducer extends
+      Reducer<LongWritable, Text, NullWritable, NullWritable> {
+
+    @Override
+    public void setup(Context context) throws IOException {
+      new DistributedCacheChecker().setup(context);
+    }
+  }
+
   private void testWithConf(Configuration conf) throws IOException,
       InterruptedException, ClassNotFoundException, URISyntaxException {
     // Create a temporary file of length 1.
@@ -146,7 +171,8 @@ public class TestMRWithDistributedCache extends TestCase {
 
 
     Job job = Job.getInstance(conf);
-    job.setMapperClass(DistributedCacheChecker.class);
+    job.setMapperClass(DistributedCacheCheckerMapper.class);
+    job.setReducerClass(DistributedCacheCheckerReducer.class);
     job.setOutputFormatClass(NullOutputFormat.class);
     FileInputFormat.setInputPaths(job, first);
     // Creates the Job Configuration

+ 63 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/AMFeedback.java

@@ -0,0 +1,63 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.hadoop.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+
+/**
+ * This class is a simple struct that carries both the taskFound information and
+ * a possible preemption request coming from the AM.
+ */
+public class AMFeedback implements Writable {
+
+  boolean taskFound;
+  boolean preemption;
+
+  public void setTaskFound(boolean t) {
+    taskFound = t;
+  }
+
+  public boolean getTaskFound() {
+    return taskFound;
+  }
+
+  public void setPreemption(boolean preemption) {
+    this.preemption = preemption;
+  }
+
+  public boolean getPreemption() {
+    return preemption;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeBoolean(taskFound);
+    out.writeBoolean(preemption);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    taskFound = in.readBoolean();
+    preemption = in.readBoolean();
+  }
+
+}
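
Since AMFeedback travels over the umbilical RPC as a Writable, a quick round-trip through write()/readFields() shows the wire behavior. This is a sketch, not part of the patch; AMFeedbackRoundTrip is a name invented here, and the only assumption is that the class above is on the classpath.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.mapred.AMFeedback;

public class AMFeedbackRoundTrip {
  public static void main(String[] args) throws IOException {
    AMFeedback sent = new AMFeedback();
    sent.setTaskFound(true);
    sent.setPreemption(true);

    // serialize as the RPC layer would
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    sent.write(new DataOutputStream(bytes));

    // deserialize on the other side
    AMFeedback received = new AMFeedback();
    received.readFields(
        new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

    System.out.println(received.getTaskFound());   // true
    System.out.println(received.getPreemption());  // true
  }
}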

+ 51 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java

@@ -949,12 +949,29 @@ public class JobConf extends Configuration {
     return get(KeyFieldBasedPartitioner.PARTITIONER_OPTIONS);
   }
 
+  /**
+   * Get the user defined {@link WritableComparable} comparator for
+   * grouping keys of inputs to the combiner.
+   *
+   * @return comparator set by the user for grouping values.
+   * @see #setCombinerKeyGroupingComparator(Class) for details.
+   */
+  public RawComparator getCombinerKeyGroupingComparator() {
+    Class<? extends RawComparator> theClass = getClass(
+        JobContext.COMBINER_GROUP_COMPARATOR_CLASS, null, RawComparator.class);
+    if (theClass == null) {
+      return getOutputKeyComparator();
+    }
+
+    return ReflectionUtils.newInstance(theClass, this);
+  }
+
   /** 
    * Get the user defined {@link WritableComparable} comparator for 
    * grouping keys of inputs to the reduce.
    * 
    * @return comparator set by the user for grouping values.
-   * @see #setOutputValueGroupingComparator(Class) for details.  
+   * @see #setOutputValueGroupingComparator(Class) for details.
    */
   public RawComparator getOutputValueGroupingComparator() {
     Class<? extends RawComparator> theClass = getClass(
@@ -966,6 +983,37 @@ public class JobConf extends Configuration {
     return ReflectionUtils.newInstance(theClass, this);
   }
 
+  /**
+   * Set the user defined {@link RawComparator} comparator for
+   * grouping keys in the input to the combiner.
+   * <p/>
+   * <p>This comparator should be provided if the equivalence rules for keys
+   * for sorting the intermediates are different from those for grouping keys
+   * before each call to
+   * {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
+   * <p/>
+   * <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
+   * in a single call to the reduce function if K1 and K2 compare as equal.</p>
+   * <p/>
+   * <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
+   * how keys are sorted, this can be used in conjunction to simulate
+   * <i>secondary sort on values</i>.</p>
+   * <p/>
+   * <p><i>Note</i>: This is not a guarantee of the combiner sort being
+   * <i>stable</i> in any sense. (In any case, with the order of available
+   * map-outputs to the combiner being non-deterministic, it wouldn't make
+   * that much sense.)</p>
+   *
+   * @param theClass the comparator class to be used for grouping keys for the
+   * combiner. It should implement <code>RawComparator</code>.
+   * @see #setOutputKeyComparatorClass(Class)
+   */
+  public void setCombinerKeyGroupingComparator(
+      Class<? extends RawComparator> theClass) {
+    setClass(JobContext.COMBINER_GROUP_COMPARATOR_CLASS,
+        theClass, RawComparator.class);
+  }
+
   /** 
    * Set the user defined {@link RawComparator} comparator for 
    * grouping keys in the input to the reduce.
@@ -989,7 +1037,8 @@ public class JobConf extends Configuration {
    * 
    * @param theClass the comparator class to be used for grouping keys. 
    *                 It should implement <code>RawComparator</code>.
-   * @see #setOutputKeyComparatorClass(Class)                 
+   * @see #setOutputKeyComparatorClass(Class)
+   * @see #setCombinerKeyGroupingComparator(Class)
    */
   public void setOutputValueGroupingComparator(
       Class<? extends RawComparator> theClass) {
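
A hedged usage sketch for the new combiner grouping hook (not part of the patch): FirstFieldGroupingComparator is a hypothetical comparator that groups composite Text keys such as "user#timestamp" by the part before '#', so the combiner sees all values for a user together while the sort order still uses the full key.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.JobConf;

public class FirstFieldGroupingComparator extends WritableComparator {

  public FirstFieldGroupingComparator() {
    super(Text.class, true);  // create Text instances for deserialized compares
  }

  @Override
  public int compare(WritableComparable a, WritableComparable b) {
    // compare only the part of the key before '#'
    String left = a.toString().split("#", 2)[0];
    String right = b.toString().split("#", 2)[0];
    return left.compareTo(right);
  }

  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // sort still uses the full key; only combiner-side grouping is relaxed
    conf.setCombinerKeyGroupingComparator(FirstFieldGroupingComparator.class);
  }
}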

+ 33 - 10
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java

@@ -187,6 +187,7 @@ abstract public class Task implements Writable, Configurable {
   protected SecretKey tokenSecret;
   protected SecretKey shuffleSecret;
   protected GcTimeUpdater gcUpdater;
+  final AtomicBoolean mustPreempt = new AtomicBoolean(false);
 
   ////////////////////////////////////////////
   // Constructors
@@ -711,6 +712,7 @@ abstract public class Task implements Writable, Configurable {
         }
         try {
           boolean taskFound = true; // whether TT knows about this task
+          AMFeedback amFeedback = null;
           // sleep for a bit
           synchronized(lock) {
             if (taskDone.get()) {
@@ -728,12 +730,14 @@ abstract public class Task implements Writable, Configurable {
             taskStatus.statusUpdate(taskProgress.get(),
                                     taskProgress.toString(), 
                                     counters);
-            taskFound = umbilical.statusUpdate(taskId, taskStatus);
+            amFeedback = umbilical.statusUpdate(taskId, taskStatus);
+            taskFound = amFeedback.getTaskFound();
             taskStatus.clearStatus();
           }
           else {
             // send ping 
-            taskFound = umbilical.ping(taskId);
+            amFeedback = umbilical.statusUpdate(taskId, null);
+            taskFound = amFeedback.getTaskFound();
           }
 
           // if Task Tracker is not aware of our task ID (probably because it died and 
@@ -744,6 +748,17 @@ abstract public class Task implements Writable, Configurable {
             System.exit(66);
           }
 
+          // Set a flag that says we should preempt; this is read by
+          // ReduceTasks at points in the execution where it is
+          // safe/easy to preempt
+          boolean lastPreempt = mustPreempt.get();
+          mustPreempt.set(mustPreempt.get() || amFeedback.getPreemption());
+
+          if (lastPreempt ^ mustPreempt.get()) {
+            LOG.info("PREEMPTION TASK: setting mustPreempt to " +
+                mustPreempt.get() + " given " + amFeedback.getPreemption() +
+                " for "+ taskId + " task status: " +taskStatus.getPhase());
+          }
           sendProgress = resetProgressFlag(); 
           remainingRetries = MAX_RETRIES;
         } 
@@ -992,10 +1007,17 @@ abstract public class Task implements Writable, Configurable {
   public void done(TaskUmbilicalProtocol umbilical,
                    TaskReporter reporter
                    ) throws IOException, InterruptedException {
-    LOG.info("Task:" + taskId + " is done."
-             + " And is in the process of committing");
     updateCounters();
-
+    if (taskStatus.getRunState() == TaskStatus.State.PREEMPTED ) {
+      // If we are preempted, do no output promotion; signal done and exit
+      committer.commitTask(taskContext);
+      umbilical.preempted(taskId, taskStatus);
+      taskDone.set(true);
+      reporter.stopCommunicationThread();
+      return;
+    }
+    LOG.info("Task:" + taskId + " is done."
+        + " And is in the process of committing");
     boolean commitRequired = isCommitRequired();
     if (commitRequired) {
       int retries = MAX_RETRIES;
@@ -1054,7 +1076,7 @@ abstract public class Task implements Writable, Configurable {
     int retries = MAX_RETRIES;
     while (true) {
       try {
-        if (!umbilical.statusUpdate(getTaskID(), taskStatus)) {
+        if (!umbilical.statusUpdate(getTaskID(), taskStatus).getTaskFound()) {
           LOG.warn("Parent died.  Exiting "+taskId);
           System.exit(66);
         }
@@ -1098,8 +1120,8 @@ abstract public class Task implements Writable, Configurable {
     if (isMapTask() && conf.getNumReduceTasks() > 0) {
       try {
         Path mapOutput =  mapOutputFile.getOutputFile();
-        FileSystem localFS = FileSystem.getLocal(conf);
-        return localFS.getFileStatus(mapOutput).getLen();
+        FileSystem fs = mapOutput.getFileSystem(conf);
+        return fs.getFileStatus(mapOutput).getLen();
       } catch (IOException e) {
         LOG.warn ("Could not find output size " , e);
       }
@@ -1553,7 +1575,8 @@ abstract public class Task implements Writable, Configurable {
       combinerClass = cls;
       keyClass = (Class<K>) job.getMapOutputKeyClass();
       valueClass = (Class<V>) job.getMapOutputValueClass();
-      comparator = (RawComparator<K>) job.getOutputKeyComparator();
+      comparator = (RawComparator<K>)
+          job.getCombinerKeyGroupingComparator();
     }
 
     @SuppressWarnings("unchecked")
@@ -1602,7 +1625,7 @@ abstract public class Task implements Writable, Configurable {
       this.taskId = taskId;
       keyClass = (Class<K>) context.getMapOutputKeyClass();
       valueClass = (Class<V>) context.getMapOutputValueClass();
-      comparator = (RawComparator<K>) context.getSortComparator();
+      comparator = (RawComparator<K>) context.getCombinerKeyGroupingComparator();
       this.committer = committer;
     }
 

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskStatus.java

@@ -51,7 +51,7 @@ public abstract class TaskStatus implements Writable, Cloneable {
   @InterfaceAudience.Private
   @InterfaceStability.Unstable
   public static enum State {RUNNING, SUCCEEDED, FAILED, UNASSIGNED, KILLED, 
-                            COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN}
+                            COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN, PREEMPTED}
     
   private final TaskAttemptID taskid;
   private float progress;

+ 37 - 8
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java

@@ -24,6 +24,9 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.ipc.VersionedProtocol;
 import org.apache.hadoop.mapred.JvmTask;
+import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
+import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
+import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
 import org.apache.hadoop.mapreduce.security.token.JobTokenSelector;
 import org.apache.hadoop.security.token.TokenInfo;
 
@@ -64,9 +67,10 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
    * Version 17 Modified TaskID to be aware of the new TaskTypes
    * Version 18 Added numRequiredSlots to TaskStatus for MAPREDUCE-516
    * Version 19 Added fatalError for child to communicate fatal errors to TT
+   * Version 20 Added methods to manage checkpoints
    * */
 
-  public static final long versionID = 19L;
+  public static final long versionID = 20L;
   
   /**
    * Called when a child task process starts, to get its task.
@@ -78,7 +82,8 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
   JvmTask getTask(JvmContext context) throws IOException;
   
   /**
-   * Report child's progress to parent.
+   * Report child's progress to parent. Also invoked to report that the child is
+   * still alive (this used to be done via ping). The returned AMFeedback is used
+   * to propagate preemption requests.
    * 
    * @param taskId task-id of the child
    * @param taskStatus status of the child
@@ -86,7 +91,7 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
    * @throws InterruptedException
    * @return True if the task is known
    */
-  boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus) 
+  AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus) 
   throws IOException, InterruptedException;
   
   /** Report error messages back to parent.  Calls should be sparing, since all
@@ -105,11 +110,6 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
   void reportNextRecordRange(TaskAttemptID taskid, SortedRanges.Range range) 
     throws IOException;
 
-  /** Periodically called by child to check if parent is still alive. 
-   * @return True if the task is known
-   */
-  boolean ping(TaskAttemptID taskid) throws IOException;
-
   /** Report that the task is successfully completed.  Failure is assumed if
    * the task process exits without calling this.
    * @param taskid task's id
@@ -161,4 +161,33 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
                                                        TaskAttemptID id) 
   throws IOException;
 
+  /**
+   * Report to the AM that the task has been successfully preempted.
+   *
+   * @param taskId task's id
+   * @param taskStatus status of the child
+   * @throws IOException
+   */
+  void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
+      throws IOException, InterruptedException;
+
+  /**
+   * Return the latest CheckpointID for the given TaskID. This provides
+   * the task with a way to locate the checkpointed data and restart from
+   * that point in the computation.
+   *
+   * @param taskID task's id
+   * @return the most recent checkpoint (if any) for this task
+   * @throws IOException
+   */
+  TaskCheckpointID getCheckpointID(TaskID taskID);
+
+  /**
+   * Send a CheckpointID for a given TaskID to be stored in the AM,
+   * to later restart a task from this checkpoint.
+   * @param tid the task whose checkpoint is being stored
+   * @param cid the checkpoint to associate with that task
+   */
+  void setCheckpointID(TaskID tid, TaskCheckpointID cid);
+
 }
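
The hunk above replaces ping() with an AMFeedback-returning statusUpdate() and adds the preemption and checkpoint calls. As a rough illustration only, the following helper sketches how a child might drive the version-20 protocol in its heartbeat loop; AMFeedback#setTaskFound(...) appears in the test stubs later in this diff, getTaskFound() and getPreemption() are assumed accessors, and a real task would checkpoint its actual state rather than hand back an empty TaskCheckpointID.

import java.io.IOException;
import org.apache.hadoop.mapred.AMFeedback;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskStatus;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;

class UmbilicalHeartbeatSketch {
  /**
   * One heartbeat against the version-20 umbilical. Returns false if the AM
   * no longer knows this attempt, in which case the child should exit.
   */
  static boolean heartbeat(TaskUmbilicalProtocol umbilical,
      TaskAttemptID attemptId, TaskStatus status)
      throws IOException, InterruptedException {
    AMFeedback feedback = umbilical.statusUpdate(attemptId, status);
    if (!feedback.getTaskFound()) {        // assumed getter paired with setTaskFound(...)
      return false;
    }
    if (feedback.getPreemption()) {        // assumed getter carrying the AM's request
      // a real task would persist its partial output and wrap it here
      umbilical.setCheckpointID(attemptId.getTaskID(), new TaskCheckpointID());
      umbilical.preempted(attemptId, status);
    }
    return true;
  }
}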

+ 17 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java

@@ -948,11 +948,27 @@ public class Job extends JobContextImpl implements JobContext {
     conf.setOutputValueClass(theClass);
   }
 
+  /**
+   * Define the comparator that controls which keys are grouped together
+   * for a single call to the combiner,
+   * {@link Reducer#reduce(Object, Iterable,
+   * org.apache.hadoop.mapreduce.Reducer.Context)}
+   *
+   * @param cls the raw comparator to use
+   * @throws IllegalStateException if the job is submitted
+   */
+  public void setCombinerKeyGroupingComparatorClass(
+      Class<? extends RawComparator> cls) throws IllegalStateException {
+    ensureState(JobState.DEFINE);
+    conf.setCombinerKeyGroupingComparator(cls);
+  }
+
   /**
    * Define the comparator that controls how the keys are sorted before they
    * are passed to the {@link Reducer}.
    * @param cls the raw comparator
    * @throws IllegalStateException if the job is submitted
+   * @see #setCombinerKeyGroupingComparatorClass(Class)
    */
   public void setSortComparatorClass(Class<? extends RawComparator> cls
                                      ) throws IllegalStateException {
@@ -967,6 +983,7 @@ public class Job extends JobContextImpl implements JobContext {
    *                       org.apache.hadoop.mapreduce.Reducer.Context)}
    * @param cls the raw comparator to use
    * @throws IllegalStateException if the job is submitted
+   * @see #setCombinerKeyGroupingComparatorClass(Class)
    */
   public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                          ) throws IllegalStateException {
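
To make the new setter concrete, here is a small driver sketch (not part of the patch) that wires combiner-side grouping alongside the existing reduce-side grouping. LongSumReducer and Text.Comparator are stand-ins for a real combiner and a real "group by key prefix" comparator such as the GroupComparator used in the tests further down.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;

public class CombinerGroupingDriverSketch {
  public static Job configure(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, "combiner-grouping-demo");
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setCombinerClass(LongSumReducer.class);
    // new in this change: grouping seen by the combiner, independent of the
    // reduce-side grouping configured on the next line
    job.setCombinerKeyGroupingComparatorClass(Text.Comparator.class);
    job.setGroupingComparatorClass(Text.Comparator.class);
    return job;
  }
}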

+ 16 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java

@@ -167,13 +167,23 @@ public interface JobContext extends MRJobConfig {
    */
   public String getJar();
 
-  /** 
-   * Get the user defined {@link RawComparator} comparator for 
-   * grouping keys of inputs to the reduce.
-   * 
+  /**
+   * Get the user defined {@link RawComparator} comparator for
+   * grouping keys of inputs to the combiner.
+   *
    * @return comparator set by the user for grouping values.
-   * @see Job#setGroupingComparatorClass(Class) for details.  
-   */
+   * @see Job#setCombinerKeyGroupingComparatorClass(Class)
+   */
+  public RawComparator<?> getCombinerKeyGroupingComparator();
+
+  /**
+   * Get the user defined {@link RawComparator} comparator for
+   * grouping keys of inputs to the reduce.
+   *
+   * @return comparator set by the user for grouping values.
+   * @see Job#setGroupingComparatorClass(Class)
+   * @see #getCombinerKeyGroupingComparator()
+   */
   public RawComparator<?> getGroupingComparator();
   
   /**

+ 2 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java

@@ -93,6 +93,8 @@ public interface MRJobConfig {
 
   public static final String KEY_COMPARATOR = "mapreduce.job.output.key.comparator.class";
 
+  public static final String COMBINER_GROUP_COMPARATOR_CLASS = "mapreduce.job.combiner.group.comparator.class";
+
   public static final String GROUP_COMPARATOR_CLASS = "mapreduce.job.output.group.comparator.class";
 
   public static final String WORKING_DIR = "mapreduce.job.working.dir";
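
For reference, the Job and JobConf setters elsewhere in this diff presumably resolve to this key, so setting it directly on a Configuration should be equivalent; that equivalence is an assumption based on the key name, not something this hunk states.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;

class CombinerGroupingConfSketch {
  static void apply(Configuration conf) {
    // same effect, assumed, as Job#setCombinerKeyGroupingComparatorClass(...)
    conf.setClass("mapreduce.job.combiner.group.comparator.class",
        Text.Comparator.class, RawComparator.class);
  }
}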

+ 21 - 26
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/TaskCheckpointID.java

@@ -34,37 +34,31 @@ import org.apache.hadoop.mapred.Counters;
  * cost of checkpoints and other counters. This is sent by the task to the AM
  * to be stored and provided to the next execution of the same task.
  */
-public class TaskCheckpointID implements CheckpointID{
+public class TaskCheckpointID implements CheckpointID {
 
-  FSCheckpointID rawId;
-  private List<Path> partialOutput;
-  private Counters counters;
+  final FSCheckpointID rawId;
+  private final List<Path> partialOutput;
+  private final Counters counters;
 
   public TaskCheckpointID() {
-    this.rawId = new FSCheckpointID();
-    this.partialOutput = new ArrayList<Path>();
+    this(new FSCheckpointID(), new ArrayList<Path>(), new Counters());
   }
 
   public TaskCheckpointID(FSCheckpointID rawId, List<Path> partialOutput,
           Counters counters) {
     this.rawId = rawId;
     this.counters = counters;
-    if(partialOutput == null)
-      this.partialOutput = new ArrayList<Path>();
-    else
-      this.partialOutput = partialOutput;
+    this.partialOutput = null == partialOutput
+      ? new ArrayList<Path>()
+      : partialOutput;
   }
 
   @Override
   public void write(DataOutput out) throws IOException {
     counters.write(out);
-    if (partialOutput == null) {
-      WritableUtils.writeVLong(out, 0L);
-    } else {
-      WritableUtils.writeVLong(out, partialOutput.size());
-      for(Path p:partialOutput){
-        Text.writeString(out, p.toString());
-      }
+    WritableUtils.writeVLong(out, partialOutput.size());
+    for (Path p : partialOutput) {
+      Text.writeString(out, p.toString());
     }
     rawId.write(out);
   }
@@ -74,21 +68,22 @@ public class TaskCheckpointID implements CheckpointID{
     partialOutput.clear();
     counters.readFields(in);
     long numPout = WritableUtils.readVLong(in);
-    for(int i=0;i<numPout;i++)
+    for (int i = 0; i < numPout; i++) {
       partialOutput.add(new Path(Text.readString(in)));
+    }
     rawId.readFields(in);
   }
 
   @Override
-  public boolean equals(Object other){
+  public boolean equals(Object other) {
     if (other instanceof TaskCheckpointID){
-      return this.rawId.equals(((TaskCheckpointID)other).rawId) &&
-             this.counters.equals(((TaskCheckpointID) other).counters) &&
-             this.partialOutput.containsAll(((TaskCheckpointID) other).partialOutput) &&
-             ((TaskCheckpointID) other).partialOutput.containsAll(this.partialOutput);
-    } else {
-      return false;
+      TaskCheckpointID o = (TaskCheckpointID) other;
+      return rawId.equals(o.rawId) &&
+             counters.equals(o.counters) &&
+             partialOutput.containsAll(o.partialOutput) &&
+             o.partialOutput.containsAll(partialOutput);
     }
+    return false;
   }
 
   @Override
@@ -110,7 +105,7 @@ public class TaskCheckpointID implements CheckpointID{
     return counters.findCounter(EnumCounter.CHECKPOINT_MS).getValue();
   }
 
-  public String toString(){
+  public String toString() {
     return rawId.toString() + " counters:" + counters;
 
   }
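
The cleanup above also pins down the Writable layout: counters, a vlong count of partial-output paths, the paths as strings, then the wrapped FSCheckpointID. A round-trip sketch follows; the Path-taking FSCheckpointID constructor and the example paths are assumptions for illustration, not taken from this diff.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.util.Arrays;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;

class TaskCheckpointIDRoundTripSketch {
  public static void main(String[] args) throws Exception {
    TaskCheckpointID original = new TaskCheckpointID(
        new FSCheckpointID(new Path("/tmp/checkpoints/attempt_0001")),
        Arrays.asList(new Path("/tmp/partial/part-0")),
        new Counters());

    // serialize with write(), then rebuild an empty instance via readFields()
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    original.write(new DataOutputStream(bytes));

    TaskCheckpointID copy = new TaskCheckpointID();
    copy.readFields(new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray())));

    System.out.println(original.equals(copy));   // expected: true
  }
}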

+ 5 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapContextImpl.java

@@ -166,6 +166,11 @@ class ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
     return base.getFileTimestamps();
   }
 
+  @Override
+  public RawComparator<?> getCombinerKeyGroupingComparator() {
+    return base.getCombinerKeyGroupingComparator();
+  }
+
   @Override
   public RawComparator<?> getGroupingComparator() {
     return base.getGroupingComparator();

+ 5 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReduceContextImpl.java

@@ -159,6 +159,11 @@ class ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
     return base.getFileTimestamps();
   }
 
+  @Override
+  public RawComparator<?> getCombinerKeyGroupingComparator() {
+    return base.getCombinerKeyGroupingComparator();
+  }
+
   @Override
   public RawComparator<?> getGroupingComparator() {
     return base.getGroupingComparator();

+ 5 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/map/WrappedMapper.java

@@ -168,6 +168,11 @@ public class WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
       return mapContext.getFileTimestamps();
     }
 
+    @Override
+    public RawComparator<?> getCombinerKeyGroupingComparator() {
+      return mapContext.getCombinerKeyGroupingComparator();
+    }
+
     @Override
     public RawComparator<?> getGroupingComparator() {
       return mapContext.getGroupingComparator();

+ 6 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/reduce/WrappedReducer.java

@@ -137,7 +137,7 @@ public class WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
 
     @Override
     public URI[] getCacheFiles() throws IOException {
-      return reduceContext.getCacheArchives();
+      return reduceContext.getCacheFiles();
     }
 
     @Override
@@ -161,6 +161,11 @@ public class WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
       return reduceContext.getFileTimestamps();
     }
 
+    @Override
+    public RawComparator<?> getCombinerKeyGroupingComparator() {
+      return reduceContext.getCombinerKeyGroupingComparator();
+    }
+
     @Override
     public RawComparator<?> getGroupingComparator() {
       return reduceContext.getGroupingComparator();

+ 11 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java

@@ -252,6 +252,17 @@ public class JobContextImpl implements JobContext {
     return conf.getJar();
   }
 
+  /**
+   * Get the user defined {@link RawComparator} comparator for
+   * grouping keys of inputs to the combiner.
+   *
+   * @return comparator set by the user for grouping values.
+   * @see Job#setCombinerKeyGroupingComparatorClass(Class)
+   */
+  public RawComparator<?> getCombinerKeyGroupingComparator() {
+    return conf.getCombinerKeyGroupingComparator();
+  }
+
   /** 
    * Get the user defined {@link RawComparator} comparator for 
    * grouping keys of inputs to the reduce.

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java

@@ -582,7 +582,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
     Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
     Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
     RawComparator<K> comparator = 
-      (RawComparator<K>)job.getOutputKeyComparator();
+      (RawComparator<K>)job.getCombinerKeyGroupingComparator();
     try {
       CombineValuesIterator values = new CombineValuesIterator(
           kvIter, comparator, keyClass, valClass, job, Reporter.NULL,

+ 4 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java

@@ -88,6 +88,8 @@ import org.apache.hadoop.yarn.util.Records;
 import org.apache.hadoop.yarn.webapp.WebApp;
 import org.apache.hadoop.yarn.webapp.WebApps;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * This module is responsible for talking to the
  * JobClient (user facing).
@@ -142,7 +144,8 @@ public class HistoryClientService extends AbstractService {
     super.serviceStart();
   }
 
-  private void initializeWebApp(Configuration conf) {
+  @VisibleForTesting
+  protected void initializeWebApp(Configuration conf) {
     webApp = new HsWebApp(history);
     InetSocketAddress bindAddress = MRWebAppUtil.getJHSWebBindAddress(conf);
     // NOTE: there should be a .at(InetSocketAddress)

+ 11 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistoryServer.java

@@ -45,6 +45,8 @@ import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.logaggregation.AggregatedLogDeletionService;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /******************************************************************
  * {@link JobHistoryServer} is responsible for servicing all job history
  * related requests from client.
@@ -60,10 +62,10 @@ public class JobHistoryServer extends CompositeService {
   public static final long historyServerTimeStamp = System.currentTimeMillis();
 
   private static final Log LOG = LogFactory.getLog(JobHistoryServer.class);
-  private HistoryContext historyContext;
+  protected HistoryContext historyContext;
   private HistoryClientService clientService;
   private JobHistory jobHistoryService;
-  private JHSDelegationTokenSecretManager jhsDTSecretManager;
+  protected JHSDelegationTokenSecretManager jhsDTSecretManager;
   private AggregatedLogDeletionService aggLogDelService;
   private HSAdminServer hsAdminServer;
   private HistoryServerStateStoreService stateStore;
@@ -129,8 +131,7 @@ public class JobHistoryServer extends CompositeService {
     historyContext = (HistoryContext)jobHistoryService;
     stateStore = createStateStore(conf);
     this.jhsDTSecretManager = createJHSSecretManager(conf, stateStore);
-    clientService = new HistoryClientService(historyContext, 
-        this.jhsDTSecretManager);
+    clientService = createHistoryClientService();
     aggLogDelService = new AggregatedLogDeletionService();
     hsAdminServer = new HSAdminServer(aggLogDelService, jobHistoryService);
     addService(stateStore);
@@ -142,6 +143,12 @@ public class JobHistoryServer extends CompositeService {
     super.serviceInit(config);
   }
 
+  @VisibleForTesting
+  protected HistoryClientService createHistoryClientService() {
+    return new HistoryClientService(historyContext, 
+        this.jhsDTSecretManager);
+  }
+
   protected JHSDelegationTokenSecretManager createJHSSecretManager(
       Configuration conf, HistoryServerStateStoreService store) {
     long secretKeyInterval = 

+ 23 - 9
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapProgress.java

@@ -20,7 +20,6 @@ package org.apache.hadoop.mapred;
 import java.io.DataOutputStream;
 import java.io.File;
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.List;
 
 import junit.framework.TestCase;
@@ -29,20 +28,17 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.ipc.ProtocolSignature;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.OutputFormat;
-import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent;
+import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
 import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
 import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
-import org.apache.hadoop.mapreduce.split.JobSplitWriter;
-import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
-import org.apache.hadoop.mapreduce.split.JobSplit.SplitMetaInfo;
 import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
 import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
+import org.apache.hadoop.mapreduce.split.JobSplitWriter;
+import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
 import org.apache.hadoop.util.ReflectionUtils;
 
 /**
@@ -110,11 +106,16 @@ public class TestMapProgress extends TestCase {
       statusUpdate(taskId, taskStatus);
     }
     
+    public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
+        throws IOException, InterruptedException {
+      statusUpdate(taskId, taskStatus);
+    }
+
     public boolean canCommit(TaskAttemptID taskid) throws IOException {
       return true;
     }
     
-    public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus) 
+    public AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus) 
     throws IOException, InterruptedException {
       StringBuffer buf = new StringBuffer("Task ");
       buf.append(taskId);
@@ -128,7 +129,9 @@ public class TestMapProgress extends TestCase {
       LOG.info(buf.toString());
       // ignore phase
       // ignore counters
-      return true;
+      AMFeedback a = new AMFeedback();
+      a.setTaskFound(true);
+      return a;
     }
 
     public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) throws IOException {
@@ -145,6 +148,17 @@ public class TestMapProgress extends TestCase {
         SortedRanges.Range range) throws IOException {
       LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
     }
+
+    @Override
+    public TaskCheckpointID getCheckpointID(TaskID taskId) {
+      // do nothing
+      return null;
+    }
+
+    @Override
+    public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
+      // do nothing
+    }
   }
   
   private FileSystem fs = null;

+ 191 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestOldCombinerGrouping.java

@@ -0,0 +1,191 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import junit.framework.Assert;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.UUID;
+
+public class TestOldCombinerGrouping {
+  private static String TEST_ROOT_DIR =
+      new File("build", UUID.randomUUID().toString()).getAbsolutePath();
+
+  public static class Map implements
+      Mapper<LongWritable, Text, Text, LongWritable> {
+    @Override
+    public void map(LongWritable key, Text value,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
+      String v = value.toString();
+      String k = v.substring(0, v.indexOf(","));
+      v = v.substring(v.indexOf(",") + 1);
+      output.collect(new Text(k), new LongWritable(Long.parseLong(v)));
+    }
+
+    @Override
+    public void close() throws IOException {
+    }
+
+    @Override
+    public void configure(JobConf job) {
+    }
+  }
+
+  public static class Reduce implements
+      Reducer<Text, LongWritable, Text, LongWritable> {
+
+    @Override
+    public void reduce(Text key, Iterator<LongWritable> values,
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
+      LongWritable maxValue = null;
+      while (values.hasNext()) {
+        LongWritable value = values.next();
+        if (maxValue == null) {
+          maxValue = value;
+        } else if (value.compareTo(maxValue) > 0) {
+          maxValue = value;
+        }
+      }
+      output.collect(key, maxValue);
+    }
+
+    @Override
+    public void close() throws IOException {
+    }
+
+    @Override
+    public void configure(JobConf job) {
+    }
+  }
+
+  public static class Combiner extends Reduce {
+  }
+
+  public static class GroupComparator implements RawComparator<Text> {
+    @Override
+    public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
+        int i4) {
+      byte[] b1 = new byte[i2];
+      System.arraycopy(bytes, i, b1, 0, i2);
+
+      byte[] b2 = new byte[i4];
+      System.arraycopy(bytes2, i3, b2, 0, i4);
+
+      return compare(new Text(new String(b1)), new Text(new String(b2)));
+    }
+
+    @Override
+    public int compare(Text o1, Text o2) {
+      String s1 = o1.toString();
+      String s2 = o2.toString();
+      s1 = s1.substring(0, s1.indexOf("|"));
+      s2 = s2.substring(0, s2.indexOf("|"));
+      return s1.compareTo(s2);
+    }
+
+  }
+
+  @Test
+  public void testCombiner() throws Exception {
+    if (!new File(TEST_ROOT_DIR).mkdirs()) {
+      throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
+    }
+    File in = new File(TEST_ROOT_DIR, "input");
+    if (!in.mkdirs()) {
+      throw new RuntimeException("Could not create test dir: " + in);
+    }
+    File out = new File(TEST_ROOT_DIR, "output");
+    PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
+    pw.println("A|a,1");
+    pw.println("A|b,2");
+    pw.println("B|a,3");
+    pw.println("B|b,4");
+    pw.println("B|c,5");
+    pw.close();
+    JobConf job = new JobConf();
+    job.set("mapreduce.framework.name", "local");
+    TextInputFormat.setInputPaths(job, new Path(in.getPath()));
+    TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
+    job.setMapperClass(Map.class);
+    job.setReducerClass(Reduce.class);
+    job.setInputFormat(TextInputFormat.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(LongWritable.class);
+    job.setOutputFormat(TextOutputFormat.class);
+    job.setOutputValueGroupingComparator(GroupComparator.class);
+
+    job.setCombinerClass(Combiner.class);
+    job.setCombinerKeyGroupingComparator(GroupComparator.class);
+    job.setInt("min.num.spills.for.combine", 0);
+
+    JobClient client = new JobClient(job);
+    RunningJob runningJob = client.submitJob(job);
+    runningJob.waitForCompletion();
+    if (runningJob.isSuccessful()) {
+      Counters counters = runningJob.getCounters();
+
+      long combinerInputRecords = counters.getGroup(
+          "org.apache.hadoop.mapreduce.TaskCounter").
+          getCounter("COMBINE_INPUT_RECORDS");
+      long combinerOutputRecords = counters.getGroup(
+          "org.apache.hadoop.mapreduce.TaskCounter").
+          getCounter("COMBINE_OUTPUT_RECORDS");
+      Assert.assertTrue(combinerInputRecords > 0);
+      Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
+
+      BufferedReader br = new BufferedReader(new FileReader(
+          new File(out, "part-00000")));
+      Set<String> output = new HashSet<String>();
+      String line = br.readLine();
+      Assert.assertNotNull(line);
+      output.add(line.substring(0, 1) + line.substring(4, 5));
+      line = br.readLine();
+      Assert.assertNotNull(line);
+      output.add(line.substring(0, 1) + line.substring(4, 5));
+      line = br.readLine();
+      Assert.assertNull(line);
+      br.close();
+
+      Set<String> expected = new HashSet<String>();
+      expected.add("A2");
+      expected.add("B5");
+
+      Assert.assertEquals(expected, output);
+
+    } else {
+      Assert.fail("Job failed");
+    }
+  }
+
+}

+ 24 - 7
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTaskCommit.java

@@ -27,6 +27,10 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.ipc.ProtocolSignature;
 import org.apache.hadoop.mapred.SortedRanges.Range;
 import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
+import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
+import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
+
 
 public class TestTaskCommit extends HadoopTestCase {
   Path rootDir = 
@@ -131,11 +135,6 @@ public class TestTaskCommit extends HadoopTestCase {
       return null;
     }
 
-    @Override
-    public boolean ping(TaskAttemptID taskid) throws IOException {
-      return true;
-    }
-
     @Override
     public void reportDiagnosticInfo(TaskAttemptID taskid, String trace)
         throws IOException {
@@ -152,9 +151,11 @@ public class TestTaskCommit extends HadoopTestCase {
     }
 
     @Override
-    public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
+    public AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
         throws IOException, InterruptedException {
-      return true;
+      AMFeedback a = new AMFeedback();
+      a.setTaskFound(true);
+      return a;
     }
 
     @Override
@@ -168,6 +169,22 @@ public class TestTaskCommit extends HadoopTestCase {
         long clientVersion, int clientMethodsHash) throws IOException {
       return null;
     }
+
+    @Override
+    public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
+        throws IOException, InterruptedException {
+      fail("Task should not go to commit-pending");
+    }
+
+    @Override
+    public TaskCheckpointID getCheckpointID(TaskID taskId) {
+      return null;
+    }
+
+    @Override
+    public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
+      // ignore
+    }
   }
   
   /**

+ 178 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestNewCombinerGrouping.java

@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce;
+
+import junit.framework.Assert;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.UUID;
+
+public class TestNewCombinerGrouping {
+  private static String TEST_ROOT_DIR =
+      new File("build", UUID.randomUUID().toString()).getAbsolutePath();
+
+  public static class Map extends
+      Mapper<LongWritable, Text, Text, LongWritable> {
+
+    @Override
+    protected void map(LongWritable key, Text value,
+        Context context)
+        throws IOException, InterruptedException {
+      String v = value.toString();
+      String k = v.substring(0, v.indexOf(","));
+      v = v.substring(v.indexOf(",") + 1);
+      context.write(new Text(k), new LongWritable(Long.parseLong(v)));
+    }
+  }
+
+  public static class Reduce extends
+      Reducer<Text, LongWritable, Text, LongWritable> {
+
+    @Override
+    protected void reduce(Text key, Iterable<LongWritable> values,
+        Context context)
+        throws IOException, InterruptedException {
+      LongWritable maxValue = null;
+      for (LongWritable value : values) {
+        if (maxValue == null) {
+          maxValue = value;
+        } else if (value.compareTo(maxValue) > 0) {
+          maxValue = value;
+        }
+      }
+      context.write(key, maxValue);
+    }
+  }
+
+  public static class Combiner extends Reduce {
+  }
+
+  public static class GroupComparator implements RawComparator<Text> {
+    @Override
+    public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
+        int i4) {
+      byte[] b1 = new byte[i2];
+      System.arraycopy(bytes, i, b1, 0, i2);
+
+      byte[] b2 = new byte[i4];
+      System.arraycopy(bytes2, i3, b2, 0, i4);
+
+      return compare(new Text(new String(b1)), new Text(new String(b2)));
+    }
+
+    @Override
+    public int compare(Text o1, Text o2) {
+      String s1 = o1.toString();
+      String s2 = o2.toString();
+      s1 = s1.substring(0, s1.indexOf("|"));
+      s2 = s2.substring(0, s2.indexOf("|"));
+      return s1.compareTo(s2);
+    }
+
+  }
+
+  @Test
+  public void testCombiner() throws Exception {
+    if (!new File(TEST_ROOT_DIR).mkdirs()) {
+      throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
+    }
+    File in = new File(TEST_ROOT_DIR, "input");
+    if (!in.mkdirs()) {
+      throw new RuntimeException("Could not create test dir: " + in);
+    }
+    File out = new File(TEST_ROOT_DIR, "output");
+    PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
+    pw.println("A|a,1");
+    pw.println("A|b,2");
+    pw.println("B|a,3");
+    pw.println("B|b,4");
+    pw.println("B|c,5");
+    pw.close();
+    JobConf conf = new JobConf();
+    conf.set("mapreduce.framework.name", "local");
+    Job job = new Job(conf);
+    TextInputFormat.setInputPaths(job, new Path(in.getPath()));
+    TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
+
+    job.setMapperClass(Map.class);
+    job.setReducerClass(Reduce.class);
+    job.setInputFormatClass(TextInputFormat.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(LongWritable.class);
+    job.setOutputFormatClass(TextOutputFormat.class);
+    job.setGroupingComparatorClass(GroupComparator.class);
+
+    job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
+    job.setCombinerClass(Combiner.class);
+    job.getConfiguration().setInt("min.num.spills.for.combine", 0);
+
+    job.submit();
+    job.waitForCompletion(false);
+    if (job.isSuccessful()) {
+      Counters counters = job.getCounters();
+
+      long combinerInputRecords = counters.findCounter(
+          "org.apache.hadoop.mapreduce.TaskCounter",
+          "COMBINE_INPUT_RECORDS").getValue();
+      long combinerOutputRecords = counters.findCounter(
+          "org.apache.hadoop.mapreduce.TaskCounter",
+          "COMBINE_OUTPUT_RECORDS").getValue();
+      Assert.assertTrue(combinerInputRecords > 0);
+      Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
+
+      BufferedReader br = new BufferedReader(new FileReader(
+          new File(out, "part-r-00000")));
+      Set<String> output = new HashSet<String>();
+      String line = br.readLine();
+      Assert.assertNotNull(line);
+      output.add(line.substring(0, 1) + line.substring(4, 5));
+      line = br.readLine();
+      Assert.assertNotNull(line);
+      output.add(line.substring(0, 1) + line.substring(4, 5));
+      line = br.readLine();
+      Assert.assertNull(line);
+      br.close();
+
+      Set<String> expected = new HashSet<String>();
+      expected.add("A2");
+      expected.add("B5");
+
+      Assert.assertEquals(expected, output);
+
+    } else {
+      Assert.fail("Job failed");
+    }
+  }
+
+}

+ 12 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestJHSSecurity.java

@@ -39,6 +39,7 @@ import org.apache.hadoop.mapreduce.v2.api.protocolrecords.CancelDelegationTokenR
 import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDelegationTokenRequest;
 import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest;
 import org.apache.hadoop.mapreduce.v2.api.protocolrecords.RenewDelegationTokenRequest;
+import org.apache.hadoop.mapreduce.v2.hs.HistoryClientService;
 import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService;
 import org.apache.hadoop.mapreduce.v2.hs.JHSDelegationTokenSecretManager;
 import org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer;
@@ -94,6 +95,17 @@ public class TestJHSSecurity {
           return new JHSDelegationTokenSecretManager(initialInterval, 
               maxLifetime, renewInterval, 3600000, store);
         }
+
+        @Override
+        protected HistoryClientService createHistoryClientService() {
+          return new HistoryClientService(historyContext, 
+            this.jhsDTSecretManager) {
+            @Override
+            protected void initializeWebApp(Configuration conf) {
+              // Don't need it, skip.
+            }
+          };
+        }
       };
 //      final JobHistoryServer jobHistoryServer = jhServer;
       jobHistoryServer.init(conf);

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestUmbilicalProtocolWithJobToken.java

@@ -115,7 +115,7 @@ public class TestUmbilicalProtocolWithJobToken {
           proxy = (TaskUmbilicalProtocol) RPC.getProxy(
               TaskUmbilicalProtocol.class, TaskUmbilicalProtocol.versionID,
               addr, conf);
-          proxy.ping(null);
+          proxy.statusUpdate(null, null);
         } finally {
           server.stop();
           if (proxy != null) {

+ 1 - 1
hadoop-project/pom.xml

@@ -519,7 +519,7 @@
       <dependency>
         <groupId>commons-logging</groupId>
         <artifactId>commons-logging</artifactId>
-        <version>1.1.1</version>
+        <version>1.1.3</version>
         <exclusions>
           <exclusion>
             <groupId>avalon-framework</groupId>

+ 19 - 18
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java

@@ -64,8 +64,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@@ -105,8 +106,8 @@ public class ResourceSchedulerWrapper implements
 
   private Configuration conf;
   private ResourceScheduler scheduler;
-  private Map<ApplicationAttemptId, String> appQueueMap =
-          new ConcurrentHashMap<ApplicationAttemptId, String>();
+  private Map<ApplicationId, String> appQueueMap =
+          new ConcurrentHashMap<ApplicationId, String>();
   private BufferedWriter jobRuntimeLogBW;
 
   // Priority of the ResourceSchedulerWrapper shutdown hook.
@@ -240,7 +241,7 @@ public class ResourceSchedulerWrapper implements
             (AppAttemptRemovedSchedulerEvent) schedulerEvent;
         ApplicationAttemptId appAttemptId =
                 appRemoveEvent.getApplicationAttemptID();
-        String queue = appQueueMap.get(appAttemptId);
+        String queue = appQueueMap.get(appAttemptId.getApplicationId());
         SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
         if (! app.getLiveContainers().isEmpty()) {  // have 0 or 1
           // should have one container which is AM container
@@ -262,20 +263,18 @@ public class ResourceSchedulerWrapper implements
       schedulerHandleCounter.inc();
       schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();
 
-      if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED
-          && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
+      if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
+          && schedulerEvent instanceof AppRemovedSchedulerEvent) {
         SLSRunner.decreaseRemainingApps();
-        AppAttemptRemovedSchedulerEvent appRemoveEvent =
-                (AppAttemptRemovedSchedulerEvent) schedulerEvent;
-        ApplicationAttemptId appAttemptId =
-                appRemoveEvent.getApplicationAttemptID();
-        appQueueMap.remove(appRemoveEvent.getApplicationAttemptID());
-      } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_ADDED
-          && schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
-        AppAttemptAddedSchedulerEvent appAddEvent =
-                (AppAttemptAddedSchedulerEvent) schedulerEvent;
+        AppRemovedSchedulerEvent appRemoveEvent =
+                (AppRemovedSchedulerEvent) schedulerEvent;
+        appQueueMap.remove(appRemoveEvent.getApplicationID());
+      } else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED
+          && schedulerEvent instanceof AppAddedSchedulerEvent) {
+        AppAddedSchedulerEvent appAddEvent =
+                (AppAddedSchedulerEvent) schedulerEvent;
         String queueName = appAddEvent.getQueue();
-        appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
+        appQueueMap.put(appAddEvent.getApplicationId(), queueName);
       }
     }
   }
@@ -297,7 +296,9 @@ public class ResourceSchedulerWrapper implements
           continue;
         }
 
-        String queue = appQueueMap.get(containerId.getApplicationAttemptId());
+        String queue =
+            appQueueMap.get(containerId.getApplicationAttemptId()
+              .getApplicationId());
         int releasedMemory = 0, releasedVCores = 0;
         if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
           for (RMContainer rmc : app.getLiveContainers()) {
@@ -329,7 +330,7 @@ public class ResourceSchedulerWrapper implements
     // update queue information
     Resource pendingResource = Resources.createResource(0, 0);
     Resource allocatedResource = Resources.createResource(0, 0);
-    String queueName = appQueueMap.get(attemptId);
+    String queueName = appQueueMap.get(attemptId.getApplicationId());
     // container requested
     for (ResourceRequest request : resourceRequests) {
       if (request.getResourceName().equals(ResourceRequest.ANY)) {

+ 7 - 4
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java

@@ -283,10 +283,13 @@ public class SLSCapacityScheduler extends CapacityScheduler implements
 	        appQueueMap.remove(appRemoveEvent.getApplicationAttemptID());
 	      } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_ADDED
 	          && schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
-	        AppAttemptAddedSchedulerEvent appAddEvent =
-	                (AppAttemptAddedSchedulerEvent) schedulerEvent;
-	        String queueName = appAddEvent.getQueue();
-	        appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
+          AppAttemptAddedSchedulerEvent appAddEvent =
+              (AppAttemptAddedSchedulerEvent) schedulerEvent;
+          SchedulerApplication app =
+              applications.get(appAddEvent.getApplicationAttemptId()
+                .getApplicationId());
+          appQueueMap.put(appAddEvent.getApplicationAttemptId(), app.getQueue()
+              .getQueueName());
 	      }
 	    }
   }

+ 28 - 0
hadoop-yarn-project/CHANGES.txt

@@ -187,6 +187,16 @@ Release 2.4.0 - UNRELEASED
     YARN-1307. Redesign znode structure for Zookeeper based RM state-store for
     better organization and scalability. (Tsuyoshi OZAWA via vinodkv)
 
+    YARN-1172. Convert SecretManagers in RM to services (Tsuyoshi OZAWA via kasha)
+
+    YARN-1523. Use StandbyException instead of RMNotYetReadyException (kasha)
+
+    YARN-1541. Changed ResourceManager to invalidate ApplicationMaster host/port
+    information once an AM crashes. (Jian He via vinodkv)
+
+    YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize
+    app-attempts separately from apps. (Jian He via vinodkv)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -267,6 +277,21 @@ Release 2.4.0 - UNRELEASED
     YARN-1451. TestResourceManager relies on the scheduler assigning multiple
     containers in a single node update. (Sandy Ryza via kasha)
 
+    YARN-1527. Fix yarn rmadmin command to print the correct usage info.
+    (Akira AJISAKA via jianhe)
+
+    YARN-1522. Fixed a race condition in the test TestApplicationCleanup that was
+    causing it to randomly fail. (Liyin Liang via vinodkv)
+
+    YARN-1549. Fixed a bug in ResourceManager's ApplicationMasterService that
+    was causing unmanaged AMs to not finish correctly. (haosdent via vinodkv)
+
+    YARN-1559. Race between ServerRMProxy and ClientRMProxy setting 
+    RMProxy#INSTANCE. (kasha and vinodkv via kasha)
+
+    YARN-1560. Fixed TestYarnClient#testAMMRTokens failure with null AMRM token.
+    (Ted Yu via jianhe)
+
 Release 2.3.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -473,6 +498,9 @@ Release 2.2.0 - 2013-10-13
     YARN-1278. Fixed NodeManager to not delete local resources for apps on resync
     command from RM - a bug caused by YARN-1149. (Hitesh Shah via vinodkv)
 
+    YARN-1463. Tests should avoid starting http-server where possible or creating
+    spnego keytab/principals. (vinodkv via kasha)
+
 Release 2.1.1-beta - 2013-09-23
 
   INCOMPATIBLE CHANGES

+ 0 - 9
hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml

@@ -309,13 +309,4 @@
     <Class name="org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore" />
     <Bug pattern="IS2_INCONSISTENT_SYNC" />
   </Match>
-
-  <!-- Ignore INSTANCE not being final as it is created in sub-classes -->
-  <Match>
-    <Class name="org.apache.hadoop.yarn.client.RMProxy" />
-    <Field name="INSTANCE" />
-    <Bug pattern="MS_SHOULD_BE_FINAL"/>
-  </Match>
-
-
 </FindBugsFilter>

+ 5 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java

@@ -24,10 +24,10 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
 import org.apache.hadoop.classification.InterfaceStability.Stable;
+import org.apache.hadoop.ipc.StandbyException;
 import org.apache.hadoop.tools.GetUserMappingsProtocol;
 import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.ResourceOption;
-import org.apache.hadoop.yarn.exceptions.RMNotYetActiveException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest;
 import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse;
@@ -51,25 +51,25 @@ public interface ResourceManagerAdministrationProtocol extends GetUserMappingsPr
   @Public
   @Stable
   public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request) 
-  throws RMNotYetActiveException, YarnException, IOException;
+  throws StandbyException, YarnException, IOException;
 
   @Public
   @Stable
   public RefreshNodesResponse refreshNodes(RefreshNodesRequest request)
-  throws RMNotYetActiveException, YarnException, IOException;
+  throws StandbyException, YarnException, IOException;
 
   @Public
   @Stable
   public RefreshSuperUserGroupsConfigurationResponse 
   refreshSuperUserGroupsConfiguration(
       RefreshSuperUserGroupsConfigurationRequest request)
-  throws RMNotYetActiveException, YarnException, IOException;
+  throws StandbyException, YarnException, IOException;
 
   @Public
   @Stable
   public RefreshUserToGroupsMappingsResponse refreshUserToGroupsMappings(
       RefreshUserToGroupsMappingsRequest request)
-  throws RMNotYetActiveException, YarnException, IOException;
+  throws StandbyException, YarnException, IOException;
 
   @Public
   @Stable

Some files were not shown because too many files changed in this diff