Merging r1616894 through r1617376 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-6584@1617377 13f79535-47bb-0310-9956-ffa450edef68
Jing Zhao 11 years ago
parent
commit
04884cd530
Changed 56 files with 1693 additions and 871 deletions
  1. +9 -0  hadoop-common-project/hadoop-common/CHANGES.txt
  2. +2 -1  hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
  3. +228 -19  hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java
  4. +2 -2  hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java
  5. +1 -1  hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSRESTConstants.java
  6. +8 -0  hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
  7. +65 -0  hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java
  8. +38 -44  hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMS.java
  9. +13 -12  hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java
  10. +2 -0  hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java
  11. +3 -0  hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJMXServlet.java
  12. +18 -17  hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSAudit.java
  13. +4 -0  hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java
  14. +0 -2  hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java
  15. +9 -0  hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  16. +47 -55  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
  17. +87 -113  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java
  18. +21 -13  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/ExitStatus.java
  19. +4 -8  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
  20. +2 -2  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  21. +28 -31  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java
  22. +1 -2  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java
  23. +5 -7  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
  24. +1 -2  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java
  25. +1 -3  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java
  26. +1 -2  hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java
  27. +3 -3  hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java
  28. +1 -1  hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java
  29. +1 -1  hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java
  30. +3 -3  hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
  31. +18 -0  hadoop-yarn-project/CHANGES.txt
  32. +63 -0  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java
  33. +64 -0  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java
  34. +73 -3  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/async/impl/TestAMRMClientAsync.java
  35. +35 -0  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
  36. +40 -37  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java
  37. +7 -18  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/webapp/AHSWebApp.java
  38. +319 -0  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/TimelineDataManager.java
  39. +33 -222  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/webapp/TimelineWebServices.java
  40. +1 -1  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryClientService.java
  41. +4 -2  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/timeline/webapp/TestTimelineWebServices.java
  42. +0 -1  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java
  43. +28 -43  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
  44. +0 -18  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
  45. +0 -36  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppUpdateSavedEvent.java
  46. +0 -27  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
  47. +0 -39  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptNewSavedEvent.java
  48. +0 -38  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptUpdateSavedEvent.java
  49. +12 -0  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java
  50. +27 -2  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java
  51. +6 -3  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java
  52. +3 -6  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java
  53. +6 -7  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java
  54. +7 -9  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
  55. +31 -15  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java
  56. +308 -0  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java

+ 9 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -199,6 +199,9 @@ Trunk (Unreleased)
 
     HADOOP-10936. Change default KeyProvider bitlength to 128. (wang)
 
+    HADOOP-10224. JavaKeyStoreProvider has to protect against corrupting 
+    underlying store. (asuresh via tucu)
+
   BUG FIXES
 
     HADOOP-9451. Fault single-layer config if node group topology is enabled.
@@ -421,6 +424,9 @@ Trunk (Unreleased)
     HADOOP-10939. Fix TestKeyProviderFactory testcases to use default 128 bit
     length keys. (Arun Suresh via wang)
 
+    HADOOP-10862. Miscellaneous trivial corrections to KMS classes. 
+    (asuresh via tucu)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
@@ -547,6 +553,9 @@ Release 2.6.0 - UNRELEASED
     HADOOP-10929. Typo in Configuration.getPasswordFromCredentialProviders
     (lmccay via brandonli)
 
+    HADOOP-10402. Configuration.getValByRegex does not substitute for
+    variables. (Robert Kanter via kasha)
+
 Release 2.5.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 2 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -2755,7 +2755,8 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
           item.getValue() instanceof String) {
         m = p.matcher((String)item.getKey());
         if(m.find()) { // match
-          result.put((String) item.getKey(), (String) item.getValue());
+          result.put((String) item.getKey(),
+              substituteVars(getProps().getProperty((String) item.getKey())));
         }
       }
     }
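
The Configuration.java change above (HADOOP-10402) makes getValByRegex() run variable substitution on the values it returns, matching what get() and getInt() already do. A minimal usage sketch, assuming hadoop-common is on the classpath; the property names here are illustrative, not taken from the patch:

    import java.util.Map;
    import org.apache.hadoop.conf.Configuration;

    public class GetValByRegexDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        conf.set("my.base", "/data");
        conf.set("my.file", "${my.base}/input.txt");

        Map<String, String> matches = conf.getValByRegex("^my.*file$");
        // Before the fix this printed the raw "${my.base}/input.txt";
        // with substituteVars() applied it prints "/data/input.txt".
        System.out.println(matches.get("my.file"));
      }
    }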

+ 228 - 19
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java

@@ -27,8 +27,11 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.security.ProviderUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.crypto.spec.SecretKeySpec;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.ObjectInputStream;
@@ -80,6 +83,9 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 @InterfaceAudience.Private
 public class JavaKeyStoreProvider extends KeyProvider {
   private static final String KEY_METADATA = "KeyMetadata";
+  private static Logger LOG =
+      LoggerFactory.getLogger(JavaKeyStoreProvider.class);
+
   public static final String SCHEME_NAME = "jceks";
 
   public static final String KEYSTORE_PASSWORD_FILE_KEY =
@@ -115,6 +121,10 @@ public class JavaKeyStoreProvider extends KeyProvider {
       if (pwFile != null) {
         ClassLoader cl = Thread.currentThread().getContextClassLoader();
         URL pwdFile = cl.getResource(pwFile);
+        if (pwdFile == null) {
+          // Provided Password file does not exist
+          throw new IOException("Password file does not exists");
+        }
         if (pwdFile != null) {
           InputStream is = pwdFile.openStream();
           try {
@@ -129,19 +139,25 @@ public class JavaKeyStoreProvider extends KeyProvider {
       password = KEYSTORE_PASSWORD_DEFAULT;
     }
     try {
+      Path oldPath = constructOldPath(path);
+      Path newPath = constructNewPath(path);
       keyStore = KeyStore.getInstance(SCHEME_NAME);
+      FsPermission perm = null;
       if (fs.exists(path)) {
-        // save off permissions in case we need to
-        // rewrite the keystore in flush()
-        FileStatus s = fs.getFileStatus(path);
-        permissions = s.getPermission();
-
-        keyStore.load(fs.open(path), password);
+        // flush did not proceed to completion
+        // _NEW should not exist
+        if (fs.exists(newPath)) {
+          throw new IOException(
+              String.format("Keystore not loaded due to some inconsistency "
+              + "('%s' and '%s' should not exist together)!!", path, newPath));
+        }
+        perm = tryLoadFromPath(path, oldPath);
       } else {
-        permissions = new FsPermission("700");
-        // required to create an empty keystore. *sigh*
-        keyStore.load(null, password);
+        perm = tryLoadIncompleteFlush(oldPath, newPath);
       }
+      // Need to save off permissions in case we need to
+      // rewrite the keystore in flush()
+      permissions = perm;
     } catch (KeyStoreException e) {
       throw new IOException("Can't create keystore", e);
     } catch (NoSuchAlgorithmException e) {
@@ -154,6 +170,136 @@ public class JavaKeyStoreProvider extends KeyProvider {
     writeLock = lock.writeLock();
   }
 
+  /**
+   * Try loading from the user specified path, else load from the backup
+   * path in case Exception is not due to bad/wrong password
+   * @param path Actual path to load from
+   * @param backupPath Backup path (_OLD)
+   * @return The permissions of the loaded file
+   * @throws NoSuchAlgorithmException
+   * @throws CertificateException
+   * @throws IOException
+   */
+  private FsPermission tryLoadFromPath(Path path, Path backupPath)
+      throws NoSuchAlgorithmException, CertificateException,
+      IOException {
+    FsPermission perm = null;
+    try {
+      perm = loadFromPath(path, password);
+      // Remove _OLD if exists
+      if (fs.exists(backupPath)) {
+        fs.delete(backupPath, true);
+      }
+      LOG.debug("KeyStore loaded successfully !!");
+    } catch (IOException ioe) {
+      // If file is corrupted for some reason other than
+      // wrong password try the _OLD file if exits
+      if (!isBadorWrongPassword(ioe)) {
+        perm = loadFromPath(backupPath, password);
+        // Rename CURRENT to CORRUPTED
+        renameOrFail(path, new Path(path.toString() + "_CORRUPTED_"
+            + System.currentTimeMillis()));
+        renameOrFail(backupPath, path);
+        LOG.debug(String.format(
+            "KeyStore loaded successfully from '%s' since '%s'"
+                + "was corrupted !!", backupPath, path));
+      } else {
+        throw ioe;
+      }
+    }
+    return perm;
+  }
+
+  /**
+   * The KeyStore might have gone down during a flush, In which case either the
+   * _NEW or _OLD files might exists. This method tries to load the KeyStore
+   * from one of these intermediate files.
+   * @param oldPath the _OLD file created during flush
+   * @param newPath the _NEW file created during flush
+   * @return The permissions of the loaded file
+   * @throws IOException
+   * @throws NoSuchAlgorithmException
+   * @throws CertificateException
+   */
+  private FsPermission tryLoadIncompleteFlush(Path oldPath, Path newPath)
+      throws IOException, NoSuchAlgorithmException, CertificateException {
+    FsPermission perm = null;
+    // Check if _NEW exists (in case flush had finished writing but not
+    // completed the re-naming)
+    if (fs.exists(newPath)) {
+      perm = loadAndReturnPerm(newPath, oldPath);
+    }
+    // try loading from _OLD (An earlier Flushing MIGHT not have completed
+    // writing completely)
+    if ((perm == null) && fs.exists(oldPath)) {
+      perm = loadAndReturnPerm(oldPath, newPath);
+    }
+    // If not loaded yet,
+    // required to create an empty keystore. *sigh*
+    if (perm == null) {
+      keyStore.load(null, password);
+      LOG.debug("KeyStore initialized anew successfully !!");
+      perm = new FsPermission("700");
+    }
+    return perm;
+  }
+
+  private FsPermission loadAndReturnPerm(Path pathToLoad, Path pathToDelete)
+      throws NoSuchAlgorithmException, CertificateException,
+      IOException {
+    FsPermission perm = null;
+    try {
+      perm = loadFromPath(pathToLoad, password);
+      renameOrFail(pathToLoad, path);
+      LOG.debug(String.format("KeyStore loaded successfully from '%s'!!",
+          pathToLoad));
+      if (fs.exists(pathToDelete)) {
+        fs.delete(pathToDelete, true);
+      }
+    } catch (IOException e) {
+      // Check for password issue : don't want to trash file due
+      // to wrong password
+      if (isBadorWrongPassword(e)) {
+        throw e;
+      }
+    }
+    return perm;
+  }
+
+  private boolean isBadorWrongPassword(IOException ioe) {
+    // As per documentation this is supposed to be the way to figure
+    // if password was correct
+    if (ioe.getCause() instanceof UnrecoverableKeyException) {
+      return true;
+    }
+    // Unfortunately that doesn't seem to work..
+    // Workaround :
+    if ((ioe.getCause() == null)
+        && (ioe.getMessage() != null)
+        && ((ioe.getMessage().contains("Keystore was tampered")) || (ioe
+            .getMessage().contains("password was incorrect")))) {
+      return true;
+    }
+    return false;
+  }
+
+  private FsPermission loadFromPath(Path p, char[] password)
+      throws IOException, NoSuchAlgorithmException, CertificateException {
+    FileStatus s = fs.getFileStatus(p);
+    keyStore.load(fs.open(p), password);
+    return s.getPermission();
+  }
+
+  private Path constructNewPath(Path path) {
+    Path newPath = new Path(path.toString() + "_NEW");
+    return newPath;
+  }
+
+  private Path constructOldPath(Path path) {
+    Path oldPath = new Path(path.toString() + "_OLD");
+    return oldPath;
+  }
+
   @Override
   public KeyVersion getKeyVersion(String versionName) throws IOException {
     readLock.lock();
@@ -352,11 +498,22 @@ public class JavaKeyStoreProvider extends KeyProvider {
 
   @Override
   public void flush() throws IOException {
+    Path newPath = constructNewPath(path);
+    Path oldPath = constructOldPath(path);
     writeLock.lock();
     try {
       if (!changed) {
         return;
       }
+      // Might exist if a backup has been restored etc.
+      if (fs.exists(newPath)) {
+        renameOrFail(newPath, new Path(newPath.toString()
+            + "_ORPHANED_" + System.currentTimeMillis()));
+      }
+      if (fs.exists(oldPath)) {
+        renameOrFail(oldPath, new Path(oldPath.toString()
+            + "_ORPHANED_" + System.currentTimeMillis()));
+      }
       // put all of the updates into the keystore
       for(Map.Entry<String, Metadata> entry: cache.entrySet()) {
         try {
@@ -366,25 +523,77 @@ public class JavaKeyStoreProvider extends KeyProvider {
           throw new IOException("Can't set metadata key " + entry.getKey(),e );
         }
       }
+
+      // Save old File first
+      boolean fileExisted = backupToOld(oldPath);
       // write out the keystore
-      FSDataOutputStream out = FileSystem.create(fs, path, permissions);
+      // Write to _NEW path first :
       try {
-        keyStore.store(out, password);
-      } catch (KeyStoreException e) {
-        throw new IOException("Can't store keystore " + this, e);
-      } catch (NoSuchAlgorithmException e) {
-        throw new IOException("No such algorithm storing keystore " + this, e);
-      } catch (CertificateException e) {
-        throw new IOException("Certificate exception storing keystore " + this,
-            e);
+        writeToNew(newPath);
+      } catch (IOException ioe) {
+        // rename _OLD back to curent and throw Exception
+        revertFromOld(oldPath, fileExisted);
+        throw ioe;
       }
-      out.close();
+      // Rename _NEW to CURRENT and delete _OLD
+      cleanupNewAndOld(newPath, oldPath);
       changed = false;
     } finally {
       writeLock.unlock();
     }
   }
 
+  private void cleanupNewAndOld(Path newPath, Path oldPath) throws IOException {
+    // Rename _NEW to CURRENT
+    renameOrFail(newPath, path);
+    // Delete _OLD
+    if (fs.exists(oldPath)) {
+      fs.delete(oldPath, true);
+    }
+  }
+
+  private void writeToNew(Path newPath) throws IOException {
+    FSDataOutputStream out =
+        FileSystem.create(fs, newPath, permissions);
+    try {
+      keyStore.store(out, password);
+    } catch (KeyStoreException e) {
+      throw new IOException("Can't store keystore " + this, e);
+    } catch (NoSuchAlgorithmException e) {
+      throw new IOException(
+          "No such algorithm storing keystore " + this, e);
+    } catch (CertificateException e) {
+      throw new IOException(
+          "Certificate exception storing keystore " + this, e);
+    }
+    out.close();
+  }
+
+  private void revertFromOld(Path oldPath, boolean fileExisted)
+      throws IOException {
+    if (fileExisted) {
+      renameOrFail(oldPath, path);
+    }
+  }
+
+  private boolean backupToOld(Path oldPath)
+      throws IOException {
+    boolean fileExisted = false;
+    if (fs.exists(path)) {
+      renameOrFail(path, oldPath);
+      fileExisted = true;
+    }
+    return fileExisted;
+  }
+
+  private void renameOrFail(Path src, Path dest)
+      throws IOException {
+    if (!fs.rename(src, dest)) {
+      throw new IOException("Rename unsuccessful : "
+          + String.format("'%s' to '%s'", src, dest));
+    }
+  }
+
   @Override
   public String toString() {
     return uri.toString();
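
The rewritten flush() above is a crash-safe write protocol: back up CURRENT to _OLD, write the keystore to _NEW, promote _NEW to CURRENT, then delete _OLD; the constructor's tryLoadFromPath()/tryLoadIncompleteFlush() recover from whatever state a crash leaves behind. A condensed sketch of the same protocol against a local filesystem, where writeStore() is a hypothetical stand-in for keyStore.store():

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;

    public class AtomicFlushSketch {
      // Sketch of the _OLD/_NEW rename dance from the patch (local FS only).
      static void flush(Path current) throws IOException {
        Path oldPath = current.resolveSibling(current.getFileName() + "_OLD");
        Path newPath = current.resolveSibling(current.getFileName() + "_NEW");

        boolean existed = Files.exists(current);
        if (existed) {
          Files.move(current, oldPath);   // 1. back up CURRENT as _OLD
        }
        try {
          writeStore(newPath);            // 2. write new contents to _NEW
        } catch (IOException e) {
          if (existed) {
            Files.move(oldPath, current); // revert _OLD back to CURRENT
          }
          throw e;
        }
        Files.move(newPath, current);     // 3. promote _NEW to CURRENT
        Files.deleteIfExists(oldPath);    // 4. backup no longer needed
      }

      // Hypothetical placeholder for keyStore.store(out, password).
      static void writeStore(Path p) throws IOException {
        Files.write(p, new byte[0]);
      }
    }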

+ 2 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java

@@ -512,7 +512,7 @@ public class KMSClientProvider extends KeyProvider implements CryptoExtension {
     List<String> batch = new ArrayList<String>();
     int batchLen = 0;
     for (String name : keyNames) {
-      int additionalLen = KMSRESTConstants.KEY_OP.length() + 1 + name.length();
+      int additionalLen = KMSRESTConstants.KEY.length() + 1 + name.length();
       batchLen += additionalLen;
       // topping at 1500 to account for initial URL and encoded names
       if (batchLen > 1500) {
@@ -536,7 +536,7 @@ public class KMSClientProvider extends KeyProvider implements CryptoExtension {
     for (String[] keySet : keySets) {
       if (keyNames.length > 0) {
         Map<String, Object> queryStr = new HashMap<String, Object>();
-        queryStr.put(KMSRESTConstants.KEY_OP, keySet);
+        queryStr.put(KMSRESTConstants.KEY, keySet);
         URL url = createURL(KMSRESTConstants.KEYS_METADATA_RESOURCE, null,
             null, queryStr);
         HttpURLConnection conn = createConnection(url, HTTP_GET);
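
Beyond the KEY_OP to KEY rename, the surrounding code shows how getKeysMetadata() splits key names across multiple GET requests so each query string stays under the 1500-character budget mentioned in the comment. A standalone sketch of that batching rule (the "key" parameter name and the 1500 cap come from the diff; the helper itself is hypothetical, and the cutoff handling past the shown hunk is assumed):

    import java.util.ArrayList;
    import java.util.List;

    public class KeyNameBatcher {
      // Split names into batches whose "key=<name>" budget stays under cap.
      static List<List<String>> batch(List<String> keyNames, int cap) {
        List<List<String>> batches = new ArrayList<>();
        List<String> current = new ArrayList<>();
        int len = 0;
        for (String name : keyNames) {
          // Mirrors additionalLen in the diff: "key".length() + 1 + name length.
          int additionalLen = "key".length() + 1 + name.length();
          len += additionalLen;
          if (len > cap && !current.isEmpty()) { // over budget: close the batch
            batches.add(current);
            current = new ArrayList<>();
            len = additionalLen;
          }
          current.add(name);
        }
        if (!current.isEmpty()) {
          batches.add(current);
        }
        return batches;
      }
    }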

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSRESTConstants.java

@@ -37,7 +37,7 @@ public class KMSRESTConstants {
   public static final String EEK_SUB_RESOURCE = "_eek";
   public static final String CURRENT_VERSION_SUB_RESOURCE = "_currentversion";
 
-  public static final String KEY_OP = "key";
+  public static final String KEY = "key";
   public static final String EEK_OP = "eek_op";
   public static final String EEK_GENERATE = "generate";
   public static final String EEK_DECRYPT = "decrypt";

+ 8 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java

@@ -178,6 +178,14 @@ public class TestConfiguration extends TestCase {
     // check that expansion also occurs for getInt()
     assertTrue(conf.getInt("intvar", -1) == 42);
     assertTrue(conf.getInt("my.int", -1) == 42);
+
+    Map<String, String> results = conf.getValByRegex("^my.*file$");
+    assertTrue(results.keySet().contains("my.relfile"));
+    assertTrue(results.keySet().contains("my.fullfile"));
+    assertTrue(results.keySet().contains("my.file"));
+    assertEquals(-1, results.get("my.relfile").indexOf("${"));
+    assertEquals(-1, results.get("my.fullfile").indexOf("${"));
+    assertEquals(-1, results.get("my.file").indexOf("${"));
   }
 
   public void testFinalParam() throws IOException {

+ 65 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java

@@ -220,11 +220,76 @@ public class TestKeyProviderFactory {
     assertTrue(s.getPermission().toString().equals("rwx------"));
     assertTrue(file + " should exist", file.isFile());
 
+    // Corrupt file and Check if JKS can reload from _OLD file
+    File oldFile = new File(file.getPath() + "_OLD");
+    file.renameTo(oldFile);
+    file.delete();
+    file.createNewFile();
+    assertTrue(oldFile.exists());
+    KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0);
+    assertTrue(file.exists());
+    assertTrue(oldFile + "should be deleted", !oldFile.exists());
+    verifyAfterReload(file, provider);
+    assertTrue(!oldFile.exists());
+
+    // _NEW and current file should not exist together
+    File newFile = new File(file.getPath() + "_NEW");
+    newFile.createNewFile();
+    try {
+      provider = KeyProviderFactory.getProviders(conf).get(0);
+      Assert.fail("_NEW and current file should not exist together !!");
+    } catch (Exception e) {
+      // Ignore
+    } finally {
+      if (newFile.exists()) {
+        newFile.delete();
+      }
+    }
+
+    // Load from _NEW file
+    file.renameTo(newFile);
+    file.delete();
+    try {
+      provider = KeyProviderFactory.getProviders(conf).get(0);
+      Assert.assertFalse(newFile.exists());
+      Assert.assertFalse(oldFile.exists());
+    } catch (Exception e) {
+      Assert.fail("JKS should load from _NEW file !!");
+      // Ignore
+    }
+    verifyAfterReload(file, provider);
+
+    // _NEW exists but corrupt.. must load from _OLD
+    newFile.createNewFile();
+    file.renameTo(oldFile);
+    file.delete();
+    try {
+      provider = KeyProviderFactory.getProviders(conf).get(0);
+      Assert.assertFalse(newFile.exists());
+      Assert.assertFalse(oldFile.exists());
+    } catch (Exception e) {
+      Assert.fail("JKS should load from _OLD file !!");
+      // Ignore
+    } finally {
+      if (newFile.exists()) {
+        newFile.delete();
+      }
+    }
+    verifyAfterReload(file, provider);
+
     // check permission retention after explicit change
     fs.setPermission(path, new FsPermission("777"));
     checkPermissionRetention(conf, ourUrl, path);
   }
 
+  private void verifyAfterReload(File file, KeyProvider provider)
+      throws IOException {
+    List<String> existingKeys = provider.getKeys();
+    assertTrue(existingKeys.contains("key4"));
+    assertTrue(existingKeys.contains("key3"));
+    assertTrue(file.exists());
+  }
+
   public void checkPermissionRetention(Configuration conf, String ourUrl, Path path) throws Exception {
     KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0);
     // let's add a new key and flush and check that permissions are still set to 777

+ 38 - 44
hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMS.java

@@ -47,7 +47,6 @@ import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.security.Principal;
-import java.text.MessageFormat;
 import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
@@ -59,19 +58,14 @@ import java.util.Map;
 @Path(KMSRESTConstants.SERVICE_VERSION)
 @InterfaceAudience.Private
 public class KMS {
-  public static final String CREATE_KEY = "CREATE_KEY";
-  public static final String DELETE_KEY = "DELETE_KEY";
-  public static final String ROLL_NEW_VERSION = "ROLL_NEW_VERSION";
-  public static final String GET_KEYS = "GET_KEYS";
-  public static final String GET_KEYS_METADATA = "GET_KEYS_METADATA";
-  public static final String GET_KEY_VERSIONS = "GET_KEY_VERSIONS";
-  public static final String GET_METADATA = "GET_METADATA";
-
-  public static final String GET_KEY_VERSION = "GET_KEY_VERSION";
-  public static final String GET_CURRENT_KEY = "GET_CURRENT_KEY";
-  public static final String GENERATE_EEK = "GENERATE_EEK";
-  public static final String DECRYPT_EEK = "DECRYPT_EEK";
-  
+
+  public static enum KMSOp {
+    CREATE_KEY, DELETE_KEY, ROLL_NEW_VERSION,
+    GET_KEYS, GET_KEYS_METADATA,
+    GET_KEY_VERSIONS, GET_METADATA, GET_KEY_VERSION, GET_CURRENT_KEY,
+    GENERATE_EEK, DECRYPT_EEK
+  }
+
   private KeyProviderCryptoExtension provider;
   private KMSAudit kmsAudit;
 
@@ -91,22 +85,22 @@ public class KMS {
 
 
   private static final String UNAUTHORIZED_MSG_WITH_KEY = 
-      "User:{0} not allowed to do ''{1}'' on ''{2}''";
+      "User:%s not allowed to do '%s' on '%s'";
   
   private static final String UNAUTHORIZED_MSG_WITHOUT_KEY = 
-      "User:{0} not allowed to do ''{1}''";
+      "User:%s not allowed to do '%s'";
 
   private void assertAccess(KMSACLs.Type aclType, Principal principal,
-      String operation) throws AccessControlException {
+      KMSOp operation) throws AccessControlException {
     assertAccess(aclType, principal, operation, null);
   }
 
   private void assertAccess(KMSACLs.Type aclType, Principal principal,
-      String operation, String key) throws AccessControlException {
+      KMSOp operation, String key) throws AccessControlException {
     if (!KMSWebApp.getACLs().hasAccess(aclType, principal.getName())) {
       KMSWebApp.getUnauthorizedCallsMeter().mark();
       kmsAudit.unauthorized(principal, operation, key);
-      throw new AuthorizationException(MessageFormat.format(
+      throw new AuthorizationException(String.format(
           (key != null) ? UNAUTHORIZED_MSG_WITH_KEY 
                         : UNAUTHORIZED_MSG_WITHOUT_KEY,
           principal.getName(), operation, key));
@@ -135,7 +129,7 @@ public class KMS {
     Principal user = getPrincipal(securityContext);
     String name = (String) jsonKey.get(KMSRESTConstants.NAME_FIELD);
     KMSClientProvider.checkNotEmpty(name, KMSRESTConstants.NAME_FIELD);
-    assertAccess(KMSACLs.Type.CREATE, user, CREATE_KEY, name);
+    assertAccess(KMSACLs.Type.CREATE, user, KMSOp.CREATE_KEY, name);
     String cipher = (String) jsonKey.get(KMSRESTConstants.CIPHER_FIELD);
     String material = (String) jsonKey.get(KMSRESTConstants.MATERIAL_FIELD);
     int length = (jsonKey.containsKey(KMSRESTConstants.LENGTH_FIELD))
@@ -146,7 +140,7 @@ public class KMS {
         jsonKey.get(KMSRESTConstants.ATTRIBUTES_FIELD);
     if (material != null) {
       assertAccess(KMSACLs.Type.SET_KEY_MATERIAL, user,
-          CREATE_KEY + " with user provided material", name);
+          KMSOp.CREATE_KEY, name);
     }
     KeyProvider.Options options = new KeyProvider.Options(
         KMSWebApp.getConfiguration());
@@ -165,7 +159,7 @@ public class KMS {
 
     provider.flush();
 
-    kmsAudit.ok(user, CREATE_KEY, name, "UserProvidedMaterial:" +
+    kmsAudit.ok(user, KMSOp.CREATE_KEY, name, "UserProvidedMaterial:" +
         (material != null) + " Description:" + description);
 
     if (!KMSWebApp.getACLs().hasAccess(KMSACLs.Type.GET, user.getName())) {
@@ -186,12 +180,12 @@ public class KMS {
       @PathParam("name") String name) throws Exception {
     KMSWebApp.getAdminCallsMeter().mark();
     Principal user = getPrincipal(securityContext);
-    assertAccess(KMSACLs.Type.DELETE, user, DELETE_KEY, name);
+    assertAccess(KMSACLs.Type.DELETE, user, KMSOp.DELETE_KEY, name);
     KMSClientProvider.checkNotEmpty(name, "name");
     provider.deleteKey(name);
     provider.flush();
 
-    kmsAudit.ok(user, DELETE_KEY, name, "");
+    kmsAudit.ok(user, KMSOp.DELETE_KEY, name, "");
 
     return Response.ok().build();
   }
@@ -205,13 +199,13 @@ public class KMS {
       throws Exception {
     KMSWebApp.getAdminCallsMeter().mark();
     Principal user = getPrincipal(securityContext);
-    assertAccess(KMSACLs.Type.ROLLOVER, user, ROLL_NEW_VERSION, name);
+    assertAccess(KMSACLs.Type.ROLLOVER, user, KMSOp.ROLL_NEW_VERSION, name);
     KMSClientProvider.checkNotEmpty(name, "name");
     String material = (String)
         jsonMaterial.get(KMSRESTConstants.MATERIAL_FIELD);
     if (material != null) {
       assertAccess(KMSACLs.Type.SET_KEY_MATERIAL, user,
-          ROLL_NEW_VERSION + " with user provided material", name);
+          KMSOp.ROLL_NEW_VERSION, name);
     }
     KeyProvider.KeyVersion keyVersion = (material != null)
         ? provider.rollNewVersion(name, Base64.decodeBase64(material))
@@ -219,7 +213,7 @@ public class KMS {
 
     provider.flush();
 
-    kmsAudit.ok(user, ROLL_NEW_VERSION, name, "UserProvidedMaterial:" +
+    kmsAudit.ok(user, KMSOp.ROLL_NEW_VERSION, name, "UserProvidedMaterial:" +
         (material != null) + " NewVersion:" + keyVersion.getVersionName());
 
     if (!KMSWebApp.getACLs().hasAccess(KMSACLs.Type.GET, user.getName())) {
@@ -233,15 +227,15 @@ public class KMS {
   @Path(KMSRESTConstants.KEYS_METADATA_RESOURCE)
   @Produces(MediaType.APPLICATION_JSON)
   public Response getKeysMetadata(@Context SecurityContext securityContext,
-      @QueryParam(KMSRESTConstants.KEY_OP) List<String> keyNamesList)
+      @QueryParam(KMSRESTConstants.KEY) List<String> keyNamesList)
       throws Exception {
     KMSWebApp.getAdminCallsMeter().mark();
     Principal user = getPrincipal(securityContext);
     String[] keyNames = keyNamesList.toArray(new String[keyNamesList.size()]);
-    assertAccess(KMSACLs.Type.GET_METADATA, user, GET_KEYS_METADATA);
+    assertAccess(KMSACLs.Type.GET_METADATA, user, KMSOp.GET_KEYS_METADATA);
     KeyProvider.Metadata[] keysMeta = provider.getKeysMetadata(keyNames);
     Object json = KMSServerJSONUtils.toJSON(keyNames, keysMeta);
-    kmsAudit.ok(user, GET_KEYS_METADATA, "");
+    kmsAudit.ok(user, KMSOp.GET_KEYS_METADATA, "");
     return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
   }
 
@@ -252,9 +246,9 @@ public class KMS {
       throws Exception {
     KMSWebApp.getAdminCallsMeter().mark();
     Principal user = getPrincipal(securityContext);
-    assertAccess(KMSACLs.Type.GET_KEYS, user, GET_KEYS);
+    assertAccess(KMSACLs.Type.GET_KEYS, user, KMSOp.GET_KEYS);
     Object json = provider.getKeys();
-    kmsAudit.ok(user, GET_KEYS, "");
+    kmsAudit.ok(user, KMSOp.GET_KEYS, "");
     return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
   }
 
@@ -276,9 +270,9 @@ public class KMS {
     Principal user = getPrincipal(securityContext);
     KMSClientProvider.checkNotEmpty(name, "name");
     KMSWebApp.getAdminCallsMeter().mark();
-    assertAccess(KMSACLs.Type.GET_METADATA, user, GET_METADATA, name);
+    assertAccess(KMSACLs.Type.GET_METADATA, user, KMSOp.GET_METADATA, name);
     Object json = KMSServerJSONUtils.toJSON(name, provider.getMetadata(name));
-    kmsAudit.ok(user, GET_METADATA, name, "");
+    kmsAudit.ok(user, KMSOp.GET_METADATA, name, "");
     return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
   }
 
@@ -292,9 +286,9 @@ public class KMS {
     Principal user = getPrincipal(securityContext);
     KMSClientProvider.checkNotEmpty(name, "name");
     KMSWebApp.getKeyCallsMeter().mark();
-    assertAccess(KMSACLs.Type.GET, user, GET_CURRENT_KEY, name);
+    assertAccess(KMSACLs.Type.GET, user, KMSOp.GET_CURRENT_KEY, name);
     Object json = KMSServerJSONUtils.toJSON(provider.getCurrentKey(name));
-    kmsAudit.ok(user, GET_CURRENT_KEY, name, "");
+    kmsAudit.ok(user, KMSOp.GET_CURRENT_KEY, name, "");
     return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
   }
 
@@ -308,9 +302,9 @@ public class KMS {
     KMSClientProvider.checkNotEmpty(versionName, "versionName");
     KMSWebApp.getKeyCallsMeter().mark();
     KeyVersion keyVersion = provider.getKeyVersion(versionName);
-    assertAccess(KMSACLs.Type.GET, user, GET_KEY_VERSION);
+    assertAccess(KMSACLs.Type.GET, user, KMSOp.GET_KEY_VERSION);
     if (keyVersion != null) {
-      kmsAudit.ok(user, GET_KEY_VERSION, keyVersion.getName(), "");
+      kmsAudit.ok(user, KMSOp.GET_KEY_VERSION, keyVersion.getName(), "");
     }
     Object json = KMSServerJSONUtils.toJSON(keyVersion);
     return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
@@ -334,7 +328,7 @@ public class KMS {
 
     Object retJSON;
     if (edekOp.equals(KMSRESTConstants.EEK_GENERATE)) {
-      assertAccess(KMSACLs.Type.GENERATE_EEK, user, GENERATE_EEK, name);
+      assertAccess(KMSACLs.Type.GENERATE_EEK, user, KMSOp.GENERATE_EEK, name);
 
       List<EncryptedKeyVersion> retEdeks =
           new LinkedList<EncryptedKeyVersion>();
@@ -345,7 +339,7 @@ public class KMS {
       } catch (Exception e) {
         throw new IOException(e);
       }
-      kmsAudit.ok(user, GENERATE_EEK, name, "");
+      kmsAudit.ok(user, KMSOp.GENERATE_EEK, name, "");
       retJSON = new ArrayList();
       for (EncryptedKeyVersion edek : retEdeks) {
         ((ArrayList)retJSON).add(KMSServerJSONUtils.toJSON(edek));
@@ -380,7 +374,7 @@ public class KMS {
         (String) jsonPayload.get(KMSRESTConstants.MATERIAL_FIELD);
     Object retJSON;
     if (eekOp.equals(KMSRESTConstants.EEK_DECRYPT)) {
-      assertAccess(KMSACLs.Type.DECRYPT_EEK, user, DECRYPT_EEK, keyName);
+      assertAccess(KMSACLs.Type.DECRYPT_EEK, user, KMSOp.DECRYPT_EEK, keyName);
       KMSClientProvider.checkNotNull(ivStr, KMSRESTConstants.IV_FIELD);
       byte[] iv = Base64.decodeBase64(ivStr);
       KMSClientProvider.checkNotNull(encMaterialStr,
@@ -391,7 +385,7 @@ public class KMS {
               new KMSClientProvider.KMSEncryptedKeyVersion(keyName, versionName,
                   iv, KeyProviderCryptoExtension.EEK, encMaterial));
       retJSON = KMSServerJSONUtils.toJSON(retKeyVersion);
-      kmsAudit.ok(user, DECRYPT_EEK, keyName, "");
+      kmsAudit.ok(user, KMSOp.DECRYPT_EEK, keyName, "");
     } else {
       throw new IllegalArgumentException("Wrong " + KMSRESTConstants.EEK_OP +
           " value, it must be " + KMSRESTConstants.EEK_GENERATE + " or " +
@@ -412,9 +406,9 @@ public class KMS {
     Principal user = getPrincipal(securityContext);
     KMSClientProvider.checkNotEmpty(name, "name");
     KMSWebApp.getKeyCallsMeter().mark();
-    assertAccess(KMSACLs.Type.GET, user, GET_KEY_VERSIONS, name);
+    assertAccess(KMSACLs.Type.GET, user, KMSOp.GET_KEY_VERSIONS, name);
     Object json = KMSServerJSONUtils.toJSON(provider.getKeyVersions(name));
-    kmsAudit.ok(user, GET_KEY_VERSIONS, name, "");
+    kmsAudit.ok(user, KMSOp.GET_KEY_VERSIONS, name, "");
     return Response.ok().type(MediaType.APPLICATION_JSON).entity(json).build();
   }
 

+ 13 - 12
hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java

@@ -50,11 +50,11 @@ public class KMSAudit {
     private final AtomicLong accessCount = new AtomicLong(-1);
     private final String keyName;
     private final String user;
-    private final String op;
+    private final KMS.KMSOp op;
     private final String extraMsg;
     private final long startTime = System.currentTimeMillis();
 
-    private AuditEvent(String keyName, String user, String op, String msg) {
+    private AuditEvent(String keyName, String user, KMS.KMSOp op, String msg) {
       this.keyName = keyName;
       this.user = user;
       this.op = op;
@@ -77,7 +77,7 @@ public class KMSAudit {
       return user;
     }
 
-    public String getOp() {
+    public KMS.KMSOp getOp() {
       return op;
     }
 
@@ -90,8 +90,9 @@ public class KMSAudit {
     OK, UNAUTHORIZED, UNAUTHENTICATED, ERROR;
   }
 
-  private static Set<String> AGGREGATE_OPS_WHITELIST = Sets.newHashSet(
-    KMS.GET_KEY_VERSION, KMS.GET_CURRENT_KEY, KMS.DECRYPT_EEK, KMS.GENERATE_EEK
+  private static Set<KMS.KMSOp> AGGREGATE_OPS_WHITELIST = Sets.newHashSet(
+    KMS.KMSOp.GET_KEY_VERSION, KMS.KMSOp.GET_CURRENT_KEY,
+    KMS.KMSOp.DECRYPT_EEK, KMS.KMSOp.GENERATE_EEK
   );
 
   private Cache<String, AuditEvent> cache;
@@ -137,10 +138,10 @@ public class KMSAudit {
         event.getExtraMsg());
   }
 
-  private void op(OpStatus opStatus, final String op, final String user,
+  private void op(OpStatus opStatus, final KMS.KMSOp op, final String user,
       final String key, final String extraMsg) {
     if (!Strings.isNullOrEmpty(user) && !Strings.isNullOrEmpty(key)
-        && !Strings.isNullOrEmpty(op)
+        && (op != null)
         && AGGREGATE_OPS_WHITELIST.contains(op)) {
       String cacheKey = createCacheKey(user, key, op);
       if (opStatus == OpStatus.UNAUTHORIZED) {
@@ -167,7 +168,7 @@ public class KMSAudit {
       }
     } else {
       List<String> kvs = new LinkedList<String>();
-      if (!Strings.isNullOrEmpty(op)) {
+      if (op != null) {
         kvs.add("op=" + op);
       }
       if (!Strings.isNullOrEmpty(key)) {
@@ -185,16 +186,16 @@ public class KMSAudit {
     }
   }
 
-  public void ok(Principal user, String op, String key,
+  public void ok(Principal user, KMS.KMSOp op, String key,
       String extraMsg) {
     op(OpStatus.OK, op, user.getName(), key, extraMsg);
   }
 
-  public void ok(Principal user, String op, String extraMsg) {
+  public void ok(Principal user, KMS.KMSOp op, String extraMsg) {
     op(OpStatus.OK, op, user.getName(), null, extraMsg);
   }
 
-  public void unauthorized(Principal user, String op, String key) {
+  public void unauthorized(Principal user, KMS.KMSOp op, String key) {
     op(OpStatus.UNAUTHORIZED, op, user.getName(), key, "");
   }
 
@@ -211,7 +212,7 @@ public class KMSAudit {
         + " URL:" + url + " ErrorMsg:'" + extraMsg + "'");
   }
 
-  private static String createCacheKey(String user, String key, String op) {
+  private static String createCacheKey(String user, String key, KMS.KMSOp op) {
     return user + "#" + key + "#" + op;
   }
 

+ 2 - 0
hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.crypto.key.kms.server;
 
+import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 
 import java.io.File;
@@ -26,6 +27,7 @@ import java.net.URL;
 /**
  * Utility class to load KMS configuration files.
  */
+@InterfaceAudience.Private
 public class KMSConfiguration {
 
   public static final String KMS_CONFIG_DIR = "kms.config.dir";

+ 3 - 0
hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJMXServlet.java

@@ -17,12 +17,15 @@
  */
 package org.apache.hadoop.crypto.key.kms.server;
 
+import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.jmx.JMXJsonServlet;
 
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
+
 import java.io.IOException;
 
+@InterfaceAudience.Private
 public class KMSJMXServlet extends JMXJsonServlet {
 
   @Override

+ 18 - 17
hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSAudit.java

@@ -23,6 +23,7 @@ import java.io.OutputStream;
 import java.io.PrintStream;
 import java.security.Principal;
 
+import org.apache.hadoop.crypto.key.kms.server.KMS.KMSOp;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.PropertyConfigurator;
 import org.junit.After;
@@ -82,16 +83,16 @@ public class TestKMSAudit {
   public void testAggregation() throws Exception {
     Principal luser = Mockito.mock(Principal.class);
     Mockito.when(luser.getName()).thenReturn("luser");
-    kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
-    kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
-    kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
-    kmsAudit.ok(luser, KMS.DELETE_KEY, "k1", "testmsg");
-    kmsAudit.ok(luser, KMS.ROLL_NEW_VERSION, "k1", "testmsg");
-    kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
-    kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
-    kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
+    kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
+    kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
+    kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
+    kmsAudit.ok(luser, KMSOp.DELETE_KEY, "k1", "testmsg");
+    kmsAudit.ok(luser, KMSOp.ROLL_NEW_VERSION, "k1", "testmsg");
+    kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
+    kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
+    kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
     Thread.sleep(1500);
-    kmsAudit.ok(luser, KMS.DECRYPT_EEK, "k1", "testmsg");
+    kmsAudit.ok(luser, KMSOp.DECRYPT_EEK, "k1", "testmsg");
     Thread.sleep(1500);
     String out = getAndResetLogOutput();
     System.out.println(out);
@@ -110,15 +111,15 @@ public class TestKMSAudit {
   public void testAggregationUnauth() throws Exception {
     Principal luser = Mockito.mock(Principal.class);
     Mockito.when(luser.getName()).thenReturn("luser");
-    kmsAudit.unauthorized(luser, KMS.GENERATE_EEK, "k2");
+    kmsAudit.unauthorized(luser, KMSOp.GENERATE_EEK, "k2");
     Thread.sleep(1000);
-    kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
-    kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
-    kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
-    kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
-    kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
-    kmsAudit.unauthorized(luser, KMS.GENERATE_EEK, "k3");
-    kmsAudit.ok(luser, KMS.GENERATE_EEK, "k3", "testmsg");
+    kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
+    kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
+    kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
+    kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
+    kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
+    kmsAudit.unauthorized(luser, KMSOp.GENERATE_EEK, "k3");
+    kmsAudit.ok(luser, KMSOp.GENERATE_EEK, "k3", "testmsg");
     Thread.sleep(2000);
     String out = getAndResetLogOutput();
     System.out.println(out);

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java

@@ -724,6 +724,10 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface {
         FSDataInputStream fis = clientCache.getDfsInputStream(userName,
             Nfs3Utils.getFileIdPath(handle));
 
+        if (fis == null) {
+            return new READ3Response(Nfs3Status.NFS3ERR_ACCES);
+        }
+
         try {
           readCount = fis.read(offset, readbuffer, 0, count);
         } catch (IOException e) {

+ 0 - 2
hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java

@@ -278,13 +278,11 @@ public class TestRpcProgramNfs3 {
     readReq.serialize(xdr_req);
 
     // Attempt by an unpriviledged user should fail.
-    /* Hits HDFS-6582. It needs to be fixed first.
     READ3Response response1 = nfsd.read(xdr_req.asReadOnlyWrap(),
         securityHandlerUnpriviledged,
         new InetSocketAddress("localhost", 1234));
     assertEquals("Incorrect return code:", Nfs3Status.NFS3ERR_ACCES,
         response1.getStatus());
-    */
 
     // Attempt by a priviledged user should pass.
     READ3Response response2 = nfsd.read(xdr_req.asReadOnlyWrap(),

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -407,6 +407,12 @@ Release 2.6.0 - UNRELEASED
     HDFS-6828. Separate block replica dispatching from Balancer. (szetszwo via
     jing9)
 
+    HDFS-6837. Code cleanup for Balancer and Dispatcher. (szetszwo via
+    jing9)
+
+    HDFS-6838. Code cleanup for unnecessary INode replacement.
+    (Jing Zhao via wheat9)
+
   OPTIMIZATIONS
 
     HDFS-6690. Deduplicate xattr names in memory. (wang)
@@ -502,6 +508,9 @@ Release 2.6.0 - UNRELEASED
     HDFS-6791. A block could remain under replicated if all of its replicas are on
     decommissioned nodes. (Ming Ma via jing9)
 
+    HDFS-6582. Missing null check in RpcProgramNfs3#read(XDR, SecurityHandler)
+    (Abhiraj Butala via brandonli)
+
 Release 2.5.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 47 - 55
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java

@@ -44,7 +44,8 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.StorageType;
-import org.apache.hadoop.hdfs.server.balancer.Dispatcher.BalancerDatanode;
+import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode;
+import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode.StorageGroup;
 import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Source;
 import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Task;
 import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Util;
@@ -184,10 +185,10 @@ public class Balancer {
   // all data node lists
   private final Collection<Source> overUtilized = new LinkedList<Source>();
   private final Collection<Source> aboveAvgUtilized = new LinkedList<Source>();
-  private final Collection<BalancerDatanode.StorageGroup> belowAvgUtilized
-      = new LinkedList<BalancerDatanode.StorageGroup>();
-  private final Collection<BalancerDatanode.StorageGroup> underUtilized
-      = new LinkedList<BalancerDatanode.StorageGroup>();
+  private final Collection<StorageGroup> belowAvgUtilized
+      = new LinkedList<StorageGroup>();
+  private final Collection<StorageGroup> underUtilized
+      = new LinkedList<StorageGroup>();
 
   /* Check that this Balancer is compatible with the Block Placement Policy
    * used by the Namenode.
@@ -209,8 +210,22 @@ public class Balancer {
    * when connection fails.
    */
   Balancer(NameNodeConnector theblockpool, Parameters p, Configuration conf) {
+    final long movedWinWidth = conf.getLong(
+        DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
+        DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
+    final int moverThreads = conf.getInt(
+        DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
+        DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT);
+    final int dispatcherThreads = conf.getInt(
+        DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
+        DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT);
+    final int maxConcurrentMovesPerNode = conf.getInt(
+        DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
+        DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
+
     this.dispatcher = new Dispatcher(theblockpool, p.nodesToBeIncluded,
-        p.nodesToBeExcluded, conf);
+        p.nodesToBeExcluded, movedWinWidth, moverThreads, dispatcherThreads,
+        maxConcurrentMovesPerNode, conf);
     this.threshold = p.threshold;
     this.policy = p.policy;
   }
@@ -255,7 +270,7 @@ public class Balancer {
     //   over-utilized, above-average, below-average and under-utilized.
     long overLoadedBytes = 0L, underLoadedBytes = 0L;
     for(DatanodeStorageReport r : reports) {
-      final BalancerDatanode dn = dispatcher.newDatanode(r);
+      final DDatanode dn = dispatcher.newDatanode(r);
       for(StorageType t : StorageType.asList()) {
         final Double utilization = policy.getUtilization(r, t);
         if (utilization == null) { // datanode does not have such storage type 
@@ -268,9 +283,9 @@ public class Balancer {
         final long maxSize2Move = computeMaxSize2Move(capacity,
             getRemaining(r, t), utilizationDiff, threshold);
 
-        final BalancerDatanode.StorageGroup g;
+        final StorageGroup g;
         if (utilizationDiff > 0) {
-          final Source s = dn.addSource(t, utilization, maxSize2Move, dispatcher);
+          final Source s = dn.addSource(t, maxSize2Move, dispatcher);
           if (thresholdDiff <= 0) { // within threshold
             aboveAvgUtilized.add(s);
           } else {
@@ -279,7 +294,7 @@ public class Balancer {
           }
           g = s;
         } else {
-          g = dn.addStorageGroup(t, utilization, maxSize2Move);
+          g = dn.addStorageGroup(t, maxSize2Move);
           if (thresholdDiff <= 0) { // within threshold
             belowAvgUtilized.add(g);
           } else {
@@ -328,7 +343,7 @@ public class Balancer {
     logUtilizationCollection("underutilized", underUtilized);
   }
 
-  private static <T extends BalancerDatanode.StorageGroup>
+  private static <T extends StorageGroup>
       void logUtilizationCollection(String name, Collection<T> items) {
     LOG.info(items.size() + " " + name + ": " + items);
   }
@@ -381,8 +396,7 @@ public class Balancer {
    * datanodes or the candidates are source nodes with (utilization > Avg), and
    * the others are target nodes with (utilization < Avg).
    */
-  private <G extends BalancerDatanode.StorageGroup,
-           C extends BalancerDatanode.StorageGroup>
+  private <G extends StorageGroup, C extends StorageGroup>
       void chooseStorageGroups(Collection<G> groups, Collection<C> candidates,
           Matcher matcher) {
     for(final Iterator<G> i = groups.iterator(); i.hasNext();) {
@@ -398,9 +412,8 @@ public class Balancer {
    * For the given datanode, choose a candidate and then schedule it.
    * @return true if a candidate is chosen; false if no candidates is chosen.
    */
-  private <C extends BalancerDatanode.StorageGroup>
-      boolean choose4One(BalancerDatanode.StorageGroup g,
-          Collection<C> candidates, Matcher matcher) {
+  private <C extends StorageGroup> boolean choose4One(StorageGroup g,
+      Collection<C> candidates, Matcher matcher) {
     final Iterator<C> i = candidates.iterator();
     final C chosen = chooseCandidate(g, i, matcher);
   
@@ -418,8 +431,7 @@ public class Balancer {
     return true;
   }
   
-  private void matchSourceWithTargetToMove(Source source,
-      BalancerDatanode.StorageGroup target) {
+  private void matchSourceWithTargetToMove(Source source, StorageGroup target) {
     long size = Math.min(source.availableSizeToMove(), target.availableSizeToMove());
     final Task task = new Task(target, size);
     source.addTask(task);
@@ -430,8 +442,7 @@ public class Balancer {
   }
   
   /** Choose a candidate for the given datanode. */
-  private <G extends BalancerDatanode.StorageGroup,
-           C extends BalancerDatanode.StorageGroup>
+  private <G extends StorageGroup, C extends StorageGroup>
       C chooseCandidate(G g, Iterator<C> candidates, Matcher matcher) {
     if (g.hasSpaceForScheduling()) {
       for(; candidates.hasNext(); ) {
@@ -439,7 +450,7 @@ public class Balancer {
         if (!c.hasSpaceForScheduling()) {
           candidates.remove();
         } else if (matcher.match(dispatcher.getCluster(),
-            g.getDatanode(), c.getDatanode())) {
+            g.getDatanodeInfo(), c.getDatanodeInfo())) {
           return c;
         }
       }
@@ -457,34 +468,15 @@ public class Balancer {
     dispatcher.reset(conf);;
   }
   
-  // Exit status
-  enum ReturnStatus {
-    // These int values will map directly to the balancer process's exit code.
-    SUCCESS(0),
-    IN_PROGRESS(1),
-    ALREADY_RUNNING(-1),
-    NO_MOVE_BLOCK(-2),
-    NO_MOVE_PROGRESS(-3),
-    IO_EXCEPTION(-4),
-    ILLEGAL_ARGS(-5),
-    INTERRUPTED(-6);
-
-    final int code;
-
-    ReturnStatus(int code) {
-      this.code = code;
-    }
-  }
-
   /** Run an iteration for all datanodes. */
-  private ReturnStatus run(int iteration, Formatter formatter,
+  private ExitStatus run(int iteration, Formatter formatter,
       Configuration conf) {
     try {
       final List<DatanodeStorageReport> reports = dispatcher.init();
       final long bytesLeftToMove = init(reports);
       if (bytesLeftToMove == 0) {
         System.out.println("The cluster is balanced. Exiting...");
-        return ReturnStatus.SUCCESS;
+        return ExitStatus.SUCCESS;
       } else {
         LOG.info( "Need to move "+ StringUtils.byteDesc(bytesLeftToMove)
             + " to make the cluster balanced." );
@@ -498,7 +490,7 @@ public class Balancer {
       final long bytesToMove = chooseStorageGroups();
       if (bytesToMove == 0) {
         System.out.println("No block can be moved. Exiting...");
-        return ReturnStatus.NO_MOVE_BLOCK;
+        return ExitStatus.NO_MOVE_BLOCK;
       } else {
         LOG.info( "Will move " + StringUtils.byteDesc(bytesToMove) +
             " in this iteration");
@@ -519,19 +511,19 @@ public class Balancer {
        * Exit no byte has been moved for 5 consecutive iterations.
        */
       if (!dispatcher.dispatchAndCheckContinue()) {
-        return ReturnStatus.NO_MOVE_PROGRESS;
+        return ExitStatus.NO_MOVE_PROGRESS;
       }
 
-      return ReturnStatus.IN_PROGRESS;
+      return ExitStatus.IN_PROGRESS;
     } catch (IllegalArgumentException e) {
       System.out.println(e + ".  Exiting ...");
-      return ReturnStatus.ILLEGAL_ARGS;
+      return ExitStatus.ILLEGAL_ARGUMENTS;
     } catch (IOException e) {
       System.out.println(e + ".  Exiting ...");
-      return ReturnStatus.IO_EXCEPTION;
+      return ExitStatus.IO_EXCEPTION;
     } catch (InterruptedException e) {
       System.out.println(e + ".  Exiting ...");
-      return ReturnStatus.INTERRUPTED;
+      return ExitStatus.INTERRUPTED;
     } finally {
       dispatcher.shutdownNow();
     }
@@ -570,14 +562,14 @@ public class Balancer {
         Collections.shuffle(connectors);
         for(NameNodeConnector nnc : connectors) {
           final Balancer b = new Balancer(nnc, p, conf);
-          final ReturnStatus r = b.run(iteration, formatter, conf);
+          final ExitStatus r = b.run(iteration, formatter, conf);
           // clean all lists
           b.resetData(conf);
-          if (r == ReturnStatus.IN_PROGRESS) {
+          if (r == ExitStatus.IN_PROGRESS) {
             done = false;
-          } else if (r != ReturnStatus.SUCCESS) {
+          } else if (r != ExitStatus.SUCCESS) {
            // must be an error status, return.
-            return r.code;
+            return r.getExitCode();
           }
         }
 
@@ -590,7 +582,7 @@ public class Balancer {
         nnc.close();
       }
     }
-    return ReturnStatus.SUCCESS.code;
+    return ExitStatus.SUCCESS.getExitCode();
   }
 
  /* Given elapsedTime in ms, return a printable string */
@@ -661,10 +653,10 @@ public class Balancer {
         return Balancer.run(namenodes, parse(args), conf);
       } catch (IOException e) {
         System.out.println(e + ".  Exiting ...");
-        return ReturnStatus.IO_EXCEPTION.code;
+        return ExitStatus.IO_EXCEPTION.getExitCode();
       } catch (InterruptedException e) {
         System.out.println(e + ".  Exiting ...");
-        return ReturnStatus.INTERRUPTED.code;
+        return ExitStatus.INTERRUPTED.getExitCode();
       } finally {
         System.out.format("%-24s ", DateFormat.getDateTimeInstance().format(new Date()));
         System.out.println("Balancing took " + time2Str(Time.now()-startTime));

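With ReturnStatus gone, callers compare the int returned by Balancer.run() against the shared ExitStatus codes, exactly as the updated tests further down do. A minimal caller-side sketch (variable names are illustrative):

    final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
    if (r != ExitStatus.SUCCESS.getExitCode()) {
      // every non-success status maps directly to a process exit code
      System.exit(r);
    }
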
+ 87 - 113
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java

@@ -48,7 +48,6 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
-import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.protocol.Block;
@@ -63,6 +62,7 @@ import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.SaslDataTransferClient;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
 import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
+import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode.StorageGroup;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
 import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
@@ -91,7 +91,6 @@ public class Dispatcher {
                                                                      // minutes
 
   private final NameNodeConnector nnc;
-  private final KeyManager keyManager;
   private final SaslDataTransferClient saslClient;
 
   /** Set of datanodes to be excluded. */
@@ -100,11 +99,10 @@ public class Dispatcher {
   private final Set<String> includedNodes;
 
   private final Collection<Source> sources = new HashSet<Source>();
-  private final Collection<BalancerDatanode.StorageGroup> targets
-      = new HashSet<BalancerDatanode.StorageGroup>();
+  private final Collection<StorageGroup> targets = new HashSet<StorageGroup>();
 
   private final GlobalBlockMap globalBlocks = new GlobalBlockMap();
-  private final MovedBlocks<BalancerDatanode.StorageGroup> movedBlocks;
+  private final MovedBlocks<StorageGroup> movedBlocks;
 
   /** Map (datanodeUuid,storageType -> StorageGroup) */
   private final StorageGroupMap storageGroupMap = new StorageGroupMap();
@@ -135,8 +133,7 @@ public class Dispatcher {
     }
     
     /** Remove all blocks except for the moved blocks. */
-    private void removeAllButRetain(
-        MovedBlocks<BalancerDatanode.StorageGroup> movedBlocks) {
+    private void removeAllButRetain(MovedBlocks<StorageGroup> movedBlocks) {
       for (Iterator<Block> i = map.keySet().iterator(); i.hasNext();) {
         if (!movedBlocks.contains(i.next())) {
           i.remove();
@@ -150,17 +147,15 @@ public class Dispatcher {
       return datanodeUuid + ":" + storageType;
     }
 
-    private final Map<String, BalancerDatanode.StorageGroup> map
-        = new HashMap<String, BalancerDatanode.StorageGroup>();
+    private final Map<String, StorageGroup> map = new HashMap<String, StorageGroup>();
 
-    BalancerDatanode.StorageGroup get(String datanodeUuid,
-        StorageType storageType) {
+    StorageGroup get(String datanodeUuid, StorageType storageType) {
       return map.get(toKey(datanodeUuid, storageType));
     }
 
-    void put(BalancerDatanode.StorageGroup g) {
-      final String key = toKey(g.getDatanode().getDatanodeUuid(), g.storageType);
-      final BalancerDatanode.StorageGroup existing = map.put(key, g);
+    void put(StorageGroup g) {
+      final String key = toKey(g.getDatanodeInfo().getDatanodeUuid(), g.storageType);
+      final StorageGroup existing = map.put(key, g);
       Preconditions.checkState(existing == null);
     }
 
@@ -177,8 +172,8 @@ public class Dispatcher {
   private class PendingMove {
     private DBlock block;
     private Source source;
-    private BalancerDatanode proxySource;
-    private BalancerDatanode.StorageGroup target;
+    private DDatanode proxySource;
+    private StorageGroup target;
 
     private PendingMove() {
     }
@@ -235,24 +230,24 @@ public class Dispatcher {
      * @return true if a proxy is found; otherwise false
      */
     private boolean chooseProxySource() {
-      final DatanodeInfo targetDN = target.getDatanode();
+      final DatanodeInfo targetDN = target.getDatanodeInfo();
        // if node groups are supported, first try nodes in the same node group
       if (cluster.isNodeGroupAware()) {
-        for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
-          if (cluster.isOnSameNodeGroup(loc.getDatanode(), targetDN)
+        for (StorageGroup loc : block.getLocations()) {
+          if (cluster.isOnSameNodeGroup(loc.getDatanodeInfo(), targetDN)
               && addTo(loc)) {
             return true;
           }
         }
       }
      // check if there is a replica on the same rack as the target
-      for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
-        if (cluster.isOnSameRack(loc.getDatanode(), targetDN) && addTo(loc)) {
+      for (StorageGroup loc : block.getLocations()) {
+        if (cluster.isOnSameRack(loc.getDatanodeInfo(), targetDN) && addTo(loc)) {
           return true;
         }
       }
      // otherwise, find a non-busy replica
-      for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
+      for (StorageGroup loc : block.getLocations()) {
         if (addTo(loc)) {
           return true;
         }
@@ -261,10 +256,10 @@ public class Dispatcher {
     }
 
     /** add to a proxy source for specific block movement */
-    private boolean addTo(BalancerDatanode.StorageGroup g) {
-      final BalancerDatanode bdn = g.getBalancerDatanode();
-      if (bdn.addPendingBlock(this)) {
-        proxySource = bdn;
+    private boolean addTo(StorageGroup g) {
+      final DDatanode dn = g.getDDatanode();
+      if (dn.addPendingBlock(this)) {
+        proxySource = dn;
         return true;
       }
       return false;
@@ -281,14 +276,13 @@ public class Dispatcher {
       DataInputStream in = null;
       try {
         sock.connect(
-            NetUtils.createSocketAddr(target.getDatanode().getXferAddr()),
+            NetUtils.createSocketAddr(target.getDatanodeInfo().getXferAddr()),
             HdfsServerConstants.READ_TIMEOUT);
         /*
          * Unfortunately we don't have a good way to know if the Datanode is
          * taking a really long time to move a block, OR something has gone
          * wrong and it's never going to finish. To deal with this scenario, we
-         * set a long timeout (20 minutes) to avoid hanging the balancer
-         * indefinitely.
+         * set a long timeout (20 minutes) to avoid hanging indefinitely.
          */
         sock.setSoTimeout(BLOCK_MOVE_READ_TIMEOUT);
 
@@ -298,9 +292,10 @@ public class Dispatcher {
         InputStream unbufIn = sock.getInputStream();
         ExtendedBlock eb = new ExtendedBlock(nnc.getBlockpoolID(),
             block.getBlock());
-        Token<BlockTokenIdentifier> accessToken = keyManager.getAccessToken(eb);
+        final KeyManager km = nnc.getKeyManager(); 
+        Token<BlockTokenIdentifier> accessToken = km.getAccessToken(eb);
         IOStreamPair saslStreams = saslClient.socketSend(sock, unbufOut,
-            unbufIn, keyManager, accessToken, target.getDatanode());
+            unbufIn, km, accessToken, target.getDatanodeInfo());
         unbufOut = saslStreams.out;
         unbufIn = saslStreams.in;
         out = new DataOutputStream(new BufferedOutputStream(unbufOut,
@@ -314,21 +309,19 @@ public class Dispatcher {
         LOG.info("Successfully moved " + this);
       } catch (IOException e) {
         LOG.warn("Failed to move " + this + ": " + e.getMessage());
-        /*
-         * proxy or target may have an issue, insert a small delay before using
-         * these nodes further. This avoids a potential storm of
-         * "threads quota exceeded" Warnings when the balancer gets out of sync
-         * with work going on in datanode.
-         */
+        // Proxy or target may have some issues, delay before using these nodes
+        // further in order to avoid a potential storm of "threads quota
+        // exceeded" warnings when the dispatcher gets out of sync with work
+        // going on in datanodes.
         proxySource.activateDelay(DELAY_AFTER_ERROR);
-        target.getBalancerDatanode().activateDelay(DELAY_AFTER_ERROR);
+        target.getDDatanode().activateDelay(DELAY_AFTER_ERROR);
       } finally {
         IOUtils.closeStream(out);
         IOUtils.closeStream(in);
         IOUtils.closeSocket(sock);
 
         proxySource.removePendingBlock(this);
-        target.getBalancerDatanode().removePendingBlock(this);
+        target.getDDatanode().removePendingBlock(this);
 
         synchronized (this) {
           reset();
@@ -342,8 +335,8 @@ public class Dispatcher {
     /** Send a block replace request to the output stream */
     private void sendRequest(DataOutputStream out, ExtendedBlock eb,
         Token<BlockTokenIdentifier> accessToken) throws IOException {
-      new Sender(out).replaceBlock(eb, target.storageType, accessToken, source
-          .getDatanode().getDatanodeUuid(), proxySource.datanode);
+      new Sender(out).replaceBlock(eb, target.storageType, accessToken,
+          source.getDatanodeInfo().getDatanodeUuid(), proxySource.datanode);
     }
 
     /** Receive a block copy response from the input stream */
@@ -368,8 +361,7 @@ public class Dispatcher {
   }
 
   /** A class for keeping track of block locations in the dispatcher. */
-  private static class DBlock extends
-      MovedBlocks.Locations<BalancerDatanode.StorageGroup> {
+  private static class DBlock extends MovedBlocks.Locations<StorageGroup> {
     DBlock(Block block) {
       super(block);
     }
@@ -377,10 +369,10 @@ public class Dispatcher {
 
   /** The class represents a desired move. */
   static class Task {
-    private final BalancerDatanode.StorageGroup target;
+    private final StorageGroup target;
     private long size; // bytes scheduled to move
 
-    Task(BalancerDatanode.StorageGroup target, long size) {
+    Task(StorageGroup target, long size) {
       this.target = target;
       this.size = size;
     }
@@ -391,28 +383,25 @@ public class Dispatcher {
   }
 
   /** A class that keeps track of a datanode. */
-  static class BalancerDatanode {
+  static class DDatanode {
 
     /** A group of storages in a datanode with the same storage type. */
     class StorageGroup {
       final StorageType storageType;
-      final double utilization;
       final long maxSize2Move;
       private long scheduledSize = 0L;
 
-      private StorageGroup(StorageType storageType, double utilization,
-          long maxSize2Move) {
+      private StorageGroup(StorageType storageType, long maxSize2Move) {
         this.storageType = storageType;
-        this.utilization = utilization;
         this.maxSize2Move = maxSize2Move;
       }
 
-      BalancerDatanode getBalancerDatanode() {
-        return BalancerDatanode.this;
+      private DDatanode getDDatanode() {
+        return DDatanode.this;
       }
 
-      DatanodeInfo getDatanode() {
-        return BalancerDatanode.this.datanode;
+      DatanodeInfo getDatanodeInfo() {
+        return DDatanode.this.datanode;
       }
 
      /** Decide whether more bytes still need to be moved */
@@ -447,7 +436,7 @@ public class Dispatcher {
 
       @Override
       public String toString() {
-        return "" + utilization;
+        return getDisplayName();
       }
     }
 
@@ -461,10 +450,10 @@ public class Dispatcher {
 
     @Override
     public String toString() {
-      return getClass().getSimpleName() + ":" + datanode + ":" + storageMap;
+      return getClass().getSimpleName() + ":" + datanode + ":" + storageMap.values();
     }
 
-    private BalancerDatanode(DatanodeStorageReport r, int maxConcurrentMoves) {
+    private DDatanode(DatanodeStorageReport r, int maxConcurrentMoves) {
       this.datanode = r.getDatanodeInfo();
       this.maxConcurrentMoves = maxConcurrentMoves;
       this.pendings = new ArrayList<PendingMove>(maxConcurrentMoves);
@@ -475,18 +464,14 @@ public class Dispatcher {
       Preconditions.checkState(existing == null);
     }
 
-    StorageGroup addStorageGroup(StorageType storageType, double utilization,
-        long maxSize2Move) {
-      final StorageGroup g = new StorageGroup(storageType, utilization,
-          maxSize2Move);
+    StorageGroup addStorageGroup(StorageType storageType, long maxSize2Move) {
+      final StorageGroup g = new StorageGroup(storageType, maxSize2Move);
       put(storageType, g);
       return g;
     }
 
-    Source addSource(StorageType storageType, double utilization,
-        long maxSize2Move, Dispatcher balancer) {
-      final Source s = balancer.new Source(storageType, utilization,
-          maxSize2Move, this);
+    Source addSource(StorageType storageType, long maxSize2Move, Dispatcher d) {
+      final Source s = d.new Source(storageType, maxSize2Move, this);
       put(storageType, s);
       return s;
     }
@@ -528,7 +513,7 @@ public class Dispatcher {
   }
 
  /** A node that can be the source of a block move */
-  class Source extends BalancerDatanode.StorageGroup {
+  class Source extends DDatanode.StorageGroup {
 
     private final List<Task> tasks = new ArrayList<Task>(2);
     private long blocksToReceive = 0L;
@@ -539,9 +524,8 @@ public class Dispatcher {
      */
     private final List<DBlock> srcBlocks = new ArrayList<DBlock>();
 
-    private Source(StorageType storageType, double utilization,
-        long maxSize2Move, BalancerDatanode dn) {
-      dn.super(storageType, utilization, maxSize2Move);
+    private Source(StorageType storageType, long maxSize2Move, DDatanode dn) {
+      dn.super(storageType, maxSize2Move);
     }
 
     /** Add a task */
@@ -565,7 +549,7 @@ public class Dispatcher {
      */
     private long getBlockList() throws IOException {
       final long size = Math.min(MAX_BLOCKS_SIZE_TO_FETCH, blocksToReceive);
-      final BlocksWithLocations newBlocks = nnc.getBlocks(getDatanode(), size);
+      final BlocksWithLocations newBlocks = nnc.getBlocks(getDatanodeInfo(), size);
 
       long bytesReceived = 0;
       for (BlockWithLocations blk : newBlocks.getBlocks()) {
@@ -579,7 +563,7 @@ public class Dispatcher {
             final String[] datanodeUuids = blk.getDatanodeUuids();
             final StorageType[] storageTypes = blk.getStorageTypes();
             for (int i = 0; i < datanodeUuids.length; i++) {
-              final BalancerDatanode.StorageGroup g = storageGroupMap.get(
+              final StorageGroup g = storageGroupMap.get(
                   datanodeUuids[i], storageTypes[i]);
               if (g != null) { // not unknown
                 block.addLocation(g);
@@ -617,7 +601,7 @@ public class Dispatcher {
     private PendingMove chooseNextMove() {
       for (Iterator<Task> i = tasks.iterator(); i.hasNext();) {
         final Task task = i.next();
-        final BalancerDatanode target = task.target.getBalancerDatanode();
+        final DDatanode target = task.target.getDDatanode();
         PendingMove pendingBlock = new PendingMove();
         if (target.addPendingBlock(pendingBlock)) {
           // target is not busy, so do a tentative block allocation
@@ -670,7 +654,7 @@ public class Dispatcher {
       final long startTime = Time.monotonicNow();
       this.blocksToReceive = 2 * getScheduledSize();
       boolean isTimeUp = false;
-      int noPendingBlockIteration = 0;
+      int noPendingMoveIteration = 0;
       while (!isTimeUp && getScheduledSize() > 0
           && (!srcBlocks.isEmpty() || blocksToReceive > 0)) {
         final PendingMove p = chooseNextMove();
@@ -699,11 +683,11 @@ public class Dispatcher {
             return;
           }
         } else {
-          // source node cannot find a pendingBlockToMove, iteration +1
-          noPendingBlockIteration++;
+          // source node cannot find a pending block to move, iteration +1
+          noPendingMoveIteration++;
          // if no block can be moved for the source node's tasks,
          // jump out of the while-loop after 5 iterations.
-          if (noPendingBlockIteration >= MAX_NO_PENDING_MOVE_ITERATIONS) {
+          if (noPendingMoveIteration >= MAX_NO_PENDING_MOVE_ITERATIONS) {
             resetScheduledSize();
           }
         }
@@ -726,29 +710,19 @@ public class Dispatcher {
     }
   }
 
-  Dispatcher(NameNodeConnector theblockpool, Set<String> includedNodes,
-      Set<String> excludedNodes, Configuration conf) {
-    this.nnc = theblockpool;
-    this.keyManager = nnc.getKeyManager();
+  public Dispatcher(NameNodeConnector nnc, Set<String> includedNodes,
+      Set<String> excludedNodes, long movedWinWidth, int moverThreads,
+      int dispatcherThreads, int maxConcurrentMovesPerNode, Configuration conf) {
+    this.nnc = nnc;
     this.excludedNodes = excludedNodes;
     this.includedNodes = includedNodes;
-
-    final long movedWinWidth = conf.getLong(
-        DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
-        DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
-    movedBlocks = new MovedBlocks<BalancerDatanode.StorageGroup>(movedWinWidth);
+    this.movedBlocks = new MovedBlocks<StorageGroup>(movedWinWidth);
 
     this.cluster = NetworkTopology.getInstance(conf);
 
-    this.moveExecutor = Executors.newFixedThreadPool(conf.getInt(
-        DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
-        DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT));
-    this.dispatchExecutor = Executors.newFixedThreadPool(conf.getInt(
-        DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
-        DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT));
-    this.maxConcurrentMovesPerNode = conf.getInt(
-        DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
-        DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
+    this.moveExecutor = Executors.newFixedThreadPool(moverThreads);
+    this.dispatchExecutor = Executors.newFixedThreadPool(dispatcherThreads);
+    this.maxConcurrentMovesPerNode = maxConcurrentMovesPerNode;
 
     final boolean fallbackToSimpleAuthAllowed = conf.getBoolean(
         CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY,
@@ -784,7 +758,7 @@ public class Dispatcher {
     return b;
   }
 
-  void add(Source source, BalancerDatanode.StorageGroup target) {
+  void add(Source source, StorageGroup target) {
     sources.add(source);
     targets.add(target);
   }
@@ -826,8 +800,8 @@ public class Dispatcher {
     return trimmed;
   }
 
-  public BalancerDatanode newDatanode(DatanodeStorageReport r) {
-    return new BalancerDatanode(r, maxConcurrentMovesPerNode);
+  public DDatanode newDatanode(DatanodeStorageReport r) {
+    return new DDatanode(r, maxConcurrentMovesPerNode);
   }
 
   public boolean dispatchAndCheckContinue() throws InterruptedException {
@@ -884,8 +858,8 @@ public class Dispatcher {
   private void waitForMoveCompletion() {
     for(;;) {
       boolean empty = true;
-      for (BalancerDatanode.StorageGroup t : targets) {
-        if (!t.getBalancerDatanode().isPendingQEmpty()) {
+      for (StorageGroup t : targets) {
+        if (!t.getDDatanode().isPendingQEmpty()) {
           empty = false;
           break;
         }
@@ -907,8 +881,8 @@ public class Dispatcher {
    * 2. the block does not have a replica on the target;
    * 3. doing the move does not reduce the number of racks that the block has
    */
-  private boolean isGoodBlockCandidate(Source source,
-      BalancerDatanode.StorageGroup target, DBlock block) {
+  private boolean isGoodBlockCandidate(Source source, StorageGroup target,
+      DBlock block) {
     if (source.storageType != target.storageType) {
       return false;
     }
@@ -933,17 +907,17 @@ public class Dispatcher {
    * Determine whether moving the given block replica from source to target
    * would reduce the number of racks of the block replicas.
    */
-  private boolean reduceNumOfRacks(Source source,
-      BalancerDatanode.StorageGroup target, DBlock block) {
-    final DatanodeInfo sourceDn = source.getDatanode();
-    if (cluster.isOnSameRack(sourceDn, target.getDatanode())) {
+  private boolean reduceNumOfRacks(Source source, StorageGroup target,
+      DBlock block) {
+    final DatanodeInfo sourceDn = source.getDatanodeInfo();
+    if (cluster.isOnSameRack(sourceDn, target.getDatanodeInfo())) {
       // source and target are on the same rack
       return false;
     }
     boolean notOnSameRack = true;
     synchronized (block) {
-      for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
-        if (cluster.isOnSameRack(loc.getDatanode(), target.getDatanode())) {
+      for (StorageGroup loc : block.getLocations()) {
+        if (cluster.isOnSameRack(loc.getDatanodeInfo(), target.getDatanodeInfo())) {
           notOnSameRack = false;
           break;
         }
@@ -953,8 +927,8 @@ public class Dispatcher {
       // target is not on the same rack as any replica
       return false;
     }
-    for (BalancerDatanode.StorageGroup g : block.getLocations()) {
-      if (g != source && cluster.isOnSameRack(g.getDatanode(), sourceDn)) {
+    for (StorageGroup g : block.getLocations()) {
+      if (g != source && cluster.isOnSameRack(g.getDatanodeInfo(), sourceDn)) {
         // source is on the same rack of another replica
         return false;
       }
@@ -971,10 +945,10 @@ public class Dispatcher {
    *         group with target
    */
   private boolean isOnSameNodeGroupWithReplicas(
-      BalancerDatanode.StorageGroup target, DBlock block, Source source) {
-    final DatanodeInfo targetDn = target.getDatanode();
-    for (BalancerDatanode.StorageGroup g : block.getLocations()) {
-      if (g != source && cluster.isOnSameNodeGroup(g.getDatanode(), targetDn)) {
+      StorageGroup target, DBlock block, Source source) {
+    final DatanodeInfo targetDn = target.getDatanodeInfo();
+    for (StorageGroup g : block.getLocations()) {
+      if (g != source && cluster.isOnSameNodeGroup(g.getDatanodeInfo(), targetDn)) {
         return true;
       }
     }

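The constructor refactor pulls configuration parsing out of Dispatcher: callers now read the balancer keys themselves and pass plain values in. A sketch of the presumed caller-side wiring, reusing the keys and defaults the old constructor read (the matching Balancer change is outside this hunk):

    final long movedWinWidth = conf.getLong(
        DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
        DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
    final int moverThreads = conf.getInt(
        DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
        DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT);
    final int dispatcherThreads = conf.getInt(
        DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
        DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT);
    final int maxConcurrentMoves = conf.getInt(
        DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
        DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
    // Dispatcher no longer imports DFSConfigKeys; the caller supplies the values.
    final Dispatcher dispatcher = new Dispatcher(nnc, includedNodes, excludedNodes,
        movedWinWidth, moverThreads, dispatcherThreads, maxConcurrentMoves, conf);
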
+ 21 - 13
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppNewSavedEvent.java → hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/ExitStatus.java

@@ -15,22 +15,30 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+package org.apache.hadoop.hdfs.server.balancer;
 
-package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-
-public class RMAppNewSavedEvent extends RMAppEvent {
+/**
+ * Exit status - The value associated with each exit status maps directly
+ * to the process's exit code on the command line.
+ */
+public enum ExitStatus {
+  SUCCESS(0),
+  IN_PROGRESS(1),
+  ALREADY_RUNNING(-1),
+  NO_MOVE_BLOCK(-2),
+  NO_MOVE_PROGRESS(-3),
+  IO_EXCEPTION(-4),
+  ILLEGAL_ARGUMENTS(-5),
+  INTERRUPTED(-6);
 
-  private final Exception storedException;
+  private final int code;
 
-  public RMAppNewSavedEvent(ApplicationId appId, Exception storedException) {
-    super(appId, RMAppEventType.APP_NEW_SAVED);
-    this.storedException = storedException;
+  private ExitStatus(int code) {
+    this.code = code;
   }
-
-  public Exception getStoredException() {
-    return storedException;
+  
+  /** @return the command line exit code. */
+  public int getExitCode() {
+    return code;
   }
-
 }

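Mapping a raw process exit code back to an ExitStatus takes a small reverse lookup; fromCode() below is a hypothetical helper for illustration, not part of this patch:

    /** Hypothetical reverse lookup; not part of this patch. */
    static ExitStatus fromCode(int code) {
      for (ExitStatus s : ExitStatus.values()) {
        if (s.getExitCode() == code) {
          return s;
        }
      }
      throw new IllegalArgumentException("Unknown exit code: " + code);
    }
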
+ 4 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

@@ -768,8 +768,6 @@ public class FSDirectory implements Closeable {
     checkSnapshot(srcInode, null);
   }
 
-
-
   private class RenameOperation {
     private final INodesInPath srcIIP;
     private final INodesInPath dstIIP;
@@ -802,7 +800,7 @@ public class FSDirectory implements Closeable {
       // snapshot is taken on the dst tree, changes will be recorded in the latest
       // snapshot of the src tree.
       if (isSrcInSnapshot) {
-        srcChild = srcChild.recordModification(srcIIP.getLatestSnapshotId());
+        srcChild.recordModification(srcIIP.getLatestSnapshotId());
       }
 
       // check srcChild for reference
@@ -932,8 +930,7 @@ public class FSDirectory implements Closeable {
       updateCount(iip, 0, dsDelta, true);
     }
 
-    file = file.setFileReplication(replication, iip.getLatestSnapshotId(),
-        inodeMap);
+    file.setFileReplication(replication, iip.getLatestSnapshotId());
     
     final short newBR = file.getBlockReplication(); 
     // check newBR < oldBR case. 
@@ -1216,8 +1213,7 @@ public class FSDirectory implements Closeable {
 
     // record modification
     final int latestSnapshot = iip.getLatestSnapshotId();
-    targetNode = targetNode.recordModification(latestSnapshot);
-    iip.setLastINode(targetNode);
+    targetNode.recordModification(latestSnapshot);
 
     // Remove the node from the namespace
     long removed = removeLastINode(iip);
@@ -2126,7 +2122,7 @@ public class FSDirectory implements Closeable {
       }
 
       final int latest = iip.getLatestSnapshotId();
-      dirNode = dirNode.recordModification(latest);
+      dirNode.recordModification(latest);
       dirNode.setQuota(nsQuota, dsQuota);
       return dirNode;
     }

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -2515,7 +2515,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
                                    boolean writeToEditLog,
                                    int latestSnapshot, boolean logRetryCache)
       throws IOException {
-    file = file.recordModification(latestSnapshot);
+    file.recordModification(latestSnapshot);
     final INodeFile cons = file.toUnderConstruction(leaseHolder, clientMachine);
 
     leaseManager.addLease(cons.getFileUnderConstructionFeature()
@@ -4214,7 +4214,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     Preconditions.checkArgument(uc != null);
     leaseManager.removeLease(uc.getClientName(), src);
     
-    pendingFile = pendingFile.recordModification(latestSnapshot);
+    pendingFile.recordModification(latestSnapshot);
 
     // The file is no longer pending.
     // Create permanent INode, update blocks. No need to replace the inode here

+ 28 - 31
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java

@@ -97,9 +97,9 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   /** Set user */
   final INode setUser(String user, int latestSnapshotId)
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latestSnapshotId);
-    nodeToUpdate.setUser(user);
-    return nodeToUpdate;
+    recordModification(latestSnapshotId);
+    setUser(user);
+    return this;
   }
   /**
    * @param snapshotId
@@ -122,9 +122,9 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   /** Set group */
   final INode setGroup(String group, int latestSnapshotId)
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latestSnapshotId);
-    nodeToUpdate.setGroup(group);
-    return nodeToUpdate;
+    recordModification(latestSnapshotId);
+    setGroup(group);
+    return this;
   }
 
   /**
@@ -148,9 +148,9 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   /** Set the {@link FsPermission} of this {@link INode} */
   INode setPermission(FsPermission permission, int latestSnapshotId) 
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latestSnapshotId);
-    nodeToUpdate.setPermission(permission);
-    return nodeToUpdate;
+    recordModification(latestSnapshotId);
+    setPermission(permission);
+    return this;
   }
 
   abstract AclFeature getAclFeature(int snapshotId);
@@ -164,18 +164,18 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
 
   final INode addAclFeature(AclFeature aclFeature, int latestSnapshotId)
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latestSnapshotId);
-    nodeToUpdate.addAclFeature(aclFeature);
-    return nodeToUpdate;
+    recordModification(latestSnapshotId);
+    addAclFeature(aclFeature);
+    return this;
   }
 
   abstract void removeAclFeature();
 
   final INode removeAclFeature(int latestSnapshotId)
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latestSnapshotId);
-    nodeToUpdate.removeAclFeature();
-    return nodeToUpdate;
+    recordModification(latestSnapshotId);
+    removeAclFeature();
+    return this;
   }
 
   /**
@@ -199,9 +199,9 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   
   final INode addXAttrFeature(XAttrFeature xAttrFeature, int latestSnapshotId) 
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latestSnapshotId);
-    nodeToUpdate.addXAttrFeature(xAttrFeature);
-    return nodeToUpdate;
+    recordModification(latestSnapshotId);
+    addXAttrFeature(xAttrFeature);
+    return this;
   }
   
   /**
@@ -211,9 +211,9 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   
   final INode removeXAttrFeature(int lastestSnapshotId)
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(lastestSnapshotId);
-    nodeToUpdate.removeXAttrFeature();
-    return nodeToUpdate;
+    recordModification(lastestSnapshotId);
+    removeXAttrFeature();
+    return this;
   }
   
   /**
@@ -298,11 +298,8 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
    * @param latestSnapshotId The id of the latest snapshot that has been taken.
    *                         Note that it is {@link Snapshot#CURRENT_STATE_ID} 
    *                         if no snapshots have been taken.
-   * @return The current inode, which usually is the same object of this inode.
-   *         However, in some cases, this inode may be replaced with a new inode
-   *         for maintaining snapshots. The current inode is then the new inode.
    */
-  abstract INode recordModification(final int latestSnapshotId)
+  abstract void recordModification(final int latestSnapshotId)
       throws QuotaExceededException;
 
   /** Check whether it's a reference. */
@@ -652,9 +649,9 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
   /** Set the last modification time of inode. */
   public final INode setModificationTime(long modificationTime,
       int latestSnapshotId) throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latestSnapshotId);
-    nodeToUpdate.setModificationTime(modificationTime);
-    return nodeToUpdate;
+    recordModification(latestSnapshotId);
+    setModificationTime(modificationTime);
+    return this;
   }
 
   /**
@@ -682,9 +679,9 @@ public abstract class INode implements INodeAttributes, Diff.Element<byte[]> {
    */
   public final INode setAccessTime(long accessTime, int latestSnapshotId)
       throws QuotaExceededException {
-    final INode nodeToUpdate = recordModification(latestSnapshotId);
-    nodeToUpdate.setAccessTime(accessTime);
-    return nodeToUpdate;
+    recordModification(latestSnapshotId);
+    setAccessTime(accessTime);
+    return this;
   }
 
 

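All of these setters now share the same record-then-mutate shape: recordModification() performs snapshot bookkeeping as a side effect and never swaps the inode, so a setter can mutate in place and return this. A toy illustration of the contract (simplified stand-in, not HDFS code):

    abstract class ToyINode {
      private String user;

      /** Snapshot bookkeeping only; never replaces this inode. */
      abstract void recordModification(int latestSnapshotId);

      final ToyINode setUser(String user, int latestSnapshotId) {
        recordModification(latestSnapshotId); // record first
        this.user = user;                     // then mutate in place
        return this;                          // always the same object now
      }
    }
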
+ 1 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java

@@ -318,7 +318,7 @@ public class INodeDirectory extends INodeWithAdditionalFields
   }
 
   @Override
-  public INodeDirectory recordModification(int latestSnapshotId) 
+  public void recordModification(int latestSnapshotId)
       throws QuotaExceededException {
     if (isInLatestSnapshot(latestSnapshotId)
         && !shouldRecordInSrcSnapshot(latestSnapshotId)) {
@@ -330,7 +330,6 @@ public class INodeDirectory extends INodeWithAdditionalFields
       // record self in the diff list if necessary
       sf.getDiffs().saveSelf2Snapshot(latestSnapshotId, this, null);
     }
-    return this;
   }
 
   /**

+ 5 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java

@@ -296,7 +296,7 @@ public class INodeFile extends INodeWithAdditionalFields
   }
 
   @Override
-  public INodeFile recordModification(final int latestSnapshotId) 
+  public void recordModification(final int latestSnapshotId)
       throws QuotaExceededException {
     if (isInLatestSnapshot(latestSnapshotId)
         && !shouldRecordInSrcSnapshot(latestSnapshotId)) {
@@ -308,7 +308,6 @@ public class INodeFile extends INodeWithAdditionalFields
       // record self in the diff list if necessary
       sf.getDiffs().saveSelf2Snapshot(latestSnapshotId, this, null);
     }
-    return this;
   }
   
   public FileDiffList getDiffs() {
@@ -356,11 +355,10 @@ public class INodeFile extends INodeWithAdditionalFields
 
   /** Set the replication factor of this file. */
   public final INodeFile setFileReplication(short replication,
-      int latestSnapshotId, final INodeMap inodeMap)
-      throws QuotaExceededException {
-    final INodeFile nodeToUpdate = recordModification(latestSnapshotId);
-    nodeToUpdate.setFileReplication(replication);
-    return nodeToUpdate;
+      int latestSnapshotId) throws QuotaExceededException {
+    recordModification(latestSnapshotId);
+    setFileReplication(replication);
+    return this;
   }
 
   /** @return preferred block size (in bytes) of the file. */

+ 1 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java

@@ -93,9 +93,8 @@ public class INodeMap {
         "", "", new FsPermission((short) 0)), 0, 0) {
       
       @Override
-      INode recordModification(int latestSnapshotId)
+      void recordModification(int latestSnapshotId)
           throws QuotaExceededException {
-        return null;
       }
       
       @Override

+ 1 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java

@@ -287,11 +287,9 @@ public abstract class INodeReference extends INode {
   }
 
   @Override
-  final INode recordModification(int latestSnapshotId)
+  final void recordModification(int latestSnapshotId)
       throws QuotaExceededException {
     referred.recordModification(latestSnapshotId);
-    // reference is never replaced 
-    return this;
   }
 
   @Override // used by WithCount

+ 1 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java

@@ -47,12 +47,11 @@ public class INodeSymlink extends INodeWithAdditionalFields {
   }
 
   @Override
-  INode recordModification(int latestSnapshotId) throws QuotaExceededException {
+  void recordModification(int latestSnapshotId) throws QuotaExceededException {
     if (isInLatestSnapshot(latestSnapshotId)) {
       INodeDirectory parent = getParent();
       parent.saveChild2Snapshot(this, latestSnapshotId, new INodeSymlink(this));
     }
-    return this;
   }
 
   /** @return true unconditionally. */

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java

@@ -570,10 +570,10 @@ public class TestBalancer {
     final int r = Balancer.run(namenodes, p, conf);
     if (conf.getInt(DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY, 
         DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT) ==0) {
-      assertEquals(Balancer.ReturnStatus.NO_MOVE_PROGRESS.code, r);
+      assertEquals(ExitStatus.NO_MOVE_PROGRESS.getExitCode(), r);
       return;
     } else {
-      assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
+      assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
     }
     waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
     LOG.info("Rebalancing with default ctor.");
@@ -717,7 +717,7 @@ public class TestBalancer {
           Balancer.Parameters.DEFAULT.threshold,
           datanodes, Balancer.Parameters.DEFAULT.nodesToBeIncluded);
       final int r = Balancer.run(namenodes, p, conf);
-      assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
+      assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
     } finally {
       cluster.shutdown();
     }

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java

@@ -98,7 +98,7 @@ public class TestBalancerWithHANameNodes {
       assertEquals(1, namenodes.size());
       assertTrue(namenodes.contains(HATestUtil.getLogicalUri(cluster)));
       final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
-      assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
+      assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
       TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client,
           cluster, Balancer.Parameters.DEFAULT);
     } finally {

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java

@@ -160,7 +160,7 @@ public class TestBalancerWithMultipleNameNodes {
     // start rebalancing
     final Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(s.conf);
     final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, s.conf);
-    Assert.assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
+    Assert.assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
 
     LOG.info("BALANCER 2");
     wait(s.clients, totalUsed, totalCapacity);

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java

@@ -176,7 +176,7 @@ public class TestBalancerWithNodeGroup {
     // start rebalancing
     Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
     final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
-    assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
+    assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
 
     waitForHeartBeat(totalUsedSpace, totalCapacity);
     LOG.info("Rebalancing with default factor.");
@@ -190,8 +190,8 @@ public class TestBalancerWithNodeGroup {
     // start rebalancing
     Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
     final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
-    Assert.assertTrue(r == Balancer.ReturnStatus.SUCCESS.code ||
-        (r == Balancer.ReturnStatus.NO_MOVE_PROGRESS.code));
+    Assert.assertTrue(r == ExitStatus.SUCCESS.getExitCode() ||
+        (r == ExitStatus.NO_MOVE_PROGRESS.getExitCode()));
     waitForHeartBeat(totalUsedSpace, totalCapacity);
     LOG.info("Rebalancing with default factor.");
   }

+ 18 - 0
hadoop-yarn-project/CHANGES.txt

@@ -100,6 +100,19 @@ Release 2.6.0 - UNRELEASED
     YARN-2212. ApplicationMaster needs to find a way to update the AMRMToken
     periodically. (xgong)
 
+    YARN-2026. Fair scheduler: Consider only active queues for computing fairshare. 
+    (Ashwin Shankar via kasha)
+
+    YARN-1954. Added waitFor to AMRMClient(Async). (Tsuyoshi Ozawa via zjshen)
+
+    YARN-2302. Refactor TimelineWebServices. (Zhijie Shen via junping_du)
+
+    YARN-2337. ResourceManager sets ClientRMService in RMContext multiple times.
+    (Zhihai Xu via kasha)
+
+    YARN-2138. Cleaned up notifyDone* APIs in RMStateStore. (Varun Saxena via
+    jianhe)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -154,6 +167,11 @@ Release 2.6.0 - UNRELEASED
     YARN-2008. Fixed CapacityScheduler to calculate headroom based on max available
     capacity instead of configured max capacity. (Craig Welch via jianhe)
 
+    YARN-2400. Fixed TestAMRestart fails intermittently. (Jian He via xgong)
+
+    YARN-2361. RMAppAttempt state machine entries for KILLED state has duplicate
+    event entries. (Zhihai Xu via kasha)
+
 Release 2.5.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 63 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/AMRMClient.java

@@ -22,6 +22,8 @@ import java.io.IOException;
 import java.util.Collection;
 import java.util.List;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
@@ -37,12 +39,14 @@ import org.apache.hadoop.yarn.client.api.impl.AMRMClientImpl;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 
 import com.google.common.base.Preconditions;
+import com.google.common.base.Supplier;
 import com.google.common.collect.ImmutableList;
 
 @InterfaceAudience.Public
 @InterfaceStability.Stable
 public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
     AbstractService {
+  private static final Log LOG = LogFactory.getLog(AMRMClient.class);
 
   /**
    * Create a new instance of AMRMClient.
@@ -336,4 +340,63 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
     return nmTokenCache;
   }
 
+  /**
+   * Wait for <code>check</code> to return true, checking every 1000 ms.
+   * See also {@link #waitFor(com.google.common.base.Supplier, int)}
+   * and {@link #waitFor(com.google.common.base.Supplier, int, int)}
+   * @param check the condition to wait for
+   */
+  public void waitFor(Supplier<Boolean> check) throws InterruptedException {
+    waitFor(check, 1000);
+  }
+
+  /**
+   * Wait for <code>check</code> to return true, checking every
+   * <code>checkEveryMillis</code> ms.
+   * See also {@link #waitFor(com.google.common.base.Supplier, int, int)}
+   * @param check user defined checker
+   * @param checkEveryMillis interval in milliseconds between calls to <code>check</code>
+   */
+  public void waitFor(Supplier<Boolean> check, int checkEveryMillis)
+      throws InterruptedException {
+    waitFor(check, checkEveryMillis, 1);
+  }
+
+  /**
+   * Wait for <code>check</code> to return true, checking every
+   * <code>checkEveryMillis</code> ms. In the main loop, this method logs
+   * the message "waiting in main loop" every <code>logInterval</code>
+   * iterations to confirm the thread is alive.
+   * @param check user defined checker
+   * @param checkEveryMillis interval to call <code>check</code>
+   * @param logInterval number of iterations between log messages
+   */
+  public void waitFor(Supplier<Boolean> check, int checkEveryMillis,
+      int logInterval) throws InterruptedException {
+    Preconditions.checkNotNull(check, "check should not be null");
+    Preconditions.checkArgument(checkEveryMillis >= 0,
+        "checkEveryMillis should be a non-negative value");
+    Preconditions.checkArgument(logInterval >= 0,
+        "logInterval should be a non-negative value");
+
+    int loggingCounter = logInterval;
+    do {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Check the condition for main loop.");
+      }
+
+      boolean result = check.get();
+      if (result) {
+        LOG.info("Exits the main loop.");
+        return;
+      }
+      if (--loggingCounter <= 0) {
+        LOG.info("Waiting in main loop.");
+        loggingCounter = logInterval;
+      }
+
+      Thread.sleep(checkEveryMillis);
+    } while (true);
+  }
+
 }

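Typical use pairs the new blocking helper with a Guava Supplier polling a flag that some other thread eventually sets; a minimal sketch (the flag and client setup are illustrative):

    // assumes: import com.google.common.base.Supplier;
    //          import java.util.concurrent.atomic.AtomicBoolean;
    final AtomicBoolean finished = new AtomicBoolean(false);
    // ... another thread eventually calls finished.set(true) ...
    amRMClient.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        return finished.get();
      }
    }, 100, 10); // poll every 100 ms, log every 10th iteration
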
+ 64 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/async/AMRMClientAsync.java

@@ -18,11 +18,15 @@
 
 package org.apache.hadoop.yarn.client.api.async;
 
+import com.google.common.base.Preconditions;
+import com.google.common.base.Supplier;
 import java.io.IOException;
 import java.util.Collection;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Stable;
@@ -90,6 +94,7 @@ import com.google.common.annotations.VisibleForTesting;
 @Stable
 public abstract class AMRMClientAsync<T extends ContainerRequest> 
 extends AbstractService {
+  private static final Log LOG = LogFactory.getLog(AMRMClientAsync.class);
   
   protected final AMRMClient<T> client;
   protected final CallbackHandler handler;
@@ -189,6 +194,65 @@ extends AbstractService {
    */
   public abstract int getClusterNodeCount();
 
+  /**
+   * Wait for <code>check</code> to return true, checking every 1000 ms.
+   * See also {@link #waitFor(com.google.common.base.Supplier, int)}
+   * and {@link #waitFor(com.google.common.base.Supplier, int, int)}
+   * @param check the condition to wait for
+   */
+  public void waitFor(Supplier<Boolean> check) throws InterruptedException {
+    waitFor(check, 1000);
+  }
+
+  /**
+   * Wait for <code>check</code> to return true, checking every
+   * <code>checkEveryMillis</code> ms.
+   * See also {@link #waitFor(com.google.common.base.Supplier, int, int)}
+   * @param check user defined checker
+   * @param checkEveryMillis interval in milliseconds between calls to <code>check</code>
+   */
+  public void waitFor(Supplier<Boolean> check, int checkEveryMillis)
+      throws InterruptedException {
+    waitFor(check, checkEveryMillis, 1);
+  }
+
+  /**
+   * Wait for <code>check</code> to return true, checking every
+   * <code>checkEveryMillis</code> ms. In the main loop, this method logs
+   * the message "waiting in main loop" every <code>logInterval</code>
+   * iterations to confirm the thread is alive.
+   * @param check user defined checker
+   * @param checkEveryMillis interval to call <code>check</code>
+   * @param logInterval number of iterations between log messages
+   */
+  public void waitFor(Supplier<Boolean> check, int checkEveryMillis,
+      int logInterval) throws InterruptedException {
+    Preconditions.checkNotNull(check, "check should not be null");
+    Preconditions.checkArgument(checkEveryMillis >= 0,
+        "checkEveryMillis should be a non-negative value");
+    Preconditions.checkArgument(logInterval >= 0,
+        "logInterval should be a non-negative value");
+
+    int loggingCounter = logInterval;
+    do {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Check the condition for main loop.");
+      }
+
+      boolean result = check.get();
+      if (result) {
+        LOG.info("Exits the main loop.");
+        return;
+      }
+      if (--loggingCounter <= 0) {
+        LOG.info("Waiting in main loop.");
+        loggingCounter = logInterval;
+      }
+
+      Thread.sleep(checkEveryMillis);
+    } while (true);
+  }
+
   public interface CallbackHandler {
     
     /**

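The async client exposes the identical waitFor contract; the new shutdown test below drives it by polling a volatile flag that the CallbackHandler flips. Distilled from that test (field names as in the test code):

    asyncClient.registerApplicationMaster("localhost", 1234, null);
    asyncClient.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        return callbackHandler.reboot; // volatile flag set on AM_SHUTDOWN
      }
    });
    asyncClient.stop();
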
+ 73 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/async/impl/TestAMRMClientAsync.java

@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.client.api.async.impl;
 
+import com.google.common.base.Supplier;
 import static org.mockito.Matchers.anyFloat;
 import static org.mockito.Matchers.anyInt;
 import static org.mockito.Matchers.anyString;
@@ -180,7 +181,7 @@ public class TestAMRMClientAsync {
     AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
     when(client.allocate(anyFloat())).thenThrow(ex);
 
-    AMRMClientAsync<ContainerRequest> asyncClient = 
+    AMRMClientAsync<ContainerRequest> asyncClient =
         AMRMClientAsync.createAMRMClientAsync(client, 20, callbackHandler);
     asyncClient.init(conf);
     asyncClient.start();
@@ -228,6 +229,41 @@ public class TestAMRMClientAsync {
     asyncClient.stop();
   }
 
+  @Test (timeout = 10000)
+  public void testAMRMClientAsyncShutDownWithWaitFor() throws Exception {
+    Configuration conf = new Configuration();
+    final TestCallbackHandler callbackHandler = new TestCallbackHandler();
+    @SuppressWarnings("unchecked")
+    AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
+
+    final AllocateResponse shutDownResponse = createAllocateResponse(
+        new ArrayList<ContainerStatus>(), new ArrayList<Container>(), null);
+    shutDownResponse.setAMCommand(AMCommand.AM_SHUTDOWN);
+    when(client.allocate(anyFloat())).thenReturn(shutDownResponse);
+
+    AMRMClientAsync<ContainerRequest> asyncClient =
+        AMRMClientAsync.createAMRMClientAsync(client, 10, callbackHandler);
+    asyncClient.init(conf);
+    asyncClient.start();
+
+    Supplier<Boolean> checker = new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return callbackHandler.reboot;
+      }
+    };
+
+    asyncClient.registerApplicationMaster("localhost", 1234, null);
+    asyncClient.waitFor(checker);
+
+    asyncClient.stop();
+    // stopping should have joined all threads and completed all callbacks
+    Assert.assertTrue(callbackHandler.callbackCount == 0);
+
+    verify(client, times(1)).allocate(anyFloat());
+    asyncClient.stop();
+  }
+
   @Test (timeout = 5000)
   public void testCallAMRMClientAsyncStopFromCallbackHandler()
       throws YarnException, IOException, InterruptedException {
@@ -262,6 +298,40 @@ public class TestAMRMClientAsync {
     }
   }
 
+  @Test (timeout = 5000)
+  public void testCallAMRMClientAsyncStopFromCallbackHandlerWithWaitFor()
+      throws YarnException, IOException, InterruptedException {
+    Configuration conf = new Configuration();
+    final TestCallbackHandler2 callbackHandler = new TestCallbackHandler2();
+    @SuppressWarnings("unchecked")
+    AMRMClient<ContainerRequest> client = mock(AMRMClientImpl.class);
+
+    List<ContainerStatus> completed = Arrays.asList(
+        ContainerStatus.newInstance(newContainerId(0, 0, 0, 0),
+            ContainerState.COMPLETE, "", 0));
+    final AllocateResponse response = createAllocateResponse(completed,
+        new ArrayList<Container>(), null);
+
+    when(client.allocate(anyFloat())).thenReturn(response);
+
+    AMRMClientAsync<ContainerRequest> asyncClient =
+        AMRMClientAsync.createAMRMClientAsync(client, 20, callbackHandler);
+    callbackHandler.asynClient = asyncClient;
+    asyncClient.init(conf);
+    asyncClient.start();
+
+    Supplier<Boolean> checker = new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return callbackHandler.notify;
+      }
+    };
+
+    asyncClient.registerApplicationMaster("localhost", 1234, null);
+    asyncClient.waitFor(checker);
+    Assert.assertTrue(checker.get());
+  }
+
   void runCallBackThrowOutException(TestCallbackHandler2 callbackHandler) throws
         InterruptedException, YarnException, IOException {
     Configuration conf = new Configuration();
@@ -342,7 +412,7 @@ public class TestAMRMClientAsync {
     private volatile List<ContainerStatus> completedContainers;
     private volatile List<Container> allocatedContainers;
     Exception savedException = null;
-    boolean reboot = false;
+    volatile boolean reboot = false;
     Object notifier = new Object();
     
     int callbackCount = 0;
@@ -432,7 +502,7 @@ public class TestAMRMClientAsync {
     @SuppressWarnings("rawtypes")
     AMRMClientAsync asynClient;
     boolean stop = true;
-    boolean notify = false;
+    volatile boolean notify = false;
     boolean throwOutException = false;
 
     @Override

+ 35 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java

@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.client.api.impl;
 
+import com.google.common.base.Supplier;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -814,6 +815,40 @@ public class TestAMRMClient {
     assertEquals(0, amClient.ask.size());
     assertEquals(0, amClient.release.size());
   }
+
+  class CountDownSupplier implements Supplier<Boolean> {
+    int counter = 0;
+    @Override
+    public Boolean get() {
+      counter++;
+      if (counter >= 3) {
+        return true;
+      } else {
+        return false;
+      }
+    }
+  };
+
+  @Test
+  public void testWaitFor() throws InterruptedException {
+    AMRMClientImpl<ContainerRequest> amClient = null;
+    CountDownSupplier countDownChecker = new CountDownSupplier();
+
+    try {
+      // start am rm client
+      amClient =
+          (AMRMClientImpl<ContainerRequest>) AMRMClient
+              .<ContainerRequest> createAMRMClient();
+      amClient.init(new YarnConfiguration());
+      amClient.start();
+      amClient.waitFor(countDownChecker, 1000);
+      assertEquals(3, countDownChecker.counter);
+    } finally {
+      if (amClient != null) {
+        amClient.stop();
+      }
+    }
+  }
   
   private void sleep(int sleepTime) {
     try {

+ 40 - 37
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java

@@ -39,6 +39,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.server.applicationhistoryservice.webapp.AHSWebApp;
 import org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore;
+import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
 import org.apache.hadoop.yarn.server.timeline.TimelineStore;
 import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
 import org.apache.hadoop.yarn.server.timeline.security.TimelineAuthenticationFilterInitializer;
@@ -59,12 +60,12 @@ public class ApplicationHistoryServer extends CompositeService {
   private static final Log LOG = LogFactory
     .getLog(ApplicationHistoryServer.class);
 
-  protected ApplicationHistoryClientService ahsClientService;
-  protected ApplicationHistoryManager historyManager;
-  protected TimelineStore timelineStore;
-  protected TimelineDelegationTokenSecretManagerService secretManagerService;
-  protected TimelineACLsManager timelineACLsManager;
-  protected WebApp webApp;
+  private ApplicationHistoryClientService ahsClientService;
+  private ApplicationHistoryManager historyManager;
+  private TimelineStore timelineStore;
+  private TimelineDelegationTokenSecretManagerService secretManagerService;
+  private TimelineDataManager timelineDataManager;
+  private WebApp webApp;
 
   public ApplicationHistoryServer() {
     super(ApplicationHistoryServer.class.getName());
@@ -72,15 +73,18 @@ public class ApplicationHistoryServer extends CompositeService {
 
   @Override
   protected void serviceInit(Configuration conf) throws Exception {
-    historyManager = createApplicationHistory();
-    ahsClientService = createApplicationHistoryClientService(historyManager);
-    addService(ahsClientService);
-    addService((Service) historyManager);
+    // init timeline services first
     timelineStore = createTimelineStore(conf);
     addIfService(timelineStore);
     secretManagerService = createTimelineDelegationTokenSecretManagerService(conf);
     addService(secretManagerService);
-    timelineACLsManager = createTimelineACLsManager(conf);
+    timelineDataManager = createTimelineDataManager(conf);
+
+    // init generic history service afterwards
+    historyManager = createApplicationHistoryManager(conf);
+    ahsClientService = createApplicationHistoryClientService(historyManager);
+    addService(ahsClientService);
+    addService((Service) historyManager);
 
     DefaultMetricsSystem.initialize("ApplicationHistoryServer");
     JvmMetrics.initSingleton("ApplicationHistoryServer", null);
@@ -111,21 +115,22 @@ public class ApplicationHistoryServer extends CompositeService {
 
   @Private
   @VisibleForTesting
-  public ApplicationHistoryClientService getClientService() {
+  ApplicationHistoryClientService getClientService() {
     return this.ahsClientService;
   }
 
-  protected ApplicationHistoryClientService
-      createApplicationHistoryClientService(
-          ApplicationHistoryManager historyManager) {
-    return new ApplicationHistoryClientService(historyManager);
-  }
-
-  protected ApplicationHistoryManager createApplicationHistory() {
-    return new ApplicationHistoryManagerImpl();
+  /**
+   * @return the timeline store
+   */
+  @Private
+  @VisibleForTesting
+  public TimelineStore getTimelineStore() {
+    return timelineStore;
   }
 
-  protected ApplicationHistoryManager getApplicationHistory() {
+  @Private
+  @VisibleForTesting
+  ApplicationHistoryManager getApplicationHistoryManager() {
     return this.historyManager;
   }
 
@@ -154,28 +159,35 @@ public class ApplicationHistoryServer extends CompositeService {
     launchAppHistoryServer(args);
   }
 
-  protected ApplicationHistoryManager createApplicationHistoryManager(
+  private ApplicationHistoryClientService
+      createApplicationHistoryClientService(
+          ApplicationHistoryManager historyManager) {
+    return new ApplicationHistoryClientService(historyManager);
+  }
+
+  private ApplicationHistoryManager createApplicationHistoryManager(
       Configuration conf) {
     return new ApplicationHistoryManagerImpl();
   }
 
-  protected TimelineStore createTimelineStore(
+  private TimelineStore createTimelineStore(
       Configuration conf) {
     return ReflectionUtils.newInstance(conf.getClass(
         YarnConfiguration.TIMELINE_SERVICE_STORE, LeveldbTimelineStore.class,
         TimelineStore.class), conf);
   }
 
-  protected TimelineDelegationTokenSecretManagerService
+  private TimelineDelegationTokenSecretManagerService
       createTimelineDelegationTokenSecretManagerService(Configuration conf) {
     return new TimelineDelegationTokenSecretManagerService();
   }
 
-  protected TimelineACLsManager createTimelineACLsManager(Configuration conf) {
-    return new TimelineACLsManager(conf);
+  private TimelineDataManager createTimelineDataManager(Configuration conf) {
+    return new TimelineDataManager(
+        timelineStore, new TimelineACLsManager(conf));
   }
 
-  protected void startWebApp() {
+  private void startWebApp() {
     Configuration conf = getConfig();
     // Always load pseudo authentication filter to parse "user.name" in a URL
     // to identify an HTTP request's user in insecure mode.
@@ -199,9 +211,8 @@ public class ApplicationHistoryServer extends CompositeService {
     try {
       AHSWebApp ahsWebApp = AHSWebApp.getInstance();
       ahsWebApp.setApplicationHistoryManager(historyManager);
-      ahsWebApp.setTimelineStore(timelineStore);
       ahsWebApp.setTimelineDelegationTokenSecretManagerService(secretManagerService);
-      ahsWebApp.setTimelineACLsManager(timelineACLsManager);
+      ahsWebApp.setTimelineDataManager(timelineDataManager);
       webApp =
           WebApps
             .$for("applicationhistory", ApplicationHistoryClientService.class,
@@ -213,14 +224,6 @@ public class ApplicationHistoryServer extends CompositeService {
       throw new YarnRuntimeException(msg, e);
     }
   }
-  /**
-   * @return ApplicationTimelineStore
-   */
-  @Private
-  @VisibleForTesting
-  public TimelineStore getTimelineStore() {
-    return timelineStore;
-  }
 
   private void doSecureLogin(Configuration conf) throws IOException {
     InetSocketAddress socAddr = getBindAddress(conf);

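The reordering in serviceInit is not cosmetic: createTimelineDataManager reads the timelineStore field, so the timeline services must be created before the generic history services that sit on top of them. A self-contained toy of that ordering constraint (the names are schematic, not the server's real types):

    public class InitOrderSketch {
      private Object timelineStore;

      private Object createTimelineStore() {
        return new Object();
      }

      private Object createTimelineDataManager() {
        // would observe a null store if the history side initialized first
        if (timelineStore == null) {
          throw new IllegalStateException("timeline services must init first");
        }
        return timelineStore;
      }

      void serviceInit() {
        timelineStore = createTimelineStore(); // timeline layer first
        Object dataManager = createTimelineDataManager();
        // ... generic history services are created afterwards
      }

      public static void main(String[] args) {
        new InitOrderSketch().serviceInit();
        System.out.println("initialized in the required order");
      }
    }
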
+ 7 - 18
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/webapp/AHSWebApp.java

@@ -22,8 +22,7 @@ import static org.apache.hadoop.yarn.util.StringHelper.pajoin;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.yarn.server.api.ApplicationContext;
 import org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryManager;
-import org.apache.hadoop.yarn.server.timeline.TimelineStore;
-import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
+import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
 import org.apache.hadoop.yarn.server.timeline.security.TimelineDelegationTokenSecretManagerService;
 import org.apache.hadoop.yarn.server.timeline.webapp.TimelineWebServices;
 import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
@@ -36,9 +35,8 @@ import com.google.common.annotations.VisibleForTesting;
 public class AHSWebApp extends WebApp implements YarnWebParams {
 
   private ApplicationHistoryManager applicationHistoryManager;
-  private TimelineStore timelineStore;
   private TimelineDelegationTokenSecretManagerService secretManagerService;
-  private TimelineACLsManager timelineACLsManager;
+  private TimelineDataManager timelineDataManager;
 
   private static AHSWebApp instance = null;
 
@@ -68,14 +66,6 @@ public class AHSWebApp extends WebApp implements YarnWebParams {
     this.applicationHistoryManager = applicationHistoryManager;
   }
 
-  public TimelineStore getTimelineStore() {
-    return timelineStore;
-  }
-
-  public void setTimelineStore(TimelineStore timelineStore) {
-    this.timelineStore = timelineStore;
-  }
-
   public TimelineDelegationTokenSecretManagerService
       getTimelineDelegationTokenSecretManagerService() {
     return secretManagerService;
@@ -86,12 +76,12 @@ public class AHSWebApp extends WebApp implements YarnWebParams {
     this.secretManagerService = secretManagerService;
   }
 
-  public TimelineACLsManager getTimelineACLsManager() {
-    return timelineACLsManager;
+  public TimelineDataManager getTimelineDataManager() {
+    return timelineDataManager;
   }
 
-  public void setTimelineACLsManager(TimelineACLsManager timelineACLsManager) {
-    this.timelineACLsManager = timelineACLsManager;
+  public void setTimelineDataManager(TimelineDataManager timelineDataManager) {
+    this.timelineDataManager = timelineDataManager;
   }
 
   @Override
@@ -101,10 +91,9 @@ public class AHSWebApp extends WebApp implements YarnWebParams {
     bind(TimelineWebServices.class);
     bind(GenericExceptionHandler.class);
     bind(ApplicationContext.class).toInstance(applicationHistoryManager);
-    bind(TimelineStore.class).toInstance(timelineStore);
     bind(TimelineDelegationTokenSecretManagerService.class).toInstance(
         secretManagerService);
-    bind(TimelineACLsManager.class).toInstance(timelineACLsManager);
+    bind(TimelineDataManager.class).toInstance(timelineDataManager);
     route("/", AHSController.class);
     route(pajoin("/apps", APP_STATE), AHSController.class);
     route(pajoin("/app", APPLICATION_ID), AHSController.class, "app");

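With the separate store and ACLs bindings removed, the web layer now depends on a single TimelineDataManager binding. A minimal Guice module in the same bind-to-instance style; the module class and nested stand-in type are illustrative, only the binding call mirrors the diff:

    import com.google.inject.AbstractModule;
    import com.google.inject.Guice;
    import com.google.inject.Injector;

    public class TimelineBindingSketch {
      // stand-in for the real TimelineDataManager, to keep the sketch
      // self-contained
      static class TimelineDataManager {}

      static class TimelineModule extends AbstractModule {
        private final TimelineDataManager dataManager;

        TimelineModule(TimelineDataManager dataManager) {
          this.dataManager = dataManager;
        }

        @Override
        protected void configure() {
          // one binding replaces the former TimelineStore and
          // TimelineACLsManager bindings
          bind(TimelineDataManager.class).toInstance(dataManager);
        }
      }

      public static void main(String[] args) {
        Injector injector =
            Guice.createInjector(new TimelineModule(new TimelineDataManager()));
        System.out.println(injector.getInstance(TimelineDataManager.class));
      }
    }
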
+ 319 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/TimelineDataManager.java

@@ -0,0 +1,319 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.timeline;
+
+import static org.apache.hadoop.yarn.util.StringHelper.CSV_JOINER;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.SortedSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities;
+import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
+import org.apache.hadoop.yarn.api.records.timeline.TimelineEvents;
+import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.server.timeline.TimelineReader.Field;
+import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
+import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
+
+/**
+ * The class wraps the timeline store and the ACLs manager. It performs some
+ * non-trivial manipulation of the timeline data before putting it into, or
+ * after getting it from, the timeline store, and checks the user's access to
+ * it.
+ */
+public class TimelineDataManager {
+
+  private static final Log LOG = LogFactory.getLog(TimelineDataManager.class);
+
+  private TimelineStore store;
+  private TimelineACLsManager timelineACLsManager;
+
+  public TimelineDataManager(TimelineStore store,
+      TimelineACLsManager timelineACLsManager) {
+    this.store = store;
+    this.timelineACLsManager = timelineACLsManager;
+  }
+
+  /**
+   * Get the timeline entities that the given user has access to. The meaning
+   * of each argument is documented in
+   * {@link TimelineReader#getEntities}.
+   * 
+   * @see TimelineReader#getEntities
+   */
+  public TimelineEntities getEntities(
+      String entityType,
+      NameValuePair primaryFilter,
+      Collection<NameValuePair> secondaryFilter,
+      Long windowStart,
+      Long windowEnd,
+      String fromId,
+      Long fromTs,
+      Long limit,
+      EnumSet<Field> fields,
+      UserGroupInformation callerUGI) throws YarnException, IOException {
+    TimelineEntities entities = null;
+    boolean modified = extendFields(fields);
+    entities = store.getEntities(
+        entityType,
+        limit,
+        windowStart,
+        windowEnd,
+        fromId,
+        fromTs,
+        primaryFilter,
+        secondaryFilter,
+        fields);
+    if (entities != null) {
+      Iterator<TimelineEntity> entitiesItr =
+          entities.getEntities().iterator();
+      while (entitiesItr.hasNext()) {
+        TimelineEntity entity = entitiesItr.next();
+        try {
+          // check ACLs
+          if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
+            entitiesItr.remove();
+          } else {
+            // clean up system data
+            if (modified) {
+              entity.setPrimaryFilters(null);
+            } else {
+              cleanupOwnerInfo(entity);
+            }
+          }
+        } catch (YarnException e) {
+          LOG.error("Error when verifying access for user " + callerUGI
+              + " on the events of the timeline entity "
+              + new EntityIdentifier(entity.getEntityId(),
+                  entity.getEntityType()), e);
+          entitiesItr.remove();
+        }
+      }
+    }
+    if (entities == null) {
+      return new TimelineEntities();
+    }
+    return entities;
+  }
+
+  /**
+   * Get the single timeline entity that the given user has access to. The
+   * meaning of each argument is documented in
+   * {@link TimelineReader#getEntity}.
+   * 
+   * @see TimelineReader#getEntity
+   */
+  public TimelineEntity getEntity(
+      String entityType,
+      String entityId,
+      EnumSet<Field> fields,
+      UserGroupInformation callerUGI) throws YarnException, IOException {
+    TimelineEntity entity = null;
+    boolean modified = extendFields(fields);
+    entity =
+        store.getEntity(entityId, entityType, fields);
+    if (entity != null) {
+      // check ACLs
+      if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
+        entity = null;
+      } else {
+        // clean up the system data
+        if (modified) {
+          entity.setPrimaryFilters(null);
+        } else {
+          cleanupOwnerInfo(entity);
+        }
+      }
+    }
+    return entity;
+  }
+
+  /**
+   * Get the events whose entities the given user has access to. The meaning of
+   * each argument is documented in
+   * {@link TimelineReader#getEntityTimelines}.
+   * 
+   * @see TimelineReader#getEntityTimelines
+   */
+  public TimelineEvents getEvents(
+      String entityType,
+      SortedSet<String> entityIds,
+      SortedSet<String> eventTypes,
+      Long windowStart,
+      Long windowEnd,
+      Long limit,
+      UserGroupInformation callerUGI) throws YarnException, IOException {
+    TimelineEvents events = null;
+    events = store.getEntityTimelines(
+        entityType,
+        entityIds,
+        limit,
+        windowStart,
+        windowEnd,
+        eventTypes);
+    if (events != null) {
+      Iterator<TimelineEvents.EventsOfOneEntity> eventsItr =
+          events.getAllEvents().iterator();
+      while (eventsItr.hasNext()) {
+        TimelineEvents.EventsOfOneEntity eventsOfOneEntity = eventsItr.next();
+        try {
+          TimelineEntity entity = store.getEntity(
+              eventsOfOneEntity.getEntityId(),
+              eventsOfOneEntity.getEntityType(),
+              EnumSet.of(Field.PRIMARY_FILTERS));
+          // check ACLs
+          if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
+            eventsItr.remove();
+          }
+        } catch (Exception e) {
+          LOG.error("Error when verifying access for user " + callerUGI
+              + " on the events of the timeline entity "
+              + new EntityIdentifier(eventsOfOneEntity.getEntityId(),
+                  eventsOfOneEntity.getEntityType()), e);
+          eventsItr.remove();
+        }
+      }
+    }
+    if (events == null) {
+      return new TimelineEvents();
+    }
+    return events;
+  }
+
+  /**
+   * Store the timeline entities in the store and set their owner to the
+   * given user.
+   */
+  public TimelinePutResponse postEntities(
+      TimelineEntities entities,
+      UserGroupInformation callerUGI) throws YarnException, IOException {
+    if (entities == null) {
+      return new TimelinePutResponse();
+    }
+    List<EntityIdentifier> entityIDs = new ArrayList<EntityIdentifier>();
+    TimelineEntities entitiesToPut = new TimelineEntities();
+    List<TimelinePutResponse.TimelinePutError> errors =
+        new ArrayList<TimelinePutResponse.TimelinePutError>();
+    for (TimelineEntity entity : entities.getEntities()) {
+      EntityIdentifier entityID =
+          new EntityIdentifier(entity.getEntityId(), entity.getEntityType());
+
+      // check if there is an existing entity
+      TimelineEntity existingEntity = null;
+      try {
+        existingEntity =
+            store.getEntity(entityID.getId(), entityID.getType(),
+                EnumSet.of(Field.PRIMARY_FILTERS));
+        if (existingEntity != null
+            && !timelineACLsManager.checkAccess(callerUGI, existingEntity)) {
+          throw new YarnException("The timeline entity " + entityID
+              + " was not put by " + callerUGI + " before");
+        }
+      } catch (Exception e) {
+        // Skip the entity which already exists and was put by others
+        LOG.error("Skip the timeline entity: " + entityID + ", because "
+            + e.getMessage());
+        TimelinePutResponse.TimelinePutError error =
+            new TimelinePutResponse.TimelinePutError();
+        error.setEntityId(entityID.getId());
+        error.setEntityType(entityID.getType());
+        error.setErrorCode(
+            TimelinePutResponse.TimelinePutError.ACCESS_DENIED);
+        errors.add(error);
+        continue;
+      }
+
+      // inject owner information for the access check if this is the first
+      // time the entity is posted, in case it's the admin who is updating
+      // the timeline data.
+      try {
+        if (existingEntity == null) {
+          injectOwnerInfo(entity, callerUGI.getShortUserName());
+        }
+      } catch (YarnException e) {
+        // Skip the entity which messes up the primary filter and record the
+        // error
+        LOG.error("Skip the timeline entity: " + entityID + ", because "
+            + e.getMessage());
+        TimelinePutResponse.TimelinePutError error =
+            new TimelinePutResponse.TimelinePutError();
+        error.setEntityId(entityID.getId());
+        error.setEntityType(entityID.getType());
+        error.setErrorCode(
+            TimelinePutResponse.TimelinePutError.SYSTEM_FILTER_CONFLICT);
+        errors.add(error);
+        continue;
+      }
+
+      entityIDs.add(entityID);
+      entitiesToPut.addEntity(entity);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Storing the entity " + entityID + ", JSON-style content: "
+            + TimelineUtils.dumpTimelineRecordtoJSON(entity));
+      }
+    }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Storing entities: " + CSV_JOINER.join(entityIDs));
+    }
+    TimelinePutResponse response = store.put(entitiesToPut);
+    // add the errors of timeline system filter key conflict
+    response.addErrors(errors);
+    return response;
+  }
+
+  private static boolean extendFields(EnumSet<Field> fieldEnums) {
+    boolean modified = false;
+    if (fieldEnums != null && !fieldEnums.contains(Field.PRIMARY_FILTERS)) {
+      fieldEnums.add(Field.PRIMARY_FILTERS);
+      modified = true;
+    }
+    return modified;
+  }
+
+  private static void injectOwnerInfo(TimelineEntity timelineEntity,
+      String owner) throws YarnException {
+    if (timelineEntity.getPrimaryFilters() != null &&
+        timelineEntity.getPrimaryFilters().containsKey(
+            TimelineStore.SystemFilter.ENTITY_OWNER.toString())) {
+      throw new YarnException(
+          "User should not use the timeline system filter key: "
+              + TimelineStore.SystemFilter.ENTITY_OWNER);
+    }
+    timelineEntity.addPrimaryFilter(
+        TimelineStore.SystemFilter.ENTITY_OWNER
+            .toString(), owner);
+  }
+
+  private static void cleanupOwnerInfo(TimelineEntity timelineEntity) {
+    if (timelineEntity.getPrimaryFilters() != null) {
+      timelineEntity.getPrimaryFilters().remove(
+          TimelineStore.SystemFilter.ENTITY_OWNER.toString());
+    }
+  }
+
+}

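A hedged usage sketch of the new facade: construct it from a store and an ACLs manager, then route reads through it so that ACL filtering and owner-filter cleanup happen in one place. The in-memory store, entity type, and entity id below are illustrative choices, not mandated by this change:

    import java.util.EnumSet;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.security.UserGroupInformation;
    import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
    import org.apache.hadoop.yarn.server.timeline.MemoryTimelineStore;
    import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
    import org.apache.hadoop.yarn.server.timeline.TimelineReader.Field;
    import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;

    public class TimelineDataManagerUsageSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // wire the facade the same way ApplicationHistoryServer now does
        TimelineDataManager dataManager = new TimelineDataManager(
            new MemoryTimelineStore(), new TimelineACLsManager(conf));

        // reads pass through the ACL check, and the owner system filter is
        // stripped from what callers see
        UserGroupInformation caller =
            UserGroupInformation.createRemoteUser("alice");
        TimelineEntity entity = dataManager.getEntity(
            "APP_TYPE", "entity_1", EnumSet.of(Field.EVENTS), caller);
        System.out.println(entity); // null when absent or not visible
      }
    }
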
+ 33 - 222
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/webapp/TimelineWebServices.java

@@ -18,14 +18,10 @@
 
 package org.apache.hadoop.yarn.server.timeline.webapp;
 
-import static org.apache.hadoop.yarn.util.StringHelper.CSV_JOINER;
-
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.EnumSet;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 import java.util.SortedSet;
@@ -58,14 +54,11 @@ import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities;
 import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timeline.TimelineEvents;
 import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
-import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.timeline.EntityIdentifier;
 import org.apache.hadoop.yarn.server.timeline.GenericObjectMapper;
 import org.apache.hadoop.yarn.server.timeline.NameValuePair;
+import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
 import org.apache.hadoop.yarn.server.timeline.TimelineReader.Field;
-import org.apache.hadoop.yarn.server.timeline.TimelineStore;
-import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
-import org.apache.hadoop.yarn.util.timeline.TimelineUtils;
 import org.apache.hadoop.yarn.webapp.BadRequestException;
 import org.apache.hadoop.yarn.webapp.ForbiddenException;
 import org.apache.hadoop.yarn.webapp.NotFoundException;
@@ -80,14 +73,11 @@ public class TimelineWebServices {
 
   private static final Log LOG = LogFactory.getLog(TimelineWebServices.class);
 
-  private TimelineStore store;
-  private TimelineACLsManager timelineACLsManager;
+  private TimelineDataManager timelineDataManager;
 
   @Inject
-  public TimelineWebServices(TimelineStore store,
-      TimelineACLsManager timelineACLsManager) {
-    this.store = store;
-    this.timelineACLsManager = timelineACLsManager;
+  public TimelineWebServices(TimelineDataManager timelineDataManager) {
+    this.timelineDataManager = timelineDataManager;
   }
 
   @XmlRootElement(name = "about")
@@ -148,61 +138,28 @@ public class TimelineWebServices {
       @QueryParam("limit") String limit,
       @QueryParam("fields") String fields) {
     init(res);
-    TimelineEntities entities = null;
     try {
-      EnumSet<Field> fieldEnums = parseFieldsStr(fields, ",");
-      boolean modified = extendFields(fieldEnums);
-      UserGroupInformation callerUGI = getUser(req);
-      entities = store.getEntities(
+      return timelineDataManager.getEntities(
           parseStr(entityType),
-          parseLongStr(limit),
+          parsePairStr(primaryFilter, ":"),
+          parsePairsStr(secondaryFilter, ",", ":"),
           parseLongStr(windowStart),
           parseLongStr(windowEnd),
           parseStr(fromId),
           parseLongStr(fromTs),
-          parsePairStr(primaryFilter, ":"),
-          parsePairsStr(secondaryFilter, ",", ":"),
-          fieldEnums);
-      if (entities != null) {
-        Iterator<TimelineEntity> entitiesItr =
-            entities.getEntities().iterator();
-        while (entitiesItr.hasNext()) {
-          TimelineEntity entity = entitiesItr.next();
-          try {
-            // check ACLs
-            if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
-              entitiesItr.remove();
-            } else {
-              // clean up system data
-              if (modified) {
-                entity.setPrimaryFilters(null);
-              } else {
-                cleanupOwnerInfo(entity);
-              }
-            }
-          } catch (YarnException e) {
-            LOG.error("Error when verifying access for user " + callerUGI
-                + " on the events of the timeline entity "
-                + new EntityIdentifier(entity.getEntityId(),
-                    entity.getEntityType()), e);
-            entitiesItr.remove();
-          }
-        }
-      }
+          parseLongStr(limit),
+          parseFieldsStr(fields, ","),
+          getUser(req));
     } catch (NumberFormatException e) {
       throw new BadRequestException(
           "windowStart, windowEnd or limit is not a numeric value.");
     } catch (IllegalArgumentException e) {
       throw new BadRequestException("requested invalid field.");
-    } catch (IOException e) {
+    } catch (Exception e) {
       LOG.error("Error getting entities", e);
       throw new WebApplicationException(e,
           Response.Status.INTERNAL_SERVER_ERROR);
     }
-    if (entities == null) {
-      return new TimelineEntities();
-    }
-    return entities;
   }
 
   /**
@@ -220,33 +177,15 @@ public class TimelineWebServices {
     init(res);
     TimelineEntity entity = null;
     try {
-      EnumSet<Field> fieldEnums = parseFieldsStr(fields, ",");
-      boolean modified = extendFields(fieldEnums);
-      entity =
-          store.getEntity(parseStr(entityId), parseStr(entityType),
-              fieldEnums);
-      if (entity != null) {
-        // check ACLs
-        UserGroupInformation callerUGI = getUser(req);
-        if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
-          entity = null;
-        } else {
-          // clean up the system data
-          if (modified) {
-            entity.setPrimaryFilters(null);
-          } else {
-            cleanupOwnerInfo(entity);
-          }
-        }
-      }
+      entity = timelineDataManager.getEntity(
+          parseStr(entityType),
+          parseStr(entityId),
+          parseFieldsStr(fields, ","),
+          getUser(req));
     } catch (IllegalArgumentException e) {
       throw new BadRequestException(
           "requested invalid field.");
-    } catch (IOException e) {
-      LOG.error("Error getting entity", e);
-      throw new WebApplicationException(e,
-          Response.Status.INTERNAL_SERVER_ERROR);
-    } catch (YarnException e) {
+    } catch (Exception e) {
       LOG.error("Error getting entity", e);
       throw new WebApplicationException(e,
           Response.Status.INTERNAL_SERVER_ERROR);
@@ -275,51 +214,23 @@ public class TimelineWebServices {
       @QueryParam("windowEnd") String windowEnd,
       @QueryParam("limit") String limit) {
     init(res);
-    TimelineEvents events = null;
     try {
-      UserGroupInformation callerUGI = getUser(req);
-      events = store.getEntityTimelines(
+      return timelineDataManager.getEvents(
           parseStr(entityType),
           parseArrayStr(entityId, ","),
-          parseLongStr(limit),
+          parseArrayStr(eventType, ","),
           parseLongStr(windowStart),
           parseLongStr(windowEnd),
-          parseArrayStr(eventType, ","));
-      if (events != null) {
-        Iterator<TimelineEvents.EventsOfOneEntity> eventsItr =
-            events.getAllEvents().iterator();
-        while (eventsItr.hasNext()) {
-          TimelineEvents.EventsOfOneEntity eventsOfOneEntity = eventsItr.next();
-          try {
-            TimelineEntity entity = store.getEntity(
-                eventsOfOneEntity.getEntityId(),
-                eventsOfOneEntity.getEntityType(),
-                EnumSet.of(Field.PRIMARY_FILTERS));
-            // check ACLs
-            if (!timelineACLsManager.checkAccess(callerUGI, entity)) {
-              eventsItr.remove();
-            }
-          } catch (Exception e) {
-            LOG.error("Error when verifying access for user " + callerUGI
-                + " on the events of the timeline entity "
-                + new EntityIdentifier(eventsOfOneEntity.getEntityId(),
-                    eventsOfOneEntity.getEntityType()), e);
-            eventsItr.remove();
-          }
-        }
-      }
+          parseLongStr(limit),
+          getUser(req));
     } catch (NumberFormatException e) {
       throw new BadRequestException(
           "windowStart, windowEnd or limit is not a numeric value.");
-    } catch (IOException e) {
+    } catch (Exception e) {
       LOG.error("Error getting entity timelines", e);
       throw new WebApplicationException(e,
           Response.Status.INTERNAL_SERVER_ERROR);
     }
-    if (events == null) {
-      return new TimelineEvents();
-    }
-    return events;
   }
 
   /**
@@ -333,9 +244,6 @@ public class TimelineWebServices {
       @Context HttpServletResponse res,
       TimelineEntities entities) {
     init(res);
-    if (entities == null) {
-      return new TimelinePutResponse();
-    }
     UserGroupInformation callerUGI = getUser(req);
     if (callerUGI == null) {
       String msg = "The owner of the posted timeline entities is not set";
@@ -343,76 +251,8 @@ public class TimelineWebServices {
       throw new ForbiddenException(msg);
     }
     try {
-      List<EntityIdentifier> entityIDs = new ArrayList<EntityIdentifier>();
-      TimelineEntities entitiesToPut = new TimelineEntities();
-      List<TimelinePutResponse.TimelinePutError> errors =
-          new ArrayList<TimelinePutResponse.TimelinePutError>();
-      for (TimelineEntity entity : entities.getEntities()) {
-        EntityIdentifier entityID =
-            new EntityIdentifier(entity.getEntityId(), entity.getEntityType());
-
-        // check if there is existing entity
-        TimelineEntity existingEntity = null;
-        try {
-          existingEntity =
-              store.getEntity(entityID.getId(), entityID.getType(),
-                  EnumSet.of(Field.PRIMARY_FILTERS));
-          if (existingEntity != null
-              && !timelineACLsManager.checkAccess(callerUGI, existingEntity)) {
-            throw new YarnException("The timeline entity " + entityID
-                + " was not put by " + callerUGI + " before");
-          }
-        } catch (Exception e) {
-          // Skip the entity which already exists and was put by others
-          LOG.warn("Skip the timeline entity: " + entityID + ", because "
-              + e.getMessage());
-          TimelinePutResponse.TimelinePutError error =
-              new TimelinePutResponse.TimelinePutError();
-          error.setEntityId(entityID.getId());
-          error.setEntityType(entityID.getType());
-          error.setErrorCode(
-              TimelinePutResponse.TimelinePutError.ACCESS_DENIED);
-          errors.add(error);
-          continue;
-        }
-
-        // inject owner information for the access check if this is the first
-        // time to post the entity, in case it's the admin who is updating
-        // the timeline data.
-        try {
-          if (existingEntity == null) {
-            injectOwnerInfo(entity, callerUGI.getShortUserName());
-          }
-        } catch (YarnException e) {
-          // Skip the entity which messes up the primary filter and record the
-          // error
-          LOG.warn("Skip the timeline entity: " + entityID + ", because "
-              + e.getMessage());
-          TimelinePutResponse.TimelinePutError error =
-              new TimelinePutResponse.TimelinePutError();
-          error.setEntityId(entityID.getId());
-          error.setEntityType(entityID.getType());
-          error.setErrorCode(
-              TimelinePutResponse.TimelinePutError.SYSTEM_FILTER_CONFLICT);
-          errors.add(error);
-          continue;
-        }
-
-        entityIDs.add(entityID);
-        entitiesToPut.addEntity(entity);
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Storing the entity " + entityID + ", JSON-style content: "
-              + TimelineUtils.dumpTimelineRecordtoJSON(entity));
-        }
-      }
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Storing entities: " + CSV_JOINER.join(entityIDs));
-      }
-      TimelinePutResponse response =  store.put(entitiesToPut);
-      // add the errors of timeline system filter key conflict
-      response.addErrors(errors);
-      return response;
-    } catch (IOException e) {
+      return timelineDataManager.postEntities(entities, callerUGI);
+    } catch (Exception e) {
       LOG.error("Error putting entities", e);
       throw new WebApplicationException(e,
           Response.Status.INTERNAL_SERVER_ERROR);
@@ -423,6 +263,15 @@ public class TimelineWebServices {
     response.setContentType(null);
   }
 
+  private static UserGroupInformation getUser(HttpServletRequest req) {
+    String remoteUser = req.getRemoteUser();
+    UserGroupInformation callerUGI = null;
+    if (remoteUser != null) {
+      callerUGI = UserGroupInformation.createRemoteUser(remoteUser);
+    }
+    return callerUGI;
+  }
+
   private static SortedSet<String> parseArrayStr(String str, String delimiter) {
     if (str == null) {
       return null;
@@ -495,14 +344,6 @@ public class TimelineWebServices {
     }
   }
 
-  private static boolean extendFields(EnumSet<Field> fieldEnums) {
-    boolean modified = false;
-    if (fieldEnums != null && !fieldEnums.contains(Field.PRIMARY_FILTERS)) {
-      fieldEnums.add(Field.PRIMARY_FILTERS);
-      modified = true;
-    }
-    return modified;
-  }
   private static Long parseLongStr(String str) {
     return str == null ? null : Long.parseLong(str.trim());
   }
@@ -511,34 +352,4 @@ public class TimelineWebServices {
     return str == null ? null : str.trim();
   }
 
-  private static UserGroupInformation getUser(HttpServletRequest req) {
-    String remoteUser = req.getRemoteUser();
-    UserGroupInformation callerUGI = null;
-    if (remoteUser != null) {
-      callerUGI = UserGroupInformation.createRemoteUser(remoteUser);
-    }
-    return callerUGI;
-  }
-
-  private static void injectOwnerInfo(TimelineEntity timelineEntity,
-      String owner) throws YarnException {
-    if (timelineEntity.getPrimaryFilters() != null &&
-        timelineEntity.getPrimaryFilters().containsKey(
-            TimelineStore.SystemFilter.ENTITY_OWNER.toString())) {
-      throw new YarnException(
-          "User should not use the timeline system filter key: "
-              + TimelineStore.SystemFilter.ENTITY_OWNER);
-    }
-    timelineEntity.addPrimaryFilter(
-        TimelineStore.SystemFilter.ENTITY_OWNER
-            .toString(), owner);
-  }
-
-  private static void cleanupOwnerInfo(TimelineEntity timelineEntity) {
-    if (timelineEntity.getPrimaryFilters() != null) {
-      timelineEntity.getPrimaryFilters().remove(
-          TimelineStore.SystemFilter.ENTITY_OWNER.toString());
-    }
-  }
-
 }

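The getUser helper kept above builds the caller's identity from nothing but the remote user name, which is all the insecure pseudo-authentication path provides. A minimal illustration of that call; the user name is a placeholder for req.getRemoteUser():

    import org.apache.hadoop.security.UserGroupInformation;

    public class RemoteUserSketch {
      public static void main(String[] args) {
        String remoteUser = "alice"; // normally req.getRemoteUser()
        UserGroupInformation callerUGI =
            UserGroupInformation.createRemoteUser(remoteUser);
        // the short name is what postEntities injects as the entity owner
        System.out.println(callerUGI.getShortUserName()); // alice
      }
    }
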
+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryClientService.java

@@ -69,7 +69,7 @@ public class TestApplicationHistoryClientService extends
     historyServer.init(config);
     historyServer.start();
     store =
-        ((ApplicationHistoryManagerImpl) historyServer.getApplicationHistory())
+        ((ApplicationHistoryManagerImpl) historyServer.getApplicationHistoryManager())
           .getHistoryStore();
   }
 

+ 4 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/timeline/webapp/TestTimelineWebServices.java

@@ -49,6 +49,7 @@ import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse.TimelineP
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.security.AdminACLsManager;
 import org.apache.hadoop.yarn.server.timeline.TestMemoryTimelineStore;
+import org.apache.hadoop.yarn.server.timeline.TimelineDataManager;
 import org.apache.hadoop.yarn.server.timeline.TimelineStore;
 import org.apache.hadoop.yarn.server.timeline.security.TimelineACLsManager;
 import org.apache.hadoop.yarn.server.timeline.security.TimelineAuthenticationFilter;
@@ -89,14 +90,15 @@ public class TestTimelineWebServices extends JerseyTest {
       } catch (Exception e) {
         Assert.fail();
       }
-      bind(TimelineStore.class).toInstance(store);
       Configuration conf = new YarnConfiguration();
       conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, false);
       timelineACLsManager = new TimelineACLsManager(conf);
       conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
       conf.set(YarnConfiguration.YARN_ADMIN_ACL, "admin");
       adminACLsManager = new AdminACLsManager(conf);
-      bind(TimelineACLsManager.class).toInstance(timelineACLsManager);
+      TimelineDataManager timelineDataManager =
+          new TimelineDataManager(store, timelineACLsManager);
+      bind(TimelineDataManager.class).toInstance(timelineDataManager);
       serve("/*").with(GuiceContainer.class);
       TimelineAuthenticationFilter taFilter = new TimelineAuthenticationFilter();
       FilterConfig filterConfig = mock(FilterConfig.class);

+ 0 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java

@@ -461,7 +461,6 @@ public class ResourceManager extends CompositeService implements Recoverable {
       rmDispatcher.register(RMAppManagerEventType.class, rmAppManager);
 
       clientRM = createClientRMService();
-      rmContext.setClientRMService(clientRM);
       addService(clientRM);
       rmContext.setClientRMService(clientRM);
 

+ 28 - 43
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java

@@ -52,13 +52,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.AMRMTokenS
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNewSavedEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppUpdateSavedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
 import org.apache.hadoop.yarn.state.InvalidStateTransitonException;
 import org.apache.hadoop.yarn.state.SingleArcTransition;
 import org.apache.hadoop.yarn.state.StateMachine;
@@ -132,7 +132,8 @@ public abstract class RMStateStore extends AbstractService {
       LOG.info("Storing info for app: " + appId);
       try {
         store.storeApplicationStateInternal(appId, appStateData);
-        store.notifyDoneStoringApplication(appId, null);
+        store.notifyApplication(new RMAppEvent(appId,
+               RMAppEventType.APP_NEW_SAVED));
       } catch (Exception e) {
         LOG.error("Error storing app: " + appId, e);
         store.notifyStoreOperationFailed(e);
@@ -156,7 +157,8 @@ public abstract class RMStateStore extends AbstractService {
       LOG.info("Updating info for app: " + appId);
       try {
         store.updateApplicationStateInternal(appId, appStateData);
-        store.notifyDoneUpdatingApplication(appId, null);
+        store.notifyApplication(new RMAppEvent(appId,
+               RMAppEventType.APP_UPDATE_SAVED));
       } catch (Exception e) {
         LOG.error("Error updating app: " + appId, e);
         store.notifyStoreOperationFailed(e);
@@ -205,8 +207,9 @@ public abstract class RMStateStore extends AbstractService {
         }
         store.storeApplicationAttemptStateInternal(attemptState.getAttemptId(),
             attemptStateData);
-        store.notifyDoneStoringApplicationAttempt(attemptState.getAttemptId(),
-            null);
+        store.notifyApplicationAttempt(new RMAppAttemptEvent
+               (attemptState.getAttemptId(),
+               RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
       } catch (Exception e) {
         LOG.error("Error storing appAttempt: " + attemptState.getAttemptId(), e);
         store.notifyStoreOperationFailed(e);
@@ -233,8 +236,9 @@ public abstract class RMStateStore extends AbstractService {
         }
         store.updateApplicationAttemptStateInternal(attemptState.getAttemptId(),
             attemptStateData);
-        store.notifyDoneUpdatingApplicationAttempt(attemptState.getAttemptId(),
-            null);
+        store.notifyApplicationAttempt(new RMAppAttemptEvent
+               (attemptState.getAttemptId(),
+               RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
       } catch (Exception e) {
         LOG.error("Error updating appAttempt: " + attemptState.getAttemptId(), e);
         store.notifyStoreOperationFailed(e);
@@ -801,47 +805,28 @@ public abstract class RMStateStore extends AbstractService {
     }
     rmDispatcher.getEventHandler().handle(new RMFatalEvent(type, failureCause));
   }
-
+ 
   @SuppressWarnings("unchecked")
   /**
-   * In (@link handleStoreEvent}, this method is called to notify the
-   * application that new application is stored in state store
-   * @param appId id of the application that has been saved
-   * @param storedException the exception that is thrown when storing the
-   * application
+   * This method is called to notify the application that
+   * a new application is stored or updated in the state store
+   * @param event App event containing the app id and event type
    */
-  private void notifyDoneStoringApplication(ApplicationId appId,
-                                                  Exception storedException) {
-    rmDispatcher.getEventHandler().handle(
-        new RMAppNewSavedEvent(appId, storedException));
-  }
-
-  @SuppressWarnings("unchecked")
-  private void notifyDoneUpdatingApplication(ApplicationId appId,
-      Exception storedException) {
-    rmDispatcher.getEventHandler().handle(
-      new RMAppUpdateSavedEvent(appId, storedException));
+  private void notifyApplication(RMAppEvent event) {
+    rmDispatcher.getEventHandler().handle(event);
   }
-
+  
   @SuppressWarnings("unchecked")
   /**
-   * In (@link handleStoreEvent}, this method is called to notify the
-   * application attempt that new attempt is stored in state store
-   * @param appAttempt attempt that has been saved
+   * This method is called to notify the application attempt
+   * that a new attempt is stored or updated in the state store
+   * @param event App attempt event containing the app attempt
+   * id and event type
    */
-  private void notifyDoneStoringApplicationAttempt(ApplicationAttemptId attemptId,
-                                                  Exception storedException) {
-    rmDispatcher.getEventHandler().handle(
-        new RMAppAttemptNewSavedEvent(attemptId, storedException));
+  private void notifyApplicationAttempt(RMAppAttemptEvent event) {
+    rmDispatcher.getEventHandler().handle(event);
   }
-
-  @SuppressWarnings("unchecked")
-  private void notifyDoneUpdatingApplicationAttempt(ApplicationAttemptId attemptId,
-      Exception updatedException) {
-    rmDispatcher.getEventHandler().handle(
-      new RMAppAttemptUpdateSavedEvent(attemptId, updatedException));
-  }
-
+  
   /**
    * EventHandler implementation which forward events to the FSRMStateStore
    * This hides the EventHandle methods of the store from its public interface 

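This change collapses four bespoke *SavedEvent notifications into plain RMAppEvent and RMAppAttemptEvent instances that differ only in their event type, while storage failures are routed through notifyStoreOperationFailed instead of riding along as an exception payload. A self-contained toy of that one-class-plus-enum pattern (all names below are schematic):

    import java.util.function.Consumer;

    public class GenericEventSketch {
      enum AppEventType { APP_NEW_SAVED, APP_UPDATE_SAVED }

      static class AppEvent {
        final String appId;
        final AppEventType type;

        AppEvent(String appId, AppEventType type) {
          this.appId = appId;
          this.type = type;
        }
      }

      public static void main(String[] args) {
        // stand-in for the dispatcher's event handler
        Consumer<AppEvent> dispatcher =
            e -> System.out.println(e.appId + " -> " + e.type);

        // success paths now differ only in the enum constant they carry;
        // failures take a separate notifyStoreOperationFailed path
        dispatcher.accept(new AppEvent("app_1", AppEventType.APP_NEW_SAVED));
        dispatcher.accept(new AppEvent("app_1", AppEventType.APP_UPDATE_SAVED));
      }
    }
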
+ 0 - 18
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java

@@ -820,17 +820,6 @@ public class RMAppImpl implements RMApp, Recoverable {
       RMAppTransition {
     @Override
     public void transition(RMAppImpl app, RMAppEvent event) {
-      if (event instanceof RMAppNewSavedEvent) {
-        RMAppNewSavedEvent storeEvent = (RMAppNewSavedEvent) event;
-        // For HA this exception needs to be handled by giving up
-        // master status if we got fenced
-        if (((RMAppNewSavedEvent) event).getStoredException() != null) {
-          LOG.error(
-            "Failed to store application: " + storeEvent.getApplicationId(),
-            storeEvent.getStoredException());
-          ExitUtil.terminate(1, storeEvent.getStoredException());
-        }
-      }
       app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
         app.submissionContext.getQueue(), app.user));
     }
@@ -848,13 +837,6 @@ public class RMAppImpl implements RMApp, Recoverable {
 
     @Override
     public RMAppState transition(RMAppImpl app, RMAppEvent event) {
-      RMAppUpdateSavedEvent storeEvent = (RMAppUpdateSavedEvent) event;
-      if (storeEvent.getUpdatedException() != null) {
-        LOG.error("Failed to update the final state of application"
-              + storeEvent.getApplicationId(), storeEvent.getUpdatedException());
-        ExitUtil.terminate(1, storeEvent.getUpdatedException());
-      }
-
       if (app.transitionTodo instanceof SingleArcTransition) {
         ((SingleArcTransition) app.transitionTodo).transition(app,
           app.eventCausingFinalSaving);

+ 0 - 36
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppUpdateSavedEvent.java

@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-
-public class RMAppUpdateSavedEvent extends RMAppEvent {
-
-  private final Exception updatedException;
-
-  public RMAppUpdateSavedEvent(ApplicationId appId, Exception updatedException) {
-    super(appId, RMAppEventType.APP_UPDATE_SAVED);
-    this.updatedException = updatedException;
-  }
-
-  public Exception getUpdatedException() {
-    return updatedException;
-  }
-
-}

+ 0 - 27
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java

@@ -80,11 +80,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
@@ -398,7 +396,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
           RMAppAttemptState.KILLED,
           RMAppAttemptState.KILLED,
           EnumSet.of(RMAppAttemptEventType.ATTEMPT_ADDED,
-              RMAppAttemptEventType.EXPIRE,
               RMAppAttemptEventType.LAUNCHED,
               RMAppAttemptEventType.LAUNCH_FAILED,
               RMAppAttemptEventType.EXPIRE,
@@ -906,8 +903,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
     @Override
     public void transition(RMAppAttemptImpl appAttempt,
                                                     RMAppAttemptEvent event) {
-      appAttempt.checkAttemptStoreError(event);
-
       appAttempt.launchAttempt();
     }
   }
@@ -1059,14 +1054,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
     @Override
     public RMAppAttemptState transition(RMAppAttemptImpl appAttempt,
         RMAppAttemptEvent event) {
-      RMAppAttemptUpdateSavedEvent storeEvent = (RMAppAttemptUpdateSavedEvent) event;
-      if (storeEvent.getUpdatedException() != null) {
-        LOG.error("Failed to update the final state of application attempt: "
-            + storeEvent.getApplicationAttemptId(),
-          storeEvent.getUpdatedException());
-        ExitUtil.terminate(1, storeEvent.getUpdatedException());
-      }
-
       RMAppAttemptEvent causeEvent = appAttempt.eventCausingFinalSaving;
 
       if (appAttempt.transitionTodo instanceof SingleArcTransition) {
@@ -1196,8 +1183,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
     @Override
     public void transition(RMAppAttemptImpl appAttempt,
                             RMAppAttemptEvent event) {
-      appAttempt.checkAttemptStoreError(event);
-
       // create AMRMToken
       appAttempt.amrmToken =
           appAttempt.rmContext.getAMRMTokenSecretManager().createAndGetAMRMToken(
@@ -1690,18 +1675,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
     rmContext.getAMLivelinessMonitor().register(getAppAttemptId());
   }
   
-  private void checkAttemptStoreError(RMAppAttemptEvent event) {
-    RMAppAttemptNewSavedEvent storeEvent = (RMAppAttemptNewSavedEvent) event;
-    if(storeEvent.getStoredException() != null)
-    {
-      // This needs to be handled for HA and give up master status if we got
-      // fenced
-      LOG.error("Failed to store attempt: " + getAppAttemptId(),
-                storeEvent.getStoredException());
-      ExitUtil.terminate(1, storeEvent.getStoredException());
-    }
-  }
-
   private void storeAttempt() {
     // store attempt data in a non-blocking manner to prevent dispatcher
     // thread starvation and wait for state to be saved

+ 0 - 39
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptNewSavedEvent.java

@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event;
-
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
-
-public class RMAppAttemptNewSavedEvent extends RMAppAttemptEvent {
-
-  final Exception storedException;
-  
-  public RMAppAttemptNewSavedEvent(ApplicationAttemptId appAttemptId,
-                                 Exception storedException) {
-    super(appAttemptId, RMAppAttemptEventType.ATTEMPT_NEW_SAVED);
-    this.storedException = storedException;
-  }
-  
-  public Exception getStoredException() {
-    return storedException;
-  }
-
-}

+ 0 - 38
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptUpdateSavedEvent.java

@@ -1,38 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event;
-
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
-
-public class RMAppAttemptUpdateSavedEvent extends RMAppAttemptEvent {
-
-  final Exception updatedException;
-
-  public RMAppAttemptUpdateSavedEvent(ApplicationAttemptId appAttemptId,
-      Exception updatedException) {
-    super(appAttemptId, RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED);
-    this.updatedException = updatedException;
-  }
-
-  public Exception getUpdatedException() {
-    return updatedException;
-  }
-}

+ 12 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java

@@ -116,6 +116,18 @@ public abstract class Schedulable {
     return fairShare;
   }
 
+  /**
+   * Returns true if the queue has at least one app running. Always returns
+   * true for AppSchedulables.
+   */
+  public boolean isActive() {
+    if (this instanceof FSQueue) {
+      FSQueue queue = (FSQueue) this;
+      return queue.getNumRunnableApps() > 0;
+    }
+    return true;
+  }
+
   /** Convenient toString implementation for debugging. */
   @Override
   public String toString() {

+ 27 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies;
 
+import java.util.ArrayList;
 import java.util.Collection;
 
 import org.apache.hadoop.yarn.api.records.Resource;
@@ -33,7 +34,31 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable;
 public class ComputeFairShares {
   
   private static final int COMPUTE_FAIR_SHARES_ITERATIONS = 25;
-  
+
+  /**
+   * Compute the fair share of the given schedulables. The fair share is an
+   * allocation of shares considering only active schedulables, i.e.
+   * schedulables which have running apps.
+   *
+   * @param schedulables the schedulables to compute fair shares for
+   * @param totalResources the total resources to divide among them
+   * @param type the resource type to compute the shares for
+   */
+  public static void computeShares(
+      Collection<? extends Schedulable> schedulables, Resource totalResources,
+      ResourceType type) {
+    Collection<Schedulable> activeSchedulables = new ArrayList<Schedulable>();
+    for (Schedulable sched : schedulables) {
+      if (sched.isActive()) {
+        activeSchedulables.add(sched);
+      } else {
+        setResourceValue(0, sched.getFairShare(), type);
+      }
+    }
+
+    computeSharesInternal(activeSchedulables, totalResources, type);
+  }
+
   /**
    * Given a set of Schedulables and a number of slots, compute their weighted
    * fair shares. The min and max shares and of the Schedulables are assumed to
@@ -75,7 +100,7 @@ public class ComputeFairShares {
    * because resourceUsedWithWeightToResourceRatio is linear-time and the number of
    * iterations of binary search is a constant (dependent on desired precision).
    */
-  public static void computeShares(
+  private static void computeSharesInternal(
       Collection<? extends Schedulable> schedulables, Resource totalResources,
       ResourceType type) {
     if (schedulables.isEmpty()) {

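The public computeShares above now acts as a filter: inactive schedulables are pinned to a fair share of zero and only the active ones enter the internal binary-search pass. A toy walk-through of that split, assuming equal weights and no min/max constraints so the internal pass degenerates to an even division (every name here is illustrative):

import java.util.ArrayList;
import java.util.List;

class FairShareToy {
  static class Sched {
    final String name; final boolean active; int fairShare;
    Sched(String name, boolean active) { this.name = name; this.active = active; }
  }

  static void computeShares(List<Sched> all, int totalMemory) {
    List<Sched> active = new ArrayList<>();
    for (Sched s : all) {
      if (s.active) {
        active.add(s);
      } else {
        s.fairShare = 0;                 // inactive queues get nothing
      }
    }
    // Stand-in for computeSharesInternal: with equal weights and no
    // min/max constraints, the binary search reduces to an even split.
    for (Sched s : active) {
      s.fairShare = totalMemory / active.size();
    }
  }

  public static void main(String[] args) {
    List<Sched> queues = new ArrayList<>();
    queues.add(new Sched("childA1", true));
    queues.add(new Sched("childA2", true));
    queues.add(new Sched("childA3", false));
    computeShares(queues, 16 * 1024);
    for (Sched s : queues) {
      System.out.println(s.name + " -> " + s.fairShare);
    }
    // childA1 -> 8192, childA2 -> 8192, childA3 -> 0
  }
}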
+ 6 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java

@@ -386,7 +386,8 @@ public class TestAMRestart {
     ApplicationState appState =
         memStore.getState().getApplicationState().get(app1.getApplicationId());
     // AM should be restarted even though max-am-attempt is 1.
-    MockAM am2 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+    MockAM am2 =
+        rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1);
     RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
     Assert.assertTrue(((RMAppAttemptImpl) attempt2).mayBeLastAttempt());
 
@@ -398,7 +399,8 @@ public class TestAMRestart {
     am2.waitForState(RMAppAttemptState.FAILED);
     Assert.assertTrue(! attempt2.shouldCountTowardsMaxAttemptRetry());
     rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
-    MockAM am3 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+    MockAM am3 =
+        rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 3, nm1);
     RMAppAttempt attempt3 = app1.getCurrentAppAttempt();
     Assert.assertTrue(((RMAppAttemptImpl) attempt3).mayBeLastAttempt());
 
@@ -421,7 +423,8 @@ public class TestAMRestart {
         .getAMContainerExitStatus());
 
     rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
-    MockAM am4 = MockRM.launchAndRegisterAM(app1, rm1, nm1);
+    MockAM am4 =
+        rm1.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 4, nm1);
     RMAppAttempt attempt4 = app1.getCurrentAppAttempt();
     Assert.assertTrue(((RMAppAttemptImpl) attempt4).mayBeLastAttempt());
 

+ 3 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java

@@ -65,8 +65,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.AMRMTokenS
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
 import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
 import org.apache.hadoop.yarn.server.security.MasterKeyData;
@@ -77,10 +77,9 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{
   public static final Log LOG = LogFactory.getLog(RMStateStoreTestBase.class);
 
   static class TestDispatcher implements
-      Dispatcher, EventHandler<RMAppAttemptNewSavedEvent> {
+      Dispatcher, EventHandler<RMAppAttemptEvent> {
 
     ApplicationAttemptId attemptId;
-    Exception storedException;
 
     boolean notified = false;
 
@@ -91,9 +90,8 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{
     }
 
     @Override
-    public void handle(RMAppAttemptNewSavedEvent event) {
+    public void handle(RMAppAttemptEvent event) {
       assertEquals(attemptId, event.getApplicationAttemptId());
-      assertEquals(storedException, event.getStoredException());
       notified = true;
       synchronized (this) {
         notifyAll();
@@ -163,7 +161,6 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{
     when(mockAttempt.getClientTokenMasterKey())
         .thenReturn(clientTokenMasterKey);
     dispatcher.attemptId = attemptId;
-    dispatcher.storedException = null;
     store.storeNewApplicationAttempt(mockAttempt);
     waitNotify(dispatcher);
     return container.getId();

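TestDispatcher's handle() flips a flag and calls notifyAll(); the test thread blocks in waitNotify until the store's asynchronous callback arrives. The body of waitNotify is not part of this hunk, so the following is only a hedged sketch of the wait side of that handshake:

// Assumed shape of the waitNotify helper referenced above; the actual
// implementation in RMStateStoreTestBase may differ (e.g. add a timeout).
static void waitNotify(TestDispatcher dispatcher) throws InterruptedException {
  synchronized (dispatcher) {
    while (!dispatcher.notified) {   // loop guards against spurious wakeups
      dispatcher.wait();
    }
    dispatcher.notified = false;     // reset for the next store operation
  }
}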
+ 6 - 7
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java

@@ -60,7 +60,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessM
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
@@ -328,15 +327,15 @@ public class TestRMAppTransitions {
 
   private void sendAppUpdateSavedEvent(RMApp application) {
     RMAppEvent event =
-        new RMAppUpdateSavedEvent(application.getApplicationId(), null);
+        new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_UPDATE_SAVED);
     application.handle(event);
     rmDispatcher.await();
   }
 
   private void sendAttemptUpdateSavedEvent(RMApp application) {
     application.getCurrentAppAttempt().handle(
-      new RMAppAttemptUpdateSavedEvent(application.getCurrentAppAttempt()
-        .getAppAttemptId(), null));
+        new RMAppAttemptEvent(application.getCurrentAppAttempt().getAppAttemptId(),
+            RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
   }
 
   protected RMApp testCreateAppNewSaving(
@@ -357,7 +356,7 @@ public class TestRMAppTransitions {
   RMApp application = testCreateAppNewSaving(submissionContext);
     // NEW_SAVING => SUBMITTED event RMAppEventType.APP_SAVED
     RMAppEvent event =
-        new RMAppNewSavedEvent(application.getApplicationId(), null);
+        new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_NEW_SAVED);
     application.handle(event);
     assertStartTimeSet(application);
     assertAppState(RMAppState.SUBMITTED, application);
@@ -422,7 +421,7 @@ public class TestRMAppTransitions {
     RMApp application = testCreateAppFinalSaving(submissionContext);
     // FINAL_SAVING => FINISHING event RMAppEventType.APP_UPDATED
     RMAppEvent appUpdated =
-        new RMAppUpdateSavedEvent(application.getApplicationId(), null);
+        new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_UPDATE_SAVED);
     application.handle(appUpdated);
     assertAppState(RMAppState.FINISHING, application);
     assertTimesAtFinish(application);
@@ -763,7 +762,7 @@ public class TestRMAppTransitions {
     application.handle(event);
     assertAppState(RMAppState.FINAL_SAVING, application);
     RMAppEvent appUpdated =
-        new RMAppUpdateSavedEvent(application.getApplicationId(), null);
+        new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_UPDATE_SAVED);
     application.handle(appUpdated);
     assertAppState(RMAppState.FINISHED, application);
 

+ 7 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java

@@ -81,10 +81,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
@@ -570,15 +568,15 @@ public class TestRMAppAttemptTransitions {
     submitApplicationAttempt();
     applicationAttempt.handle(
         new RMAppAttemptEvent(
-            applicationAttempt.getAppAttemptId(), 
+            applicationAttempt.getAppAttemptId(),
             RMAppAttemptEventType.ATTEMPT_ADDED));
     
     if(unmanagedAM){
       assertEquals(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, 
           applicationAttempt.getAppAttemptState());
       applicationAttempt.handle(
-          new RMAppAttemptNewSavedEvent(
-              applicationAttempt.getAppAttemptId(), null));
+        new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
+            RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
     }
     
     testAppAttemptScheduledState();
@@ -616,8 +614,8 @@ public class TestRMAppAttemptTransitions {
     assertEquals(RMAppAttemptState.ALLOCATED_SAVING, 
         applicationAttempt.getAppAttemptState());
     applicationAttempt.handle(
-        new RMAppAttemptNewSavedEvent(
-            applicationAttempt.getAppAttemptId(), null));
+        new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(),
+            RMAppAttemptEventType.ATTEMPT_NEW_SAVED));
     
     testAppAttemptAllocatedState(container);
     
@@ -696,8 +694,8 @@ public class TestRMAppAttemptTransitions {
     assertEquals(RMAppAttemptState.FINAL_SAVING,
       applicationAttempt.getAppAttemptState());
     applicationAttempt.handle(
-      new RMAppAttemptUpdateSavedEvent(
-          applicationAttempt.getAppAttemptId(), null));
+      new RMAppAttemptEvent(applicationAttempt.getAppAttemptId(), 
+          RMAppAttemptEventType.ATTEMPT_UPDATE_SAVED));
   }
 
   @Test

+ 31 - 15
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java

@@ -292,6 +292,7 @@ public class TestFairScheduler extends FairSchedulerTestBase {
     // Have two queues which want entire cluster capacity
     createSchedulingRequest(10 * 1024, "queue1", "user1");
     createSchedulingRequest(10 * 1024, "queue2", "user1");
+    createSchedulingRequest(10 * 1024, "root.default", "user1");
 
     scheduler.update();
 
@@ -322,6 +323,7 @@ public class TestFairScheduler extends FairSchedulerTestBase {
     // Have two queues which want entire cluster capacity
     createSchedulingRequest(10 * 1024, "parent.queue2", "user1");
     createSchedulingRequest(10 * 1024, "parent.queue3", "user1");
+    createSchedulingRequest(10 * 1024, "root.default", "user1");
 
     scheduler.update();
 
@@ -766,8 +768,10 @@ public class TestFairScheduler extends FairSchedulerTestBase {
     scheduler.handle(nodeEvent1);
 
     // user1,user2 submit their apps to parentq and create user queues
-    scheduler.assignToQueue(rmApp1, "root.parentq", "user1");
-    scheduler.assignToQueue(rmApp2, "root.parentq", "user2");
+    createSchedulingRequest(10 * 1024, "root.parentq", "user1");
+    createSchedulingRequest(10 * 1024, "root.parentq", "user2");
+    // user3 submits an app in the default queue
+    createSchedulingRequest(10 * 1024, "root.default", "user3");
 
     scheduler.update();
 
@@ -1287,7 +1291,7 @@ public class TestFairScheduler extends FairSchedulerTestBase {
     scheduler.update();
     Resource toPreempt = scheduler.resToPreempt(scheduler.getQueueManager()
         .getLeafQueue("queueA.queueA2", false), clock.getTime());
-    assertEquals(2980, toPreempt.getMemory());
+    assertEquals(3277, toPreempt.getMemory());
 
     // verify if the 3 containers required by queueA2 are preempted in the same
     // round
@@ -2446,8 +2450,12 @@ public class TestFairScheduler extends FairSchedulerTestBase {
     scheduler.update();
 
     FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1", true);
-    assertEquals("Queue queue1's fair share should be 10240",
-        10240, queue1.getFairShare().getMemory());
+    assertEquals("Queue queue1's fair share should be 0", 0, queue1
+        .getFairShare().getMemory());
+
+    createSchedulingRequest(1 * 1024, "root.default", "user1");
+    scheduler.update();
+    scheduler.handle(updateEvent);
 
     Resource amResource1 = Resource.newInstance(1024, 1);
     Resource amResource2 = Resource.newInstance(2048, 2);
@@ -2635,24 +2643,32 @@ public class TestFairScheduler extends FairSchedulerTestBase {
 
     FSLeafQueue queue1 =
         scheduler.getQueueManager().getLeafQueue("queue1", true);
-    assertEquals("Queue queue1's fair share should be 1366",
-        1366, queue1.getFairShare().getMemory());
+    assertEquals("Queue queue1's fair share should be 0", 0, queue1
+        .getFairShare().getMemory());
     FSLeafQueue queue2 =
         scheduler.getQueueManager().getLeafQueue("queue2", true);
-    assertEquals("Queue queue2's fair share should be 1366",
-        1366, queue2.getFairShare().getMemory());
+    assertEquals("Queue queue2's fair share should be 0", 0, queue2
+        .getFairShare().getMemory());
     FSLeafQueue queue3 =
         scheduler.getQueueManager().getLeafQueue("queue3", true);
-    assertEquals("Queue queue3's fair share should be 1366",
-        1366, queue3.getFairShare().getMemory());
+    assertEquals("Queue queue3's fair share should be 0", 0, queue3
+        .getFairShare().getMemory());
     FSLeafQueue queue4 =
         scheduler.getQueueManager().getLeafQueue("queue4", true);
-    assertEquals("Queue queue4's fair share should be 1366",
-        1366, queue4.getFairShare().getMemory());
+    assertEquals("Queue queue4's fair share should be 0", 0, queue4
+        .getFairShare().getMemory());
     FSLeafQueue queue5 =
         scheduler.getQueueManager().getLeafQueue("queue5", true);
-    assertEquals("Queue queue5's fair share should be 1366",
-        1366, queue5.getFairShare().getMemory());
+    assertEquals("Queue queue5's fair share should be 0", 0, queue5
+        .getFairShare().getMemory());
+
+    List<String> queues = Arrays.asList("root.default", "root.queue3",
+        "root.queue4", "root.queue5");
+    for (String queue : queues) {
+      createSchedulingRequest(1 * 1024, queue, "user1");
+      scheduler.update();
+      scheduler.handle(updateEvent);
+    }
 
     Resource amResource1 = Resource.newInstance(2048, 1);
     int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();

+ 308 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java

@@ -0,0 +1,308 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Collection;
+
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
+import org.apache.hadoop.yarn.util.resource.Resources;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFairSchedulerFairShare extends FairSchedulerTestBase {
+  private final static String ALLOC_FILE = new File(TEST_DIR,
+      TestFairSchedulerFairShare.class.getName() + ".xml").getAbsolutePath();
+
+  @Before
+  public void setup() throws IOException {
+    conf = createConfiguration();
+    conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
+  }
+
+  @After
+  public void teardown() {
+    if (resourceManager != null) {
+      resourceManager.stop();
+      resourceManager = null;
+    }
+    conf = null;
+  }
+
+  private void createClusterWithQueuesAndOneNode(int mem, String policy)
+      throws IOException {
+    createClusterWithQueuesAndOneNode(mem, 0, policy);
+  }
+
+  private void createClusterWithQueuesAndOneNode(int mem, int vCores,
+      String policy) throws IOException {
+    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
+    out.println("<?xml version=\"1.0\"?>");
+    out.println("<allocations>");
+    out.println("<queue name=\"root\" >");
+    out.println("   <queue name=\"parentA\" >");
+    out.println("       <weight>8</weight>");
+    out.println("       <queue name=\"childA1\" />");
+    out.println("       <queue name=\"childA2\" />");
+    out.println("       <queue name=\"childA3\" />");
+    out.println("       <queue name=\"childA4\" />");
+    out.println("   </queue>");
+    out.println("   <queue name=\"parentB\" >");
+    out.println("       <weight>1</weight>");
+    out.println("       <queue name=\"childB1\" />");
+    out.println("       <queue name=\"childB2\" />");
+    out.println("   </queue>");
+    out.println("</queue>");
+    out.println("<defaultQueueSchedulingPolicy>" + policy
+        + "</defaultQueueSchedulingPolicy>");
+    out.println("</allocations>");
+    out.close();
+
+    resourceManager = new MockRM(conf);
+    resourceManager.start();
+    scheduler = (FairScheduler) resourceManager.getResourceScheduler();
+
+    RMNode node1 = MockNodes.newNodeInfo(1,
+        Resources.createResource(mem, vCores), 1, "127.0.0.1");
+    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
+    scheduler.handle(nodeEvent1);
+  }
+
+  @Test
+  public void testFairShareNoAppsRunning() throws IOException {
+    int nodeCapacity = 16 * 1024;
+    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
+
+    scheduler.update();
+    // No apps are running in the cluster; verify that fair share is zero
+    // for all queues under parentA and parentB.
+    Collection<FSLeafQueue> leafQueues = scheduler.getQueueManager()
+        .getLeafQueues();
+
+    for (FSLeafQueue leaf : leafQueues) {
+      if (leaf.getName().startsWith("root.parentA")) {
+        assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity
+            * 100, 0);
+      } else if (leaf.getName().startsWith("root.parentB")) {
+        assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity
+            * 100, 0.1);
+      }
+    }
+  }
+
+  @Test
+  public void testFairShareOneAppRunning() throws IOException {
+    int nodeCapacity = 16 * 1024;
+    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
+
+    // Run an app in childA1. Verify that its fair share is 100%, since it is
+    // the only active queue.
+    // Also verify that the fair share of childA2 is 0, since no app is
+    // running in it.
+    createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
+
+    scheduler.update();
+
+    assertEquals(
+        100,
+        (double) scheduler.getQueueManager()
+            .getLeafQueue("root.parentA.childA1", false).getFairShare()
+            .getMemory()
+            / nodeCapacity * 100, 0.1);
+    assertEquals(
+        0,
+        (double) scheduler.getQueueManager()
+            .getLeafQueue("root.parentA.childA2", false).getFairShare()
+            .getMemory()
+            / nodeCapacity * 100, 0.1);
+  }
+
+  @Test
+  public void testFairShareMultipleActiveQueuesUnderSameParent()
+      throws IOException {
+    int nodeCapacity = 16 * 1024;
+    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
+
+    // Run apps in childA1, childA2, and childA3
+    createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
+    createSchedulingRequest(2 * 1024, "root.parentA.childA2", "user2");
+    createSchedulingRequest(2 * 1024, "root.parentA.childA3", "user3");
+
+    scheduler.update();
+
+    // Verify that each queue's fair share is 100 / 3 = 33%
+    for (int i = 1; i <= 3; i++) {
+      assertEquals(
+          33,
+          (double) scheduler.getQueueManager()
+              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
+              .getMemory()
+              / nodeCapacity * 100, .9);
+    }
+  }
+
+  @Test
+  public void testFairShareMultipleActiveQueuesUnderDifferentParent()
+      throws IOException {
+    int nodeCapacity = 16 * 1024;
+    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
+
+    // Run apps in childA1 and childA2, which are under parentA
+    createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
+    createSchedulingRequest(3 * 1024, "root.parentA.childA2", "user2");
+
+    // Run an app in childB1, which is under parentB
+    createSchedulingRequest(1 * 1024, "root.parentB.childB1", "user3");
+
+    // Run an app in the root.default queue
+    createSchedulingRequest(1 * 1024, "root.default", "user4");
+
+    scheduler.update();
+
+    // The two active child queues under parentA each
+    // get a fair share of 80 / 2 = 40%.
+    for (int i = 1; i <= 2; i++) {
+      assertEquals(
+          40,
+          (double) scheduler.getQueueManager()
+              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
+              .getMemory()
+              / nodeCapacity * 100, .9);
+    }
+
+    // The child queue under parentB would get a fair share of 10%,
+    // basically all of parentB's fair share
+    assertEquals(
+        10,
+        (double) scheduler.getQueueManager()
+            .getLeafQueue("root.parentB.childB1", false).getFairShare()
+            .getMemory()
+            / nodeCapacity * 100, .9);
+  }
+
+  @Test
+  public void testFairShareResetsToZeroWhenAppsComplete() throws IOException {
+    int nodeCapacity = 16 * 1024;
+    createClusterWithQueuesAndOneNode(nodeCapacity, "fair");
+
+    // Run apps in childA1 and childA2, which are under parentA
+    ApplicationAttemptId app1 = createSchedulingRequest(2 * 1024,
+        "root.parentA.childA1", "user1");
+    ApplicationAttemptId app2 = createSchedulingRequest(3 * 1024,
+        "root.parentA.childA2", "user2");
+
+    scheduler.update();
+
+    // Verify that both active queues under parentA get a 50% fair
+    // share.
+    for (int i = 1; i <= 2; i++) {
+      assertEquals(
+          50,
+          (double) scheduler.getQueueManager()
+              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
+              .getMemory()
+              / nodeCapacity * 100, .9);
+    }
+    // Let the app under childA1 complete. This should reset the fair share
+    // of queue childA1 to zero, since the queue has no apps running.
+    // Queue childA2's fair share increases to 100%, since it is the only
+    // active queue.
+    AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(
+        app1, RMAppAttemptState.FINISHED, false);
+
+    scheduler.handle(appRemovedEvent1);
+    scheduler.update();
+
+    assertEquals(
+        0,
+        (double) scheduler.getQueueManager()
+            .getLeafQueue("root.parentA.childA1", false).getFairShare()
+            .getMemory()
+            / nodeCapacity * 100, 0);
+    assertEquals(
+        100,
+        (double) scheduler.getQueueManager()
+            .getLeafQueue("root.parentA.childA2", false).getFairShare()
+            .getMemory()
+            / nodeCapacity * 100, 0.1);
+  }
+
+  @Test
+  public void testFairShareWithDRFMultipleActiveQueuesUnderDifferentParent()
+      throws IOException {
+    int nodeMem = 16 * 1024;
+    int nodeVCores = 10;
+    createClusterWithQueuesAndOneNode(nodeMem, nodeVCores, "drf");
+
+    // Run apps in childA1 and childA2, which are under parentA
+    createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1");
+    createSchedulingRequest(3 * 1024, "root.parentA.childA2", "user2");
+
+    // Run an app in childB1, which is under parentB
+    createSchedulingRequest(1 * 1024, "root.parentB.childB1", "user3");
+
+    // Run an app in the root.default queue
+    createSchedulingRequest(1 * 1024, "root.default", "user4");
+
+    scheduler.update();
+
+    // The two active child queues under parentA each
+    // get 80 / 2 = 40% of memory and vcores.
+    for (int i = 1; i <= 2; i++) {
+      assertEquals(
+          40,
+          (double) scheduler.getQueueManager()
+              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
+              .getMemory()
+              / nodeMem * 100, .9);
+      assertEquals(
+          40,
+          (double) scheduler.getQueueManager()
+              .getLeafQueue("root.parentA.childA" + i, false).getFairShare()
+              .getVirtualCores()
+              / nodeVCores * 100, .9);
+    }
+
+    // The only active child queue under parentB gets 10% of memory and vcores.
+    assertEquals(
+        10,
+        (double) scheduler.getQueueManager()
+            .getLeafQueue("root.parentB.childB1", false).getFairShare()
+            .getMemory()
+            / nodeMem * 100, .9);
+    assertEquals(
+        10,
+        (double) scheduler.getQueueManager()
+            .getLeafQueue("root.parentB.childB1", false).getFairShare()
+            .getVirtualCores()
+            / nodeVCores * 100, .9);
+  }
+}
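The 40% and 10% expectations in the two multi-parent tests above follow directly from the configured weights (parentA = 8, parentB = 1) plus the active root.default queue, which is assumed here to keep the fair scheduler's default weight of 1. A worked check of that arithmetic:

class FairShareArithmetic {
  public static void main(String[] args) {
    double totalWeight = 8 + 1 + 1;          // parentA + parentB + root.default
    double parentA = 8 / totalWeight * 100;  // 80% of the cluster
    double perChildA = parentA / 2;          // two active children -> 40% each
    double parentB = 1 / totalWeight * 100;  // 10%, all going to childB1
    System.out.printf("childA*: %.0f%%, childB1: %.0f%%%n", perChildA, parentB);
  }
}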