Browse Source

YARN-542. Changed the default global AM max-attempts value from 1 to 2, so that a failed AM gets at least one retry. Contributed by Zhijie Shen.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1470094 13f79535-47bb-0310-9956-ffa450edef68
Vinod Kumar Vavilapalli 12 years ago
parent
commit
a91067fc5e

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -164,6 +164,9 @@ Release 2.0.5-beta - UNRELEASED
     YARN-586. Fixed a typo in ApplicationSubmissionContext#setApplicationId.
     (Zhijie Shen via vinodkv)
 
+    YARN-542. Changed the default global AM max-attempts value from 1 to 2.
+    (Zhijie Shen via vinodkv)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -186,7 +186,7 @@ public class YarnConfiguration extends Configuration {
    */
   public static final String RM_AM_MAX_ATTEMPTS =
     RM_PREFIX + "am.max-attempts";
-  public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 1;
+  public static final int DEFAULT_RM_AM_MAX_ATTEMPTS = 2;
   
   /** The keytab for the resource manager.*/
   public static final String RM_KEYTAB = 

+ 3 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -145,9 +145,10 @@
     setting for all application masters. Each application master can specify
     its individual maximum number of application attempts via the API, but the
     individual number cannot be more than the global upper bound. If it is,
-    the resourcemanager will override it.</description>
+    the resourcemanager will override it. The default is 2, which allows at
+    least one retry of the application master.</description>
     <name>yarn.resourcemanager.am.max-attempts</name>
-    <value>1</value>
+    <value>2</value>
   </property>
 
   <property>

+ 6 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java

@@ -64,7 +64,9 @@ public class TestRMRestart {
     "org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
     conf.set(YarnConfiguration.RM_SCHEDULER, 
     "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler");
-    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 5);
+    Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
+    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+        YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
 
     MemoryRMStateStore memStore = new MemoryRMStateStore();
     memStore.init(conf);
@@ -321,7 +323,9 @@ public class TestRMRestart {
     conf.set(YarnConfiguration.RECOVERY_ENABLED, "true");
     conf.set(YarnConfiguration.RM_STORE, 
     "org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore");
-    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
+    Assert.assertTrue(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS > 1);
+    conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+        YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
 
     MemoryRMStateStore memStore = new MemoryRMStateStore();
     memStore.init(conf);

+ 4 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java

@@ -62,7 +62,8 @@ public class TestRMAppTransitions {
   static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class);
 
   private RMContext rmContext;
-  private static int maxAppAttempts = 4;
+  private static int maxAppAttempts =
+      YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS;
   private static int appId = 1;
   private DrainDispatcher rmDispatcher;
 
@@ -499,6 +500,7 @@ public class TestRMAppTransitions {
 
     RMApp application = testCreateAppAccepted(null);
     // ACCEPTED => ACCEPTED event RMAppEventType.ATTEMPT_FAILED
+    Assert.assertTrue(maxAppAttempts > 1);
     for (int i=1; i < maxAppAttempts; i++) {
       RMAppEvent event = 
           new RMAppFailedAttemptEvent(application.getApplicationId(), 
@@ -562,6 +564,7 @@ public class TestRMAppTransitions {
     Assert.assertEquals(expectedAttemptId, 
         appAttempt.getAppAttemptId().getAttemptId());
     // RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED
+    Assert.assertTrue(maxAppAttempts > 1);
     for (int i=1; i<maxAppAttempts; i++) {
       RMAppEvent event = 
           new RMAppFailedAttemptEvent(application.getApplicationId(), 

+ 5 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java

@@ -83,7 +83,8 @@ public class TestRMWebServicesApps extends JerseyTest {
       bind(RMWebServices.class);
       bind(GenericExceptionHandler.class);
       Configuration conf = new Configuration();
-      conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
+      conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+          YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
       conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
           ResourceScheduler.class);
       rm = new MockRM(conf);
@@ -871,8 +872,10 @@ public class TestRMWebServicesApps extends JerseyTest {
     MockNM amNodeManager = rm.registerNode("amNM:1234", 2048);
     RMApp app1 = rm.submitApp(1024, "testwordcount", "user1");
     amNodeManager.nodeHeartbeat(true);
-    int maxAppAttempts = rm.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+    int maxAppAttempts = rm.getConfig().getInt(
+        YarnConfiguration.RM_AM_MAX_ATTEMPTS,
         YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
+    assertTrue(maxAppAttempts > 1);
     int retriesLeft = maxAppAttempts;
     while (--retriesLeft > 0) {
       RMAppEvent event =