瀏覽代碼

YARN-2392. Add more diags about app retry limits on AM failures. Contributed by Steve Loughran

Jian He 10 年之前
父節點
當前提交
1970ca7cbc

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -289,6 +289,9 @@ Release 2.8.0 - UNRELEASED
     YARN-3467. Expose allocatedMB, allocatedVCores, and runningContainers metrics on 
     running Applications in RM Web UI. (Anubhav Dhoot via kasha)
 
+    YARN-2392. Add more diags about app retry limits on AM failures. (Steve
+    Loughran via jianhe)
+
   OPTIMIZATIONS
 
     YARN-3339. TestDockerContainerExecutor should pull a single image and not

+ 13 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java

@@ -1014,9 +1014,19 @@ public class RMAppImpl implements RMApp, Recoverable {
               + " failed due to " + failedEvent.getDiagnostics()
               + ". Failing the application.";
     } else if (this.isNumAttemptsBeyondThreshold) {
-      msg = "Application " + this.getApplicationId() + " failed "
-              + this.maxAppAttempts + " times due to "
-              + failedEvent.getDiagnostics() + ". Failing the application.";
+      int globalLimit = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
+          YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
+      msg = String.format(
+        "Application %s failed %d times%s%s due to %s. Failing the application.",
+          getApplicationId(),
+          maxAppAttempts,
+          (attemptFailuresValidityInterval <= 0 ? ""
+               : (" in previous " + attemptFailuresValidityInterval
+                  + " milliseconds")),
+          (globalLimit == maxAppAttempts) ? ""
+              : (" (global limit =" + globalLimit
+                 + "; local limit is =" + maxAppAttempts + ")"),
+          failedEvent.getDiagnostics());
     }
     return msg;
   }

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java

@@ -1459,9 +1459,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
         .append(status.getDiagnostics());
     if (this.getTrackingUrl() != null) {
       diagnosticsBuilder.append("For more detailed output,").append(
-        " check application tracking page: ").append(
+        " check the application tracking page: ").append(
         this.getTrackingUrl()).append(
-        " Then, click on links to logs of each attempt.\n");
+        " Then click on links to logs of each attempt.\n");
     }
     return diagnosticsBuilder.toString();
   }