Browse Source

YARN-4307. Display blacklisted nodes for AM container in the RM web UI. Contributed by Naganarasimha G R.

(cherry picked from commit 308d63f382e1992ea2b8ccf3130edaaa751c644d)
(cherry picked from commit 99966b1912320326fb63be1ce6d4222d736ca365)
Varun Vasudev, cách đây 9 năm
mục cha
commit
5b49bcaee3
9 tập tin đã thay đổi với 80 bổ sung và 54 xóa
  1. 9 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/hamlet/Hamlet.java
  2. 4 5
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/SimpleBlacklistManager.java
  3. 6 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
  4. 17 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
  5. 19 27
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java
  6. 16 16
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java
  7. 3 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java
  8. 5 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java
  9. 1 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java

+ 9 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/hamlet/Hamlet.java

@@ -18,10 +18,13 @@
 
 // Generated by HamletGen. Do NOT edit!
 package org.apache.hadoop.yarn.webapp.hamlet;
+import static java.util.EnumSet.of;
+import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.ENDTAG;
+import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.INLINE;
+import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.PRE;
+
 import java.io.PrintWriter;
 import java.util.EnumSet;
-import static java.util.EnumSet.*;
-import static org.apache.hadoop.yarn.webapp.hamlet.HamletImpl.EOpt.*;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.yarn.webapp.SubView;
@@ -2405,6 +2408,10 @@ public class Hamlet extends HamletImpl implements HamletSpec._Html {
       return setSelector(th(), selector)._(cdata)._();
     }
 
+    public TR<T> th(String selector, String title, String cdata) {
+      return setSelector(th(), selector).$title(title)._(cdata)._();
+    }
+
     @Override
     public TD<TR<T>> td() {
       closeAttrs();

+ 4 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/blacklist/SimpleBlacklistManager.java

@@ -72,11 +72,10 @@ public class SimpleBlacklistManager implements BlacklistManager {
       }
       ret = new BlacklistUpdates(blacklist, EMPTY_LIST);
     } else {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("blacklist size " + currentBlacklistSize + " is more than " +
-            "failure threshold ratio " + blacklistDisableFailureThreshold +
-            " out of total usable nodes " + numberOfNodeManagerHosts);
-      }
+      LOG.warn("Ignoring Blacklists, blacklist size " + currentBlacklistSize
+          + " is more than failure threshold ratio "
+          + blacklistDisableFailureThreshold + " out of total usable nodes "
+          + numberOfNodeManagerHosts);
       ret = new BlacklistUpdates(EMPTY_LIST, blacklist);
     }
     return ret;

+ 6 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java

@@ -19,6 +19,7 @@
 package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt;
 
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.ConcurrentMap;
 
 import javax.crypto.SecretKey;
@@ -252,4 +253,9 @@ public interface RMAppAttempt extends EventHandler<RMAppAttemptEvent> {
    * @param amLaunchDiagnostics
    */
   void updateAMLaunchDiagnostics(String amLaunchDiagnostics);
+
+  /**
+   * @return Set of nodes which are blacklisted by the application
+   */
+  Set<String> getBlacklistedNodes();
 }

+ 17 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java

@@ -29,6 +29,7 @@ import java.util.Collections;
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -92,7 +93,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
@@ -2151,4 +2154,18 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
   public void updateAMLaunchDiagnostics(String amLaunchDiagnostics) {
     this.amLaunchDiagnostics = amLaunchDiagnostics;
   }
+
+  @Override
+  public Set<String> getBlacklistedNodes() {
+    if (scheduler instanceof AbstractYarnScheduler) {
+      AbstractYarnScheduler ayScheduler = (AbstractYarnScheduler) scheduler;
+      SchedulerApplicationAttempt attempt =
+          ayScheduler.getApplicationAttempt(applicationAttemptId);
+      if (attempt != null) {
+        return attempt.getBlacklistedNodes();
+      }
+    }
+    // Other scheduler type or unknown attempt: fall back to an empty set.
+    return Collections.emptySet();
+  }
 }

+ 19 - 27
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppAttemptBlock.java

@@ -23,9 +23,11 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP;
 import static org.apache.hadoop.yarn.webapp.view.JQueryUI._ODD;
 import static org.apache.hadoop.yarn.webapp.view.JQueryUI._TH;
 
+import java.util.Collection;
+import java.util.List;
+
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ContainerReport;
@@ -36,8 +38,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
 import org.apache.hadoop.yarn.server.webapp.AppAttemptBlock;
 import org.apache.hadoop.yarn.server.webapp.dao.AppAttemptInfo;
@@ -48,11 +48,8 @@ import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
 import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
 import org.apache.hadoop.yarn.webapp.view.InfoBlock;
-import com.google.inject.Inject;
-import java.util.List;
 
-import java.util.Collection;
-import java.util.Set;
+import com.google.inject.Inject;
 
 public class RMAppAttemptBlock extends AppAttemptBlock{
 
@@ -207,14 +204,13 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
       Collection<ContainerReport> containers, AppAttemptInfo appAttempt,
       String node) {
 
-    String blacklistedNodes = "-";
-    Set<String> nodes =
-        getBlacklistedNodes(rm, getRMAppAttempt().getAppAttemptId());
-    if (nodes != null) {
-      if (!nodes.isEmpty()) {
-        blacklistedNodes = StringUtils.join(nodes, ", ");
-      }
-    }
+    RMAppAttempt rmAppAttempt = getRMAppAttempt();
+    // nodes which are blacklisted by the application
+    String appBlacklistedNodes =
+        getNodeString(rmAppAttempt.getBlacklistedNodes());
+    // nodes which are blacklisted by the RM for AM launches
+    String rmBlackListedNodes = getNodeString(
+        rmAppAttempt.getAMBlacklist().getBlacklistUpdates().getAdditions());
 
     info("Application Attempt Overview")
       ._(
@@ -248,21 +244,17 @@ public class RMAppAttemptBlock extends AppAttemptBlock{
       ._(
         "Diagnostics Info:",
         appAttempt.getDiagnosticsInfo() == null ? "" : appAttempt
-          .getDiagnosticsInfo())._("Blacklisted Nodes:", blacklistedNodes);
+          .getDiagnosticsInfo())
+      ._("Application Blacklisted Nodes:", appBlacklistedNodes)
+      ._("RM Blacklisted Nodes(for AM launches)", rmBlackListedNodes);
   }
 
-  public static Set<String> getBlacklistedNodes(ResourceManager rm,
-      ApplicationAttemptId appid) {
-    if (rm.getResourceScheduler() instanceof AbstractYarnScheduler) {
-      AbstractYarnScheduler ayScheduler =
-          (AbstractYarnScheduler) rm.getResourceScheduler();
-      SchedulerApplicationAttempt attempt =
-          ayScheduler.getApplicationAttempt(appid);
-      if (attempt != null) {
-        return attempt.getBlacklistedNodes();
-      }
+  private String getNodeString(Collection<String> nodes) {
+    String concatenatedString = "-"; // shown when there are no nodes
+    if (null != nodes && !nodes.isEmpty()) {
+      concatenatedString = StringUtils.join(nodes, ", ");
     }
-    return null;
+    return concatenatedString;
   }
 
   @Override

+ 16 - 16
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppBlock.java

@@ -20,9 +20,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp;
 
 import static org.apache.hadoop.yarn.webapp.view.JQueryUI._INFO_WRAP;
 
+import java.util.Collection;
+import java.util.Set;
+
 import org.apache.commons.lang.StringEscapeUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptReport;
@@ -43,12 +44,8 @@ import org.apache.hadoop.yarn.webapp.view.InfoBlock;
 
 import com.google.inject.Inject;
 
-import java.util.Collection;
-import java.util.Set;
-
 public class RMAppBlock extends AppBlock{
 
-  private static final Log LOG = LogFactory.getLog(RMAppBlock.class);
   private final ResourceManager rm;
   private final Configuration conf;
 
@@ -116,7 +113,10 @@ public class RMAppBlock extends AppBlock{
     Hamlet.TBODY<Hamlet.TABLE<Hamlet>> tbody =
         html.table("#attempts").thead().tr().th(".id", "Attempt ID")
             .th(".started", "Started").th(".node", "Node").th(".logs", "Logs")
-            .th(".blacklistednodes", "Blacklisted Nodes")._()._().tbody();
+            .th(".appBlacklistednodes", "Nodes black listed by the application",
+                "App Blacklisted Nodes")
+            .th(".rmBlacklistednodes", "Nodes black listed by the RM for the"
+                + " app", "RM Blacklisted Nodes")._()._().tbody();
 
     RMApp rmApp = this.rm.getRMContext().getRMApps().get(this.appID);
     if (rmApp == null) {
@@ -132,13 +132,12 @@ public class RMAppBlock extends AppBlock{
       AppAttemptInfo attemptInfo =
           new AppAttemptInfo(this.rm, rmAppAttempt, rmApp.getUser(),
               WebAppUtils.getHttpSchemePrefix(conf));
-      String blacklistedNodesCount = "N/A";
-      Set<String> nodes =
-          RMAppAttemptBlock.getBlacklistedNodes(rm,
-            rmAppAttempt.getAppAttemptId());
-      if(nodes != null) {
-        blacklistedNodesCount = String.valueOf(nodes.size());
-      }
+      Set<String> nodes = rmAppAttempt.getBlacklistedNodes();
+      // nodes which are blacklisted by the application
+      String appBlacklistedNodesCount = String.valueOf(nodes.size());
+      // nodes which are blacklisted by the RM for AM launches
+      String rmBlacklistedNodesCount = String.valueOf(rmAppAttempt
+          .getAMBlacklist().getBlacklistUpdates().getAdditions().size());
       String nodeLink = attemptInfo.getNodeHttpAddress();
       if (nodeLink != null) {
         nodeLink = WebAppUtils.getHttpSchemePrefix(conf) + nodeLink;
@@ -158,8 +157,9 @@ public class RMAppBlock extends AppBlock{
               .escapeJavaScript(StringEscapeUtils.escapeHtml(nodeLink)))
           .append("</a>\",\"<a ")
           .append(logsLink == null ? "#" : "href='" + logsLink).append("'>")
-          .append(logsLink == null ? "N/A" : "Logs").append("</a>\",").append(
-          "\"").append(blacklistedNodesCount).append("\"],\n");
+          .append(logsLink == null ? "N/A" : "Logs").append("</a>\",")
+          .append("\"").append(appBlacklistedNodesCount).append("\",")
+          .append("\"").append(rmBlacklistedNodesCount).append("\"],\n");
     }
     if (attemptsTableData.charAt(attemptsTableData.length() - 2) == ',') {
       attemptsTableData.delete(attemptsTableData.length() - 2,

+ 3 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java

@@ -95,8 +95,9 @@ public class RMAppsBlock extends AppsBlock {
       }
 
       String blacklistedNodesCount = "N/A";
-      Set<String> nodes =
-          RMAppAttemptBlock.getBlacklistedNodes(rm, appAttemptId);
+      Set<String> nodes = rm.getRMContext().getRMApps()
+          .get(appAttemptId.getApplicationId()).getAppAttempts()
+          .get(appAttemptId).getBlacklistedNodes();
       if (nodes != null) {
         blacklistedNodesCount = String.valueOf(nodes.size());
       }

+ 5 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java

@@ -27,7 +27,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
-import org.apache.hadoop.yarn.util.ConverterUtils;
 import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
 
 @XmlRootElement(name = "appAttempt")
@@ -41,6 +40,7 @@ public class AppAttemptInfo {
   protected String nodeId;
   protected String logsLink;
   protected String blacklistedNodes;
+  protected String rmBlacklistedNodesForAMLaunches;
 
   public AppAttemptInfo() {
   }
@@ -64,6 +64,10 @@ public class AppAttemptInfo {
         this.logsLink = WebAppUtils.getRunningLogURL(schemePrefix
             + masterContainer.getNodeHttpAddress(),
             masterContainer.getId().toString(), user);
+
+        rmBlacklistedNodesForAMLaunches = StringUtils.join(
+            attempt.getAMBlacklist().getBlacklistUpdates().getAdditions(),
+            ", ");
         if (rm.getResourceScheduler() instanceof AbstractYarnScheduler) {
           AbstractYarnScheduler ayScheduler =
               (AbstractYarnScheduler) rm.getResourceScheduler();

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java

@@ -1659,7 +1659,7 @@ public class TestRMWebServicesApps extends JerseyTestBase {
       String user)
       throws JSONException, Exception {
 
-    assertEquals("incorrect number of elements", 7, info.length());
+    assertEquals("incorrect number of elements", 8, info.length());
 
     verifyAppAttemptInfoGeneric(appAttempt, info.getInt("id"),
         info.getLong("startTime"), info.getString("containerId"),