Bläddra i källkod

YARN-2523. ResourceManager UI showing negative value for "Decommissioned Nodes" field. Contributed by Rohith
(cherry picked from commit 8269bfa613999f71767de3c0369817b58cfe1416)

Jason Lowe 10 år sedan
förälder
incheckning
e4d46e5ff6

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -417,6 +417,9 @@ Release 2.6.0 - UNRELEASED
     YARN-2546. Made REST API for application creation/submission use numeric and
     boolean types instead of the string of them. (Varun Vasudev via zjshen)
 
+    YARN-2523. ResourceManager UI showing negative value for "Decommissioned
+    Nodes" field (Rohith via jlowe)
+
 Release 2.5.1 - 2014-09-05
 
   INCOMPATIBLE CHANGES

+ 0 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java

@@ -121,7 +121,6 @@ public class NodesListManager extends AbstractService implements
                   this.conf, includesFile), excludesFile.isEmpty() ? null
               : this.rmContext.getConfigurationProvider()
                   .getConfigurationInputStream(this.conf, excludesFile));
-      setDecomissionedNMsMetrics();
       printConfiguredHosts();
     }
   }

+ 0 - 13
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java

@@ -460,22 +460,9 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
         break;
     }
 
-    // Decomissioned NMs equals to the nodes missing in include list (if
-    // include list not empty) or the nodes listed in excluded list.
-    // DecomissionedNMs as per exclude list is set upfront when the
-    // exclude list is read so that RM restart can also reflect the
-    // decomissionedNMs. Note that RM is still not able to know decomissionedNMs
-    // as per include list after it restarts as they are known when those nodes
-    // come for registration.
-    // DecomissionedNMs as per include list is incremented in this transition.
     switch (finalState) {
     case DECOMMISSIONED:
-      Set<String> ecludedHosts =
-          context.getNodesListManager().getHostsReader().getExcludedHosts();
-      if (!ecludedHosts.contains(hostName)
-          && !ecludedHosts.contains(NetUtils.normalizeHostName(hostName))) {
         metrics.incrDecommisionedNMs();
-      }
       break;
     case LOST:
       metrics.incrNumLostNMs();

+ 22 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java

@@ -77,6 +77,8 @@ import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.DrainDispatcher;
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
 import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
@@ -1833,10 +1835,16 @@ public class TestRMRestart {
     conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH,
       hostFile.getAbsolutePath());
     writeToHostsFile("");
-    MockRM rm1 = new MockRM(conf);
+    final DrainDispatcher dispatcher = new DrainDispatcher();
+    MockRM rm1 = new MockRM(conf) {
+      @Override
+      protected Dispatcher createDispatcher() {
+        return dispatcher;
+      }
+    };
     rm1.start();
-    rm1.registerNode("localhost:1234", 8000);
-    rm1.registerNode("host2:1234", 8000);
+    MockNM nm1 = rm1.registerNode("localhost:1234", 8000);
+    MockNM nm2 = rm1.registerNode("host2:1234", 8000);
     Assert
       .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
     String ip = NetUtils.normalizeHostName("localhost");
@@ -1845,15 +1853,25 @@ public class TestRMRestart {
 
     // refresh nodes
     rm1.getNodesListManager().refreshNodes(conf);
+    NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
+    Assert
+        .assertTrue(NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
+    nodeHeartbeat = nm2.nodeHeartbeat(true);
+    Assert.assertTrue("The decommisioned metrics are not updated",
+        NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
+
+    dispatcher.await();
     Assert
       .assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
+    rm1.stop();
+    Assert
+        .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
 
     // restart RM.
     MockRM rm2 = new MockRM(conf);
     rm2.start();
     Assert
       .assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
-    rm1.stop();
     rm2.stop();
   }
 

+ 23 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java

@@ -157,25 +157,33 @@ public class TestResourceTrackerService {
         .getAbsolutePath());
 
     writeToHostsFile("");
-    rm = new MockRM(conf);
+    final DrainDispatcher dispatcher = new DrainDispatcher();
+    rm = new MockRM(conf) {
+      @Override
+      protected Dispatcher createDispatcher() {
+        return dispatcher;
+      }
+    };
     rm.start();
 
     MockNM nm1 = rm.registerNode("host1:1234", 5120);
     MockNM nm2 = rm.registerNode("host2:5678", 10240);
     MockNM nm3 = rm.registerNode("localhost:4433", 1024);
 
+    dispatcher.await();
+
     int metricCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs();
     NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true);
     Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
     nodeHeartbeat = nm2.nodeHeartbeat(true);
     Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
+    dispatcher.await();
 
     // To test that IPs also work
     String ip = NetUtils.normalizeHostName("localhost");
     writeToHostsFile("host2", ip);
 
     rm.getNodesListManager().refreshNodes(conf);
-    checkDecommissionedNMCount(rm, metricCount + 2);
 
     nodeHeartbeat = nm1.nodeHeartbeat(true);
     Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
@@ -186,6 +194,19 @@ public class TestResourceTrackerService {
     nodeHeartbeat = nm3.nodeHeartbeat(true);
     Assert.assertTrue("The decommisioned metrics are not updated",
         NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
+    dispatcher.await();
+    checkDecommissionedNMCount(rm, metricCount + 2);
+    writeToHostsFile("");
+    rm.getNodesListManager().refreshNodes(conf);
+
+    nm3 = rm.registerNode("localhost:4433", 1024);
+    dispatcher.await();
+    nodeHeartbeat = nm3.nodeHeartbeat(true);
+    dispatcher.await();
+    Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction()));
+    // decommissined node is 1 since 1 node is rejoined after updating exclude
+    // file
+    checkDecommissionedNMCount(rm, metricCount + 1);
   }
 
   /**