ソースを参照

YARN-3971. Skip RMNodeLabelsManager#checkRemoveFromClusterNodeLabelsOfQueue on nodelabel recovery. (Bibin A Chundatt via wangda)

Wangda Tan 9 年 前
コミット
91b42e7d6e

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -713,6 +713,9 @@ Release 2.8.0 - UNRELEASED
     YARN-3963. AddNodeLabel on duplicate label addition shows success.
     (Bibin A Chundatt via wangda)
 
+    YARN-3971. Skip RMNodeLabelsManager#checkRemoveFromClusterNodeLabelsOfQueue 
+    on nodelabel recovery. (Bibin A Chundatt via wangda)
+
 Release 2.7.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 10 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java

@@ -33,8 +33,8 @@ import java.util.concurrent.ConcurrentMap;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.service.Service;
 import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.api.records.NodeLabel;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
 import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
@@ -114,9 +114,15 @@ public class RMNodeLabelsManager extends CommonNodeLabelsManager {
       throws IOException {
     try {
       writeLock.lock();
-
-      checkRemoveFromClusterNodeLabelsOfQueue(labelsToRemove);
-
+      if (getServiceState() == Service.STATE.STARTED) {
+        // We cannot remove node labels from the collection while some queue(s)
+        // are using any of them.
+        // We only do this check once the service has finished starting. Before
+        // that, we replay edit logs and recover state, and it is possible that
+        // a historical operation removed labels which were used by some queues
+        // in the past but are not used by current queues.
+        checkRemoveFromClusterNodeLabelsOfQueue(labelsToRemove);
+      }
       // copy before NMs
       Map<String, Host> before = cloneNodeMap();
 

+ 46 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java

@@ -18,7 +18,10 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.nodelabels;
 
+import java.io.File;
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -31,6 +34,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
 import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
 import org.apache.hadoop.yarn.nodelabels.NodeLabelTestBase;
+import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
 import org.apache.hadoop.yarn.util.resource.Resources;
 import org.junit.After;
 import org.junit.Assert;
@@ -46,7 +50,8 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase {
   private final Resource LARGE_NODE = Resource.newInstance(1000, 0);
   
   NullRMNodeLabelsManager mgr = null;
-
+  RMNodeLabelsManager lmgr = null;
+  boolean checkQueueCall = false;
   @Before
   public void before() {
     mgr = new NullRMNodeLabelsManager();
@@ -506,7 +511,46 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase {
     checkNodeLabelInfo(infos, "y", 1, 10);
     checkNodeLabelInfo(infos, "z", 0, 0);
   }
-  
+
+  @Test(timeout = 60000)
+  public void testcheckRemoveFromClusterNodeLabelsOfQueue() throws Exception { // queue check: not recorded before rm.start(), recorded after
+    class TestRMLabelManger extends RMNodeLabelsManager { // local subclass that only records whether the check was invoked
+      @Override
+      protected void checkRemoveFromClusterNodeLabelsOfQueue(
+          Collection<String> labelsToRemove) throws IOException {
+        checkQueueCall = true;
+        // Only record the invocation; no queue validation is performed here.
+      }
+
+    }
+    lmgr = new TestRMLabelManger();
+    Configuration conf = new Configuration();
+    File tempDir = File.createTempFile("nlb", ".tmp"); // reserves a unique path (created as a file)
+    tempDir.delete(); // remove the file ...
+    tempDir.mkdirs(); // ... and recreate the same path as a directory
+    tempDir.deleteOnExit();
+    conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR,
+        tempDir.getAbsolutePath()); // node label store root points at the temp directory
+    conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
+    MockRM rm = new MockRM(conf) {
+      @Override
+      public RMNodeLabelsManager createNodeLabelManager() {
+        return lmgr; // hand the recording manager to the RM
+      }
+    };
+    lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a")); // add and remove "a" before rm.start()
+    lmgr.removeFromClusterNodeLabels(Arrays.asList(new String[] { "a" }));
+    rm.getRMContext().setNodeLabelManager(lmgr);
+    rm.start(); // NOTE(review): presumably the stored add/remove is replayed during start - confirm
+    lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a"));
+    Assert.assertEquals(false, checkQueueCall); // nothing so far (pre-start removal, rm.start()) may have invoked the check
+    lmgr.removeFromClusterNodeLabels(Arrays.asList(new String[] { "a" })); // removal after rm.start()
+    Assert.assertEquals(true, checkQueueCall); // the post-start removal must have invoked the check
+    lmgr.stop();
+    lmgr.close();
+    rm.stop();
+  }
+
   @Test(timeout = 5000)
   public void testLabelsToNodesOnNodeActiveDeactive() throws Exception {
     // Activate a node without assigning any labels