浏览代码

YARN-9900. Revert to previous state when Invalid Config is applied and Refresh Support in SchedulerConfig Format. Contributed by Prabhu Joseph.

Sunil G 5 年之前
父节点
当前提交
090f73a9aa

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/MutableConfigurationProvider.java

@@ -73,6 +73,8 @@ public interface MutableConfigurationProvider {
 
   void formatConfigurationInStore(Configuration conf) throws Exception;
 
+  void revertToOldConfig(Configuration config) throws Exception;
+
   /**
    * Closes the configuration provider, releasing any required resources.
    * @throws IOException on failure to close

+ 16 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/MutableCSConfigurationProvider.java

@@ -166,6 +166,7 @@ public class MutableCSConfigurationProvider implements CSConfigurationProvider,
     formatLock.writeLock().lock();
     try {
       confStore.format();
+      oldConf = new Configuration(schedConf);
       Configuration initialSchedConf = new Configuration(false);
       initialSchedConf.addResource(YarnConfiguration.CS_CONFIGURATION_FILE);
       this.schedConf = new Configuration(false);
@@ -184,6 +185,21 @@ public class MutableCSConfigurationProvider implements CSConfigurationProvider,
     }
   }
 
+  @Override
+  public void revertToOldConfig(Configuration config) throws Exception {
+    formatLock.writeLock().lock();
+    try {
+      schedConf = oldConf;
+      confStore.format();
+      confStore.initialize(config, oldConf, rmContext);
+      confStore.checkVersion();
+    } catch (Exception e) {
+      throw new IOException(e);
+    } finally {
+      formatLock.writeLock().unlock();
+    }
+  }
+
   @Override
   public void confirmPendingMutation(boolean isValid) throws Exception {
     formatLock.readLock().lock();

+ 7 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java

@@ -2582,6 +2582,13 @@ public class RMWebServices extends WebServices implements RMWebServiceProtocol {
         MutableConfigurationProvider mutableConfigurationProvider =
             ((MutableConfScheduler) scheduler).getMutableConfProvider();
         mutableConfigurationProvider.formatConfigurationInStore(conf);
+        try {
+          rm.getRMContext().getRMAdminService().refreshQueues();
+        } catch (IOException | YarnException e) {
+          LOG.error("Exception thrown when formatting configuration.", e);
+          mutableConfigurationProvider.revertToOldConfig(conf);
+          throw e;
+        }
         return Response.status(Status.OK).entity("Configuration under " +
             "store successfully formatted.").build();
       } catch (Exception e) {

+ 32 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesConfigurationMutation.java

@@ -191,15 +191,44 @@ public class TestRMWebServicesConfigurationMutation extends JerseyTestBase {
 
   @Test
   public void testFormatSchedulerConf() throws Exception {
-    testAddNestedQueue();
+    CapacitySchedulerConfiguration newConf = getSchedulerConf();
+    assertNotNull(newConf);
+    assertEquals(3, newConf.getQueues("root").length);
+
+    SchedConfUpdateInfo updateInfo = new SchedConfUpdateInfo();
+    Map<String, String> nearEmptyCapacity = new HashMap<>();
+    nearEmptyCapacity.put(CapacitySchedulerConfiguration.CAPACITY, "1E-4");
+    QueueConfigInfo d = new QueueConfigInfo("root.formattest",
+        nearEmptyCapacity);
+    updateInfo.getAddQueueInfo().add(d);
+
+    Map<String, String> stoppedParam = new HashMap<>();
+    stoppedParam.put(CapacitySchedulerConfiguration.STATE,
+        QueueState.STOPPED.toString());
+    QueueConfigInfo stoppedInfo = new QueueConfigInfo("root.formattest",
+        stoppedParam);
+    updateInfo.getUpdateQueueInfo().add(stoppedInfo);
+
+    // Add a queue root.formattest to the existing three queues
     WebResource r = resource();
     ClientResponse response = r.path("ws").path("v1").path("cluster")
+        .path("scheduler-conf").queryParam("user.name", userName)
+        .accept(MediaType.APPLICATION_JSON)
+        .entity(YarnWebServiceUtils.toJson(updateInfo,
+        SchedConfUpdateInfo.class), MediaType.APPLICATION_JSON)
+        .put(ClientResponse.class);
+    newConf = getSchedulerConf();
+    assertNotNull(newConf);
+    assertEquals(4, newConf.getQueues("root").length);
+
+    // Format the scheduler config and validate root.formattest is not present
+    response = r.path("ws").path("v1").path("cluster")
         .queryParam("user.name", userName)
         .path(RMWSConsts.FORMAT_SCHEDULER_CONF)
         .accept(MediaType.APPLICATION_JSON).get(ClientResponse.class);
     assertEquals(Status.OK.getStatusCode(), response.getStatus());
-    CapacitySchedulerConfiguration orgConf = getSchedulerConf();
-    assertEquals(3, orgConf.getQueues("root").length);
+    newConf = getSchedulerConf();
+    assertEquals(3, newConf.getQueues("root").length);
   }
 
   private long getConfigVersion() throws Exception {