Browse Source

YARN-9719. Fixed YARN service restart bug when application ID no longer exists in RM.
Contributed by kyungwan nam

Eric Yang 5 years ago
parent
commit
201dc667e9

+ 12 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java

@@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.client.api.YarnClientApplication;
 import org.apache.hadoop.yarn.client.cli.ApplicationCLI;
 import org.apache.hadoop.yarn.client.util.YarnClientUtils;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 import org.apache.hadoop.yarn.proto.ClientAMProtocol.CancelUpgradeRequestProto;
@@ -1558,7 +1559,17 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
       return appSpec;
     }
     appSpec.setId(currentAppId.toString());
-    ApplicationReport appReport = yarnClient.getApplicationReport(currentAppId);
+    ApplicationReport appReport = null;
+    try {
+      appReport = yarnClient.getApplicationReport(currentAppId);
+    } catch (ApplicationNotFoundException e) {
+      LOG.info("application ID {} doesn't exist", currentAppId);
+      return appSpec;
+    }
+    if (appReport == null) {
+      LOG.warn("application ID {} is reported as null", currentAppId);
+      return appSpec;
+    }
     appSpec.setState(convertState(appReport.getYarnApplicationState()));
     ApplicationTimeout lifetime =
         appReport.getApplicationTimeouts().get(ApplicationTimeoutType.LIFETIME);

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java

@@ -218,6 +218,8 @@ public class ServiceTestUtils {
       setConf(new YarnConfiguration());
       conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, false);
       conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, false);
+      conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
+          YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
     }
     conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
     // reduce the teardown waiting time

+ 29 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java

@@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetContainersRequest;
 import org.apache.hadoop.yarn.api.records.*;
 import org.apache.hadoop.yarn.client.api.YarnClient;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.service.api.records.Component;
@@ -326,6 +327,8 @@ public class TestYarnNativeServices extends ServiceTestUtils {
 
     conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true);
     conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, true);
+    conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
+        YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
     setConf(conf);
     setupInternal(NUM_NMS);
 
@@ -518,6 +521,8 @@ public class TestYarnNativeServices extends ServiceTestUtils {
     YarnConfiguration conf = new YarnConfiguration();
     conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
         YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
+    conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
+        YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
     setConf(conf);
     setupInternal(3);
     ServiceClient client = createClient(getConf());
@@ -727,6 +732,8 @@ public class TestYarnNativeServices extends ServiceTestUtils {
     YarnConfiguration conf = new YarnConfiguration();
     conf.set(YarnConfiguration.RM_PLACEMENT_CONSTRAINTS_HANDLER,
         YarnConfiguration.SCHEDULER_RM_PLACEMENT_CONSTRAINTS_HANDLER);
+    conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS,
+        YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS);
     setConf(conf);
     setupInternal(3);
     ServiceClient client = createClient(getConf());
@@ -909,4 +916,26 @@ public class TestYarnNativeServices extends ServiceTestUtils {
       i++;
     }
   }
+
+  @Test (timeout = 200000)
+  public void testRestartServiceForNonExistingInRM() throws Exception { // create, stop, then restart a service and expect STABLE
+    YarnConfiguration conf = new YarnConfiguration();
+    conf.setInt(YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, 0); // presumably makes the RM forget the app once it completes -- TODO confirm
+    setConf(conf);
+    setupInternal(NUM_NMS);
+    ServiceClient client = createClient(getConf());
+    Service exampleApp = createExampleApplication();
+    client.actionCreate(exampleApp);
+    waitForServiceToBeStable(client, exampleApp); // only stop the service after it has come up
+    try {
+      client.actionStop(exampleApp.getName(), true);
+    } catch (ApplicationNotFoundException e) { // deliberately tolerated: this test targets the restart, not the stop
+      LOG.info("ignore ApplicationNotFoundException during stopping");
+    }
+    client.actionStart(exampleApp.getName()); // the restart under test
+    waitForServiceToBeStable(client, exampleApp);
+    Service service = client.getStatus(exampleApp.getName());
+    Assert.assertEquals("Restarted service state should be STABLE",
+        ServiceState.STABLE, service.getState());
+  }
 }