Browse Source

YARN-8679. [ATSv2] If the HBase cluster is down for a long time, there is a high chance that the NM ContainerManager dispatcher gets blocked. Contributed by Wangda Tan.

(cherry picked from commit 4aacbfff605262aaf3dbd926258afcadc86c72c0)
Rohith Sharma K S 6 years ago
parent
commit
c68d1d49ca

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMLaunchFailure.java

@@ -90,7 +90,7 @@ public class TestAMLaunchFailure {
 //    }
 //
 //    @Override
-//    public void addApplication(ApplicationId applicationId,
+//    public void addApplicationIfAbsent(ApplicationId applicationId,
 //        ApplicationMaster master, String user, String queue, Priority priority
 //        , ApplicationStore appStore)
 //        throws IOException {

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestSchedulerNegotiator.java

@@ -67,7 +67,7 @@ public class TestSchedulerNegotiator {
 //      return null;
 //    }
 //    @Override
-//    public void addApplication(ApplicationId applicationId,
+//    public void addApplicationIfAbsent(ApplicationId applicationId,
 //        ApplicationMaster master, String user, String queue, Priority priority,
 //        ApplicationStore store)
 //        throws IOException {

+ 2 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/TestTimelineServiceClientIntegration.java

@@ -84,7 +84,8 @@ public class TestTimelineServiceClientIntegration {
       auxService =
           PerNodeTimelineCollectorsAuxService.launchServer(new String[0],
               collectorManager, conf);
-      auxService.addApplication(ApplicationId.newInstance(0, 1), "user");
+      auxService
+          .addApplicationIfAbsent(ApplicationId.newInstance(0, 1), "user");
     } catch (ExitUtil.ExitException e) {
       fail();
     }

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/timelineservice/security/TestTimelineAuthFilterForV2.java

@@ -210,7 +210,7 @@ public class TestTimelineAuthFilterForV2 {
           YarnConfiguration.TIMELINE_SERVICE_PRINCIPAL, "localhost");
     }
     ApplicationId appId = ApplicationId.newInstance(0, 1);
-    auxService.addApplication(
+    auxService.addApplicationIfAbsent(
         appId, UserGroupInformation.getCurrentUser().getUserName());
     if (!withKerberosLogin) {
       AppLevelTimelineCollector collector =

+ 9 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/collector/PerNodeTimelineCollectorsAuxService.java

@@ -125,7 +125,7 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
    * @param user Application Master container user.
    * @return whether it was added successfully
    */
-  public boolean addApplication(ApplicationId appId, String user) {
+  public boolean addApplicationIfAbsent(ApplicationId appId, String user) {
     AppLevelTimelineCollector collector =
         new AppLevelTimelineCollectorWithAgg(appId, user);
     return (collectorManager.putIfAbsent(appId, collector)
@@ -156,15 +156,15 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
     if (context.getContainerType() == ContainerType.APPLICATION_MASTER) {
       ApplicationId appId = context.getContainerId().
           getApplicationAttemptId().getApplicationId();
-      synchronized (appIdToContainerId) {
+      synchronized (appIdToContainerId){
         Set<ContainerId> masterContainers = appIdToContainerId.get(appId);
         if (masterContainers == null) {
           masterContainers = new HashSet<>();
           appIdToContainerId.put(appId, masterContainers);
         }
         masterContainers.add(context.getContainerId());
-        addApplication(appId, context.getUser());
       }
+      addApplicationIfAbsent(appId, context.getUser());
     }
   }
 
@@ -189,6 +189,7 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
         containerId.getApplicationAttemptId().getApplicationId();
     return scheduler.schedule(new Runnable() {
       public void run() {
+        boolean shouldRemoveApplication = false;
         synchronized (appIdToContainerId) {
           Set<ContainerId> masterContainers = appIdToContainerId.get(appId);
           if (masterContainers == null) {
@@ -199,10 +200,14 @@ public class PerNodeTimelineCollectorsAuxService extends AuxiliaryService {
           masterContainers.remove(containerId);
           if (masterContainers.size() == 0) {
             // remove only if it is last master container
-            removeApplication(appId);
+            shouldRemoveApplication = true;
             appIdToContainerId.remove(appId);
           }
         }
+
+        if (shouldRemoveApplication) {
+          removeApplication(appId);
+        }
       }
     }, collectorLingerPeriod, TimeUnit.MILLISECONDS);
   }