Преглед изворни кода

YARN-3585. NodeManager cannot exit on SHUTDOWN event triggered and NM recovery is enabled. Contributed by Rohith Sharmaks
(cherry picked from commit e13b671aa510f553f4a6a232b4694b6a4cce88ae)

Jason Lowe пре 10 година
родитељ
комит
752caa95a4

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -571,6 +571,9 @@ Release 2.7.1 - UNRELEASED
     YARN-3725. App submission via REST API is broken in secure mode due to 
     YARN-3725. App submission via REST API is broken in secure mode due to 
     Timeline DT service address is empty. (Zhijie Shen via wangda)
     Timeline DT service address is empty. (Zhijie Shen via wangda)
 
 
+    YARN-3585. NodeManager cannot exit on SHUTDOWN event triggered and NM
+    recovery is enabled (Rohith Sharmaks via jlowe)
+
 Release 2.7.0 - 2015-04-20
 Release 2.7.0 - 2015-04-20
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES

+ 15 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.service.CompositeService;
 import org.apache.hadoop.service.CompositeService;
+import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.GenericOptionsParser;
 import org.apache.hadoop.util.GenericOptionsParser;
 import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.util.NodeHealthScriptRunner;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.ReflectionUtils;
@@ -94,6 +95,7 @@ public class NodeManager extends CompositeService
   
   
   private AtomicBoolean isStopping = new AtomicBoolean(false);
   private AtomicBoolean isStopping = new AtomicBoolean(false);
   private boolean rmWorkPreservingRestartEnabled;
   private boolean rmWorkPreservingRestartEnabled;
+  private boolean shouldExitOnShutdownEvent = false;
 
 
   public NodeManager() {
   public NodeManager() {
     super(NodeManager.class.getName());
     super(NodeManager.class.getName());
@@ -344,7 +346,16 @@ public class NodeManager extends CompositeService
     new Thread() {
     new Thread() {
       @Override
       @Override
       public void run() {
       public void run() {
-        NodeManager.this.stop();
+        try {
+          NodeManager.this.stop();
+        } catch (Throwable t) {
+          LOG.error("Error while shutting down NodeManager", t);
+        } finally {
+          if (shouldExitOnShutdownEvent
+              && !ShutdownHookManager.get().isShutdownInProgress()) {
+            ExitUtil.terminate(-1);
+          }
+        }
       }
       }
     }.start();
     }.start();
   }
   }
@@ -530,7 +541,9 @@ public class NodeManager extends CompositeService
       nodeManagerShutdownHook = new CompositeServiceShutdownHook(this);
       nodeManagerShutdownHook = new CompositeServiceShutdownHook(this);
       ShutdownHookManager.get().addShutdownHook(nodeManagerShutdownHook,
       ShutdownHookManager.get().addShutdownHook(nodeManagerShutdownHook,
                                                 SHUTDOWN_HOOK_PRIORITY);
                                                 SHUTDOWN_HOOK_PRIORITY);
-
+      // System exit should be called only when NodeManager is instantiated from
+      // main() funtion
+      this.shouldExitOnShutdownEvent = true;
       this.init(conf);
       this.init(conf);
       this.start();
       this.start();
     } catch (Throwable t) {
     } catch (Throwable t) {