Переглянути джерело

YARN-3585. NodeManager cannot exit on SHUTDOWN event triggered and NM recovery is enabled. Contributed by Rohith Sharmaks
(cherry picked from commit e13b671aa510f553f4a6a232b4694b6a4cce88ae)

(cherry picked from commit 752caa95a40d899e1bf98bc907e91aec2bb57073)
(cherry picked from commit 13c4db632b0e7f19dcfa883c2492431c2c7d0799)

Jason Lowe 10 роки тому
батько
коміт
193d8d3667

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -153,6 +153,9 @@ Release 2.6.1 - UNRELEASED
     YARN-3725. App submission via REST API is broken in secure mode due to
     YARN-3725. App submission via REST API is broken in secure mode due to
     Timeline DT service address is empty. (Zhijie Shen via wangda)
     Timeline DT service address is empty. (Zhijie Shen via wangda)
 
 
+    YARN-3585. NodeManager cannot exit on SHUTDOWN event triggered and NM
+    recovery is enabled (Rohith Sharmaks via jlowe)
+
 Release 2.6.0 - 2014-11-18
 Release 2.6.0 - 2014-11-18
 
 
   INCOMPATIBLE CHANGES
   INCOMPATIBLE CHANGES

+ 15 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.service.CompositeService;
 import org.apache.hadoop.service.CompositeService;
+import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.ShutdownHookManager;
 import org.apache.hadoop.util.ShutdownHookManager;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.StringUtils;
@@ -88,6 +89,7 @@ public class NodeManager extends CompositeService
   
   
   private AtomicBoolean isStopping = new AtomicBoolean(false);
   private AtomicBoolean isStopping = new AtomicBoolean(false);
   private boolean rmWorkPreservingRestartEnabled;
   private boolean rmWorkPreservingRestartEnabled;
+  private boolean shouldExitOnShutdownEvent = false;
 
 
   public NodeManager() {
   public NodeManager() {
     super(NodeManager.class.getName());
     super(NodeManager.class.getName());
@@ -287,7 +289,16 @@ public class NodeManager extends CompositeService
     new Thread() {
     new Thread() {
       @Override
       @Override
       public void run() {
       public void run() {
-        NodeManager.this.stop();
+        try {
+          NodeManager.this.stop();
+        } catch (Throwable t) {
+          LOG.error("Error while shutting down NodeManager", t);
+        } finally {
+          if (shouldExitOnShutdownEvent
+              && !ShutdownHookManager.get().isShutdownInProgress()) {
+            ExitUtil.terminate(-1);
+          }
+        }
       }
       }
     }.start();
     }.start();
   }
   }
@@ -463,7 +474,9 @@ public class NodeManager extends CompositeService
       nodeManagerShutdownHook = new CompositeServiceShutdownHook(this);
       nodeManagerShutdownHook = new CompositeServiceShutdownHook(this);
       ShutdownHookManager.get().addShutdownHook(nodeManagerShutdownHook,
       ShutdownHookManager.get().addShutdownHook(nodeManagerShutdownHook,
                                                 SHUTDOWN_HOOK_PRIORITY);
                                                 SHUTDOWN_HOOK_PRIORITY);
-
+      // System exit should be called only when NodeManager is instantiated from
+      // main() function
+      this.shouldExitOnShutdownEvent = true;
       this.init(conf);
       this.init(conf);
       this.start();
       this.start();
     } catch (Throwable t) {
     } catch (Throwable t) {