Browse Source

merge -r 1352638:1352639 from branch-2. FIXES: MAPREDUCE-4295

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1352641 13f79535-47bb-0310-9956-ffa450edef68
Thomas Graves 13 years ago
parent
commit
e577d4c8bc

+ 2 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -247,6 +247,8 @@ Release 0.23.3 - UNRELEASED
 
     MAPREDUCE-4320. gridmix mainClass wrong in pom.xml (tgraves)
 
+    MAPREDUCE-4295. RM crashes due to DNS issue (tgraves)
+
 Release 0.23.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 15 - 3
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

@@ -1179,9 +1179,16 @@ public class LeafQueue implements CSQueue {
     if (UserGroupInformation.isSecurityEnabled()) {
       ContainerTokenIdentifier tokenIdentifier = new ContainerTokenIdentifier(
           containerId, nodeId.toString(), capability);
-      containerToken = BuilderUtils.newContainerToken(nodeId, ByteBuffer
-          .wrap(containerTokenSecretManager
-              .createPassword(tokenIdentifier)), tokenIdentifier);
+      try {
+        containerToken = BuilderUtils.newContainerToken(nodeId, ByteBuffer
+            .wrap(containerTokenSecretManager
+                .createPassword(tokenIdentifier)), tokenIdentifier);
+      } catch (IllegalArgumentException e) {
+         // This could be because DNS is down; in that case we just want
+         // to retry later rather than bring the RM down.
+         LOG.error("Error trying to create new container", e);
+         return null;
+      }
     }
 
     // Create the container
@@ -1210,6 +1217,11 @@ public class LeafQueue implements CSQueue {
     // Create the container if necessary
     Container container = 
         getContainer(rmContainer, application, node, capability, priority);
+  
+    // Something went wrong getting/creating the container, so bail out.
+    if (container == null) {
+      return Resources.none();
+    }
 
     // Can we allocate a container on this node?
     int availableContainers =