Browse Source

YARN-3842. NMProxy should retry on NMNotYetReadyException. (Robert Kanter via kasha)
(cherry picked from commit 5ebf2817e58e1be8214dc1916a694a912075aa0a)

Conflicts:

hadoop-yarn-project/CHANGES.txt

Jason Lowe 9 years ago
parent
commit
d43a651374

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -46,6 +46,9 @@ Release 2.6.4 - UNRELEASED
     YARN-3849. Too much of preemption activity causing continuous killing of
     containers across queues. (Sunil G via wangda)
 
+    YARN-3842. NMProxy should retry on NMNotYetReadyException. 
+    (Robert Kanter via kasha)
+
 Release 2.6.3 - 2015-12-17
 
   INCOMPATIBLE CHANGES

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ServerProxy.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.io.retry.RetryProxy;
 import org.apache.hadoop.ipc.RetriableException;
 import org.apache.hadoop.net.ConnectTimeoutException;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 
 import com.google.common.base.Preconditions;
@@ -74,6 +75,7 @@ public class ServerProxy {
     exceptionToPolicyMap.put(UnknownHostException.class, retryPolicy);
     exceptionToPolicyMap.put(RetriableException.class, retryPolicy);
     exceptionToPolicyMap.put(SocketException.class, retryPolicy);
+    exceptionToPolicyMap.put(NMNotYetReadyException.class, retryPolicy);
 
     return RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL,
       exceptionToPolicyMap);

+ 19 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestNMProxy.java

@@ -54,6 +54,7 @@ public class TestNMProxy extends BaseContainerManagerTest {
   }
 
   int retryCount = 0;
+  boolean shouldThrowNMNotYetReadyException = false;
 
   @Before
   public void setUp() throws Exception {
@@ -72,7 +73,15 @@ public class TestNMProxy extends BaseContainerManagerTest {
           StartContainersRequest requests) throws YarnException, IOException {
         if (retryCount < 5) {
           retryCount++;
-          throw new java.net.ConnectException("start container exception");
+          if (shouldThrowNMNotYetReadyException) {
+            // This causes super to throw an NMNotYetReadyException
+            containerManager.setBlockNewContainerRequests(true);
+          } else {
+            throw new java.net.ConnectException("start container exception");
+          }
+        } else {
+          // This stops super from throwing an NMNotYetReadyException
+          containerManager.setBlockNewContainerRequests(false);
         }
         return super.startContainers(requests);
       }
@@ -126,16 +135,25 @@ public class TestNMProxy extends BaseContainerManagerTest {
         NMProxy.createNMProxy(conf, ContainerManagementProtocol.class, ugi,
           YarnRPC.create(conf), address);
 
+    retryCount = 0;
+    shouldThrowNMNotYetReadyException = false;
     proxy.startContainers(allRequests);
     Assert.assertEquals(5, retryCount);
 
     retryCount = 0;
+    shouldThrowNMNotYetReadyException = false;
     proxy.stopContainers(Records.newRecord(StopContainersRequest.class));
     Assert.assertEquals(5, retryCount);
 
     retryCount = 0;
+    shouldThrowNMNotYetReadyException = false;
     proxy.getContainerStatuses(Records
       .newRecord(GetContainerStatusesRequest.class));
     Assert.assertEquals(5, retryCount);
+
+    retryCount = 0;
+    shouldThrowNMNotYetReadyException = true;
+    proxy.startContainers(allRequests);
+    Assert.assertEquals(5, retryCount);
   }
 }