瀏覽代碼

HDDS-2107. Datanodes should retry forever to connect to SCM in an unsecure environment (#1424)

Vivek Ratnavel Subramanian 5 年之前
父節點
當前提交
66bd1681f8

+ 9 - 2
hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/SCMConnectionManager.java

@@ -18,6 +18,8 @@ package org.apache.hadoop.ozone.container.common.statemachine;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.retry.RetryPolicies;
+import org.apache.hadoop.io.retry.RetryPolicy;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.metrics2.util.MBeans;
@@ -38,6 +40,7 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
@@ -139,10 +142,14 @@ public class SCMConnectionManager
       long version =
           RPC.getProtocolVersion(StorageContainerDatanodeProtocolPB.class);
 
-      StorageContainerDatanodeProtocolPB rpcProxy = RPC.getProxy(
+      RetryPolicy retryPolicy =
+          RetryPolicies.retryForeverWithFixedSleep(
+              1000, TimeUnit.MILLISECONDS);
+      StorageContainerDatanodeProtocolPB rpcProxy = RPC.getProtocolProxy(
           StorageContainerDatanodeProtocolPB.class, version,
           address, UserGroupInformation.getCurrentUser(), conf,
-          NetUtils.getDefaultSocketFactory(conf), getRpcTimeout());
+          NetUtils.getDefaultSocketFactory(conf), getRpcTimeout(),
+          retryPolicy).getProxy();
 
       StorageContainerDatanodeProtocolClientSideTranslatorPB rpcClient =
           new StorageContainerDatanodeProtocolClientSideTranslatorPB(rpcProxy);

+ 3 - 3
hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/ratis/TestOzoneManagerDoubleBufferWithOMResponse.java

@@ -119,7 +119,7 @@ public class TestOzoneManagerDoubleBufferWithOMResponse {
    * transactions or not.
    * @throws Exception
    */
-  @Test(timeout = 300_000)
+  @Test(timeout = 500_000)
   public void testDoubleBuffer() throws Exception {
     // This test checks whether count in tables are correct or not.
     testDoubleBuffer(1, 10);
@@ -397,7 +397,7 @@ public class TestOzoneManagerDoubleBufferWithOMResponse {
         }
         return count == iterations;
 
-      }, 300, 40000);
+      }, 300, 300000);
 
 
       GenericTestUtils.waitFor(() -> {
@@ -409,7 +409,7 @@ public class TestOzoneManagerDoubleBufferWithOMResponse {
           fail("testDoubleBuffer failed");
         }
         return count == bucketCount * iterations;
-      }, 300, 40000);
+      }, 300, 300000);
 
       Assert.assertTrue(doubleBuffer.getFlushIterations() > 0);
     } finally {