|
@@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.federation.router;
|
|
|
|
|
|
import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.simulateSlowNamenode;
|
|
|
import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.simulateThrowExceptionRouterRpcServer;
|
|
|
+import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.transitionClusterNSToStandby;
|
|
|
+import static org.apache.hadoop.hdfs.server.federation.FederationTestUtils.transitionClusterNSToActive;
|
|
|
import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
|
|
|
import static org.junit.Assert.assertEquals;
|
|
|
import static org.junit.Assert.assertTrue;
|
|
@@ -27,6 +29,7 @@ import static org.junit.Assert.fail;
|
|
|
import java.io.IOException;
|
|
|
import java.net.URI;
|
|
|
import java.util.ArrayList;
|
|
|
+import java.util.Collection;
|
|
|
import java.util.List;
|
|
|
import java.util.concurrent.ExecutorService;
|
|
|
import java.util.concurrent.Executors;
|
|
@@ -46,7 +49,9 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
|
|
import org.apache.hadoop.ipc.RemoteException;
|
|
|
import org.apache.hadoop.ipc.StandbyException;
|
|
|
import org.junit.After;
|
|
|
+import org.junit.Rule;
|
|
|
import org.junit.Test;
|
|
|
+import org.junit.rules.ExpectedException;
|
|
|
import org.slf4j.Logger;
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
|
@@ -71,14 +76,19 @@ public class TestRouterClientRejectOverload {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- private void setupCluster(boolean overloadControl) throws Exception {
|
|
|
+ @Rule
|
|
|
+ public ExpectedException exceptionRule = ExpectedException.none();
|
|
|
+
|
|
|
+ private void setupCluster(boolean overloadControl, boolean ha)
|
|
|
+ throws Exception {
|
|
|
// Build and start a federated cluster
|
|
|
- cluster = new StateStoreDFSCluster(false, 2);
|
|
|
+ cluster = new StateStoreDFSCluster(ha, 2);
|
|
|
Configuration routerConf = new RouterConfigBuilder()
|
|
|
.stateStore()
|
|
|
.metrics()
|
|
|
.admin()
|
|
|
.rpc()
|
|
|
+ .heartbeat()
|
|
|
.build();
|
|
|
|
|
|
// Reduce the number of RPC clients threads to overload the Router easy
|
|
@@ -98,7 +108,7 @@ public class TestRouterClientRejectOverload {
|
|
|
|
|
|
@Test
|
|
|
public void testWithoutOverloadControl() throws Exception {
|
|
|
- setupCluster(false);
|
|
|
+ setupCluster(false, false);
|
|
|
|
|
|
// Nobody should get overloaded
|
|
|
testOverloaded(0);
|
|
@@ -121,7 +131,7 @@ public class TestRouterClientRejectOverload {
|
|
|
|
|
|
@Test
|
|
|
public void testOverloadControl() throws Exception {
|
|
|
- setupCluster(true);
|
|
|
+ setupCluster(true, false);
|
|
|
|
|
|
List<RouterContext> routers = cluster.getRouters();
|
|
|
FederationRPCMetrics rpcMetrics0 =
|
|
@@ -244,7 +254,7 @@ public class TestRouterClientRejectOverload {
|
|
|
|
|
|
@Test
|
|
|
public void testConnectionNullException() throws Exception {
|
|
|
- setupCluster(false);
|
|
|
+ setupCluster(false, false);
|
|
|
|
|
|
// Choose 1st router
|
|
|
RouterContext routerContext = cluster.getRouters().get(0);
|
|
@@ -280,4 +290,70 @@ public class TestRouterClientRejectOverload {
|
|
|
assertEquals(originalRouter1Failures,
|
|
|
rpcMetrics1.getProxyOpFailureCommunicate());
|
|
|
}
|
|
|
+
|
|
|
+ /**
|
|
|
+ * When failover occurs, no namenodes are available within a short time.
|
|
|
+ * Client will success after some retries.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testNoNamenodesAvailable() throws Exception{
|
|
|
+ setupCluster(false, true);
|
|
|
+
|
|
|
+ transitionClusterNSToStandby(cluster);
|
|
|
+
|
|
|
+ Configuration conf = cluster.getRouterClientConf();
|
|
|
+ // Set dfs.client.failover.random.order false, to pick 1st router at first
|
|
|
+ conf.setBoolean("dfs.client.failover.random.order", false);
|
|
|
+
|
|
|
+ // Retries is 3 (see FailoverOnNetworkExceptionRetry#shouldRetry, will fail
|
|
|
+ // when reties > max.attempts), so total access is 4.
|
|
|
+ conf.setInt("dfs.client.retry.max.attempts", 2);
|
|
|
+ DFSClient routerClient = new DFSClient(new URI("hdfs://fed"), conf);
|
|
|
+
|
|
|
+ // Get router0 metrics
|
|
|
+ FederationRPCMetrics rpcMetrics0 = cluster.getRouters().get(0)
|
|
|
+ .getRouter().getRpcServer().getRPCMetrics();
|
|
|
+ // Get router1 metrics
|
|
|
+ FederationRPCMetrics rpcMetrics1 = cluster.getRouters().get(1)
|
|
|
+ .getRouter().getRpcServer().getRPCMetrics();
|
|
|
+
|
|
|
+ // Original failures
|
|
|
+ long originalRouter0Failures = rpcMetrics0.getProxyOpNoNamenodes();
|
|
|
+ long originalRouter1Failures = rpcMetrics1.getProxyOpNoNamenodes();
|
|
|
+
|
|
|
+ // GetFileInfo will throw Exception
|
|
|
+ String exceptionMessage = "org.apache.hadoop.hdfs.server.federation."
|
|
|
+ + "router.NoNamenodesAvailableException: No namenodes available "
|
|
|
+ + "under nameservice ns0";
|
|
|
+ exceptionRule.expect(RemoteException.class);
|
|
|
+ exceptionRule.expectMessage(exceptionMessage);
|
|
|
+ routerClient.getFileInfo("/");
|
|
|
+
|
|
|
+ // Router 0 failures will increase
|
|
|
+ assertEquals(originalRouter0Failures + 4,
|
|
|
+ rpcMetrics0.getProxyOpNoNamenodes());
|
|
|
+ // Router 1 failures do not change
|
|
|
+ assertEquals(originalRouter1Failures,
|
|
|
+ rpcMetrics1.getProxyOpNoNamenodes());
|
|
|
+
|
|
|
+ // Make name services available
|
|
|
+ transitionClusterNSToActive(cluster, 0);
|
|
|
+ for (RouterContext routerContext : cluster.getRouters()) {
|
|
|
+ // Manually trigger the heartbeat
|
|
|
+ Collection<NamenodeHeartbeatService> heartbeatServices = routerContext
|
|
|
+ .getRouter().getNamenodeHearbeatServices();
|
|
|
+ for (NamenodeHeartbeatService service : heartbeatServices) {
|
|
|
+ service.periodicInvoke();
|
|
|
+ }
|
|
|
+ // Update service cache
|
|
|
+ routerContext.getRouter().getStateStore().refreshCaches(true);
|
|
|
+ }
|
|
|
+
|
|
|
+ originalRouter0Failures = rpcMetrics0.getProxyOpNoNamenodes();
|
|
|
+
|
|
|
+ // RPC call must be successful
|
|
|
+ routerClient.getFileInfo("/");
|
|
|
+ // Router 0 failures do not change
|
|
|
+ assertEquals(originalRouter0Failures, rpcMetrics0.getProxyOpNoNamenodes());
|
|
|
+ }
|
|
|
}
|