|
@@ -18,30 +18,29 @@ package org.apache.hadoop.ozone.om;
|
|
|
|
|
|
|
|
|
import org.apache.commons.lang3.RandomStringUtils;
|
|
|
-import org.apache.hadoop.hdfs.server.datanode.ObjectStoreHandler;
|
|
|
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
|
|
|
+import org.apache.hadoop.hdfs.LogVerificationAppender;
|
|
|
+import org.apache.hadoop.ipc.RemoteException;
|
|
|
import org.apache.hadoop.ozone.MiniOzoneCluster;
|
|
|
import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
|
|
|
+import org.apache.hadoop.ozone.client.ObjectStore;
|
|
|
import org.apache.hadoop.ozone.client.OzoneClient;
|
|
|
-import org.apache.hadoop.ozone.client.rpc.ha.OMProxyInfo;
|
|
|
-import org.apache.hadoop.ozone.client.rpc.ha.OMProxyProvider;
|
|
|
-import org.apache.hadoop.ozone.om.exceptions.OMException;
|
|
|
-import org.apache.hadoop.ozone.web.handlers.UserArgs;
|
|
|
-import org.apache.hadoop.ozone.web.handlers.VolumeArgs;
|
|
|
-import org.apache.hadoop.ozone.web.interfaces.StorageHandler;
|
|
|
-import org.apache.hadoop.ozone.web.response.VolumeInfo;
|
|
|
-import org.apache.hadoop.ozone.web.utils.OzoneUtils;
|
|
|
+import org.apache.hadoop.ozone.om.ha.OMFailoverProxyProvider;
|
|
|
import org.apache.hadoop.test.GenericTestUtils;
|
|
|
+import org.apache.hadoop.ozone.client.OzoneClientFactory;
|
|
|
+import org.apache.hadoop.ozone.client.OzoneVolume;
|
|
|
+import org.apache.hadoop.ozone.client.VolumeArgs;
|
|
|
+import org.apache.log4j.Logger;
|
|
|
import org.junit.After;
|
|
|
import org.junit.Assert;
|
|
|
import org.junit.Before;
|
|
|
-import org.junit.Ignore;
|
|
|
import org.junit.Rule;
|
|
|
import org.junit.Test;
|
|
|
import org.junit.rules.ExpectedException;
|
|
|
import org.junit.rules.Timeout;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
+import java.net.ConnectException;
|
|
|
import java.net.InetSocketAddress;
|
|
|
import java.util.List;
|
|
|
import java.util.UUID;
|
|
@@ -49,6 +48,14 @@ import java.util.UUID;
|
|
|
import static org.apache.hadoop.ozone.MiniOzoneHAClusterImpl
|
|
|
.NODE_FAILURE_TIMEOUT;
|
|
|
import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED;
|
|
|
+import static org.apache.hadoop.ozone.OzoneConfigKeys
|
|
|
+ .OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY;
|
|
|
+import static org.apache.hadoop.ozone.OzoneConfigKeys
|
|
|
+ .OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_DEFAULT;
|
|
|
+import static org.apache.hadoop.ozone.OzoneConfigKeys
|
|
|
+ .OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY;
|
|
|
+import static org.apache.hadoop.ozone.OzoneConfigKeys
|
|
|
+ .OZONE_CLIENT_RETRY_MAX_ATTEMPTS_KEY;
|
|
|
import static org.apache.hadoop.ozone.OzoneConfigKeys
|
|
|
.OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS;
|
|
|
|
|
@@ -58,8 +65,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys
|
|
|
public class TestOzoneManagerHA {
|
|
|
|
|
|
private MiniOzoneHAClusterImpl cluster = null;
|
|
|
- private StorageHandler storageHandler;
|
|
|
- private UserArgs userArgs;
|
|
|
+ private ObjectStore objectStore;
|
|
|
private OzoneConfiguration conf;
|
|
|
private String clusterId;
|
|
|
private String scmId;
|
|
@@ -69,7 +75,7 @@ public class TestOzoneManagerHA {
|
|
|
public ExpectedException exception = ExpectedException.none();
|
|
|
|
|
|
@Rule
|
|
|
- public Timeout timeout = new Timeout(60_000);
|
|
|
+ public Timeout timeout = new Timeout(120_000);
|
|
|
|
|
|
/**
|
|
|
* Create a MiniDFSCluster for testing.
|
|
@@ -85,6 +91,9 @@ public class TestOzoneManagerHA {
|
|
|
scmId = UUID.randomUUID().toString();
|
|
|
conf.setBoolean(OZONE_ACL_ENABLED, true);
|
|
|
conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2);
|
|
|
+ conf.setInt(OZONE_CLIENT_RETRY_MAX_ATTEMPTS_KEY, 3);
|
|
|
+ conf.setInt(OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, 3);
|
|
|
+ conf.setInt(OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_KEY, 50);
|
|
|
|
|
|
cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf)
|
|
|
.setClusterId(clusterId)
|
|
@@ -93,9 +102,7 @@ public class TestOzoneManagerHA {
|
|
|
.setNumOfOzoneManagers(numOfOMs)
|
|
|
.build();
|
|
|
cluster.waitForClusterToBeReady();
|
|
|
- storageHandler = new ObjectStoreHandler(conf).getStorageHandler();
|
|
|
- userArgs = new UserArgs(null, OzoneUtils.getRequestID(),
|
|
|
- null, null, null, null);
|
|
|
+ objectStore = OzoneClientFactory.getRpcClient(conf).getObjectStore();
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -115,7 +122,7 @@ public class TestOzoneManagerHA {
|
|
|
*/
|
|
|
@Test
|
|
|
public void testAllOMNodesRunning() throws Exception {
|
|
|
- testCreateVolume(true);
|
|
|
+ createVolumeTest(true);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -126,52 +133,56 @@ public class TestOzoneManagerHA {
|
|
|
cluster.stopOzoneManager(1);
|
|
|
Thread.sleep(NODE_FAILURE_TIMEOUT * 2);
|
|
|
|
|
|
- testCreateVolume(true);
|
|
|
+ createVolumeTest(true);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Test client request fails when 2 OMs are down.
|
|
|
*/
|
|
|
@Test
|
|
|
- @Ignore("TODO:HDDS-1158")
|
|
|
public void testTwoOMNodesDown() throws Exception {
|
|
|
cluster.stopOzoneManager(1);
|
|
|
cluster.stopOzoneManager(2);
|
|
|
Thread.sleep(NODE_FAILURE_TIMEOUT * 2);
|
|
|
|
|
|
- testCreateVolume(false);
|
|
|
+ createVolumeTest(false);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Create a volume and test its attribute.
|
|
|
*/
|
|
|
- private void testCreateVolume(boolean checkSuccess) throws Exception {
|
|
|
+ private void createVolumeTest(boolean checkSuccess) throws Exception {
|
|
|
String userName = "user" + RandomStringUtils.randomNumeric(5);
|
|
|
String adminName = "admin" + RandomStringUtils.randomNumeric(5);
|
|
|
String volumeName = "volume" + RandomStringUtils.randomNumeric(5);
|
|
|
|
|
|
- VolumeArgs createVolumeArgs = new VolumeArgs(volumeName, userArgs);
|
|
|
- createVolumeArgs.setUserName(userName);
|
|
|
- createVolumeArgs.setAdminName(adminName);
|
|
|
+ VolumeArgs createVolumeArgs = VolumeArgs.newBuilder()
|
|
|
+ .setOwner(userName)
|
|
|
+ .setAdmin(adminName)
|
|
|
+ .build();
|
|
|
|
|
|
try {
|
|
|
- storageHandler.createVolume(createVolumeArgs);
|
|
|
+ objectStore.createVolume(volumeName, createVolumeArgs);
|
|
|
|
|
|
- VolumeArgs getVolumeArgs = new VolumeArgs(volumeName, userArgs);
|
|
|
- VolumeInfo retVolumeinfo = storageHandler.getVolumeInfo(getVolumeArgs);
|
|
|
+ OzoneVolume retVolumeinfo = objectStore.getVolume(volumeName);
|
|
|
|
|
|
if (checkSuccess) {
|
|
|
- Assert.assertTrue(retVolumeinfo.getVolumeName().equals(volumeName));
|
|
|
- Assert.assertTrue(retVolumeinfo.getOwner().getName().equals(userName));
|
|
|
+ Assert.assertTrue(retVolumeinfo.getName().equals(volumeName));
|
|
|
+ Assert.assertTrue(retVolumeinfo.getOwner().equals(userName));
|
|
|
+ Assert.assertTrue(retVolumeinfo.getAdmin().equals(adminName));
|
|
|
} else {
|
|
|
// Verify that the request failed
|
|
|
- Assert.assertTrue(retVolumeinfo.getVolumeName().isEmpty());
|
|
|
Assert.fail("There is no quorum. Request should have failed");
|
|
|
}
|
|
|
- } catch (OMException e) {
|
|
|
+ } catch (ConnectException | RemoteException e) {
|
|
|
if (!checkSuccess) {
|
|
|
- GenericTestUtils.assertExceptionContains(
|
|
|
- "RaftRetryFailureException", e);
|
|
|
+ // If the last OM to be tried by the RetryProxy is down, we would get
|
|
|
+ // ConnectException. Otherwise, we would get a RemoteException from the
|
|
|
+ // last running OM as it would fail to get a quorum.
|
|
|
+ if (e instanceof RemoteException) {
|
|
|
+ GenericTestUtils.assertExceptionContains(
|
|
|
+ "RaftRetryFailureException", e);
|
|
|
+ }
|
|
|
} else {
|
|
|
throw e;
|
|
|
}
|
|
@@ -179,14 +190,16 @@ public class TestOzoneManagerHA {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Test that OMProxyProvider creates an OM proxy for each OM in the cluster.
|
|
|
+ * Test that OMFailoverProxyProvider creates an OM proxy for each OM in the
|
|
|
+ * cluster.
|
|
|
*/
|
|
|
@Test
|
|
|
- public void testOMClientProxyProvide() throws Exception {
|
|
|
+ public void testOMProxyProviderInitialization() throws Exception {
|
|
|
OzoneClient rpcClient = cluster.getRpcClient();
|
|
|
- OMProxyProvider omProxyProvider =
|
|
|
+ OMFailoverProxyProvider omFailoverProxyProvider =
|
|
|
rpcClient.getObjectStore().getClientProxy().getOMProxyProvider();
|
|
|
- List<OMProxyInfo> omProxies = omProxyProvider.getOMProxies();
|
|
|
+ List<OMFailoverProxyProvider.OMProxyInfo> omProxies =
|
|
|
+ omFailoverProxyProvider.getOMProxies();
|
|
|
|
|
|
Assert.assertEquals(numOfOMs, omProxies.size());
|
|
|
|
|
@@ -194,7 +207,7 @@ public class TestOzoneManagerHA {
|
|
|
InetSocketAddress omRpcServerAddr =
|
|
|
cluster.getOzoneManager(i).getOmRpcServerAddr();
|
|
|
boolean omClientProxyExists = false;
|
|
|
- for (OMProxyInfo omProxyInfo : omProxies) {
|
|
|
+ for (OMFailoverProxyProvider.OMProxyInfo omProxyInfo : omProxies) {
|
|
|
if (omProxyInfo.getAddress().equals(omRpcServerAddr)) {
|
|
|
omClientProxyExists = true;
|
|
|
break;
|
|
@@ -205,4 +218,99 @@ public class TestOzoneManagerHA {
|
|
|
omClientProxyExists);
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Test OMFailoverProxyProvider failover on connection exception to OM client.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testOMProxyProviderFailoverOnConnectionFailure()
|
|
|
+ throws Exception {
|
|
|
+ OMFailoverProxyProvider omFailoverProxyProvider =
|
|
|
+ objectStore.getClientProxy().getOMProxyProvider();
|
|
|
+ String firstProxyNodeId = omFailoverProxyProvider.getCurrentProxyOMNodeId();
|
|
|
+
|
|
|
+ createVolumeTest(true);
|
|
|
+
|
|
|
+ // On stopping the current OM Proxy, the next connection attempt should
|
|
|
+ // failover to a another OM proxy.
|
|
|
+ cluster.stopOzoneManager(firstProxyNodeId);
|
|
|
+ Thread.sleep(OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_DEFAULT * 4);
|
|
|
+
|
|
|
+ // Next request to the proxy provider should result in a failover
|
|
|
+ createVolumeTest(true);
|
|
|
+ Thread.sleep(OZONE_CLIENT_FAILOVER_SLEEP_BASE_MILLIS_DEFAULT);
|
|
|
+
|
|
|
+ // Get the new OM Proxy NodeId
|
|
|
+ String newProxyNodeId = omFailoverProxyProvider.getCurrentProxyOMNodeId();
|
|
|
+
|
|
|
+ // Verify that a failover occured. the new proxy nodeId should be
|
|
|
+ // different from the old proxy nodeId.
|
|
|
+ Assert.assertNotEquals("Failover did not occur as expected",
|
|
|
+ firstProxyNodeId, newProxyNodeId);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Test OMFailoverProxyProvider failover when current OM proxy is not
|
|
|
+ * the current OM Leader.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testOMProxyProviderFailoverToCurrentLeader() throws Exception {
|
|
|
+ OMFailoverProxyProvider omFailoverProxyProvider =
|
|
|
+ objectStore.getClientProxy().getOMProxyProvider();
|
|
|
+
|
|
|
+ // Run couple of createVolume tests to discover the current Leader OM
|
|
|
+ createVolumeTest(true);
|
|
|
+ createVolumeTest(true);
|
|
|
+
|
|
|
+ // The OMFailoverProxyProvider will point to the current leader OM node.
|
|
|
+ String leaderOMNodeId = omFailoverProxyProvider.getCurrentProxyOMNodeId();
|
|
|
+
|
|
|
+ // Perform a manual failover of the proxy provider to move the
|
|
|
+ // currentProxyIndex to a node other than the leader OM.
|
|
|
+ omFailoverProxyProvider.performFailover(
|
|
|
+ omFailoverProxyProvider.getProxy().proxy);
|
|
|
+
|
|
|
+ String newProxyNodeId = omFailoverProxyProvider.getCurrentProxyOMNodeId();
|
|
|
+ Assert.assertNotEquals(leaderOMNodeId, newProxyNodeId);
|
|
|
+
|
|
|
+ // Once another request is sent to this new proxy node, the leader
|
|
|
+ // information must be returned via the response and a failover must
|
|
|
+ // happen to the leader proxy node.
|
|
|
+ createVolumeTest(true);
|
|
|
+ Thread.sleep(2000);
|
|
|
+
|
|
|
+ String newLeaderOMNodeId =
|
|
|
+ omFailoverProxyProvider.getCurrentProxyOMNodeId();
|
|
|
+
|
|
|
+ // The old and new Leader OM NodeId must match since there was no new
|
|
|
+ // election in the Ratis ring.
|
|
|
+ Assert.assertEquals(leaderOMNodeId, newLeaderOMNodeId);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void testOMRetryProxy() throws Exception {
|
|
|
+ // Stop all the OMs. After making 5 (set maxRetries value) attempts at
|
|
|
+ // connection, the RpcClient should give up.
|
|
|
+ for (int i = 0; i < numOfOMs; i++) {
|
|
|
+ cluster.stopOzoneManager(i);
|
|
|
+ }
|
|
|
+
|
|
|
+ final LogVerificationAppender appender = new LogVerificationAppender();
|
|
|
+ final org.apache.log4j.Logger logger = Logger.getRootLogger();
|
|
|
+ logger.addAppender(appender);
|
|
|
+
|
|
|
+ try {
|
|
|
+ createVolumeTest(true);
|
|
|
+ Assert.fail("TestOMRetryProxy should fail when there are no OMs running");
|
|
|
+ } catch (ConnectException e) {
|
|
|
+ // Each retry attempt tries upto 10 times to connect. So there should be
|
|
|
+ // 3*10 "Retrying connect to server" messages
|
|
|
+ Assert.assertEquals(30,
|
|
|
+ appender.countLinesWithMessage("Retrying connect to server:"));
|
|
|
+
|
|
|
+ Assert.assertEquals(1,
|
|
|
+ appender.countLinesWithMessage("Failed to connect to OM. Attempted " +
|
|
|
+ "3 retries and 3 failovers"));
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|