|
@@ -22,6 +22,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_C
|
|
|
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_CALLER_CONTEXT_SEPARATOR_KEY;
|
|
|
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY;
|
|
|
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY;
|
|
|
+import static org.apache.hadoop.hdfs.server.federation.fairness.RouterRpcFairnessConstants.CONCURRENT_NS;
|
|
|
|
|
|
import java.io.EOFException;
|
|
|
import java.io.FileNotFoundException;
|
|
@@ -61,6 +62,8 @@ import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
|
|
|
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
|
|
|
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
|
|
import org.apache.hadoop.hdfs.protocol.SnapshotException;
|
|
|
+import org.apache.hadoop.hdfs.server.federation.fairness.AbstractRouterRpcFairnessPolicyController;
|
|
|
+import org.apache.hadoop.hdfs.server.federation.fairness.RouterRpcFairnessPolicyController;
|
|
|
import org.apache.hadoop.hdfs.server.federation.resolver.ActiveNamenodeResolver;
|
|
|
import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeContext;
|
|
|
import org.apache.hadoop.hdfs.server.federation.resolver.FederationNamenodeServiceState;
|
|
@@ -129,6 +132,9 @@ public class RouterRpcClient {
|
|
|
|
|
|
private static final String CLIENT_IP_STR = "clientIp";
|
|
|
|
|
|
+ /** Fairness manager to control handlers assigned per NS. */
|
|
|
+ private RouterRpcFairnessPolicyController routerRpcFairnessPolicyController;
|
|
|
+
|
|
|
/**
|
|
|
* Create a router RPC client to manage remote procedure calls to NNs.
|
|
|
*
|
|
@@ -149,6 +155,8 @@ public class RouterRpcClient {
|
|
|
HADOOP_CALLER_CONTEXT_SEPARATOR_DEFAULT);
|
|
|
this.connectionManager = new ConnectionManager(clientConf);
|
|
|
this.connectionManager.start();
|
|
|
+ this.routerRpcFairnessPolicyController =
|
|
|
+ FederationUtil.newFairnessPolicyController(conf);
|
|
|
|
|
|
int numThreads = conf.getInt(
|
|
|
RBFConfigKeys.DFS_ROUTER_CLIENT_THREADS_SIZE,
|
|
@@ -229,6 +237,9 @@ public class RouterRpcClient {
|
|
|
if (this.executorService != null) {
|
|
|
this.executorService.shutdownNow();
|
|
|
}
|
|
|
+ if (this.routerRpcFairnessPolicyController != null) {
|
|
|
+ this.routerRpcFairnessPolicyController.shutdown();
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -770,13 +781,18 @@ public class RouterRpcClient {
|
|
|
public Object invokeSingle(final String nsId, RemoteMethod method)
|
|
|
throws IOException {
|
|
|
UserGroupInformation ugi = RouterRpcServer.getRemoteUser();
|
|
|
- List<? extends FederationNamenodeContext> nns =
|
|
|
- getNamenodesForNameservice(nsId);
|
|
|
- RemoteLocationContext loc = new RemoteLocation(nsId, "/", "/");
|
|
|
- Class<?> proto = method.getProtocol();
|
|
|
- Method m = method.getMethod();
|
|
|
- Object[] params = method.getParams(loc);
|
|
|
- return invokeMethod(ugi, nns, proto, m, params);
|
|
|
+ acquirePermit(nsId, ugi, method);
|
|
|
+ try {
|
|
|
+ List<? extends FederationNamenodeContext> nns =
|
|
|
+ getNamenodesForNameservice(nsId);
|
|
|
+ RemoteLocationContext loc = new RemoteLocation(nsId, "/", "/");
|
|
|
+ Class<?> proto = method.getProtocol();
|
|
|
+ Method m = method.getMethod();
|
|
|
+ Object[] params = method.getParams(loc);
|
|
|
+ return invokeMethod(ugi, nns, proto, m, params);
|
|
|
+ } finally {
|
|
|
+ releasePermit(nsId, ugi, method);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -933,6 +949,7 @@ public class RouterRpcClient {
|
|
|
// Invoke in priority order
|
|
|
for (final RemoteLocationContext loc : locations) {
|
|
|
String ns = loc.getNameserviceId();
|
|
|
+ acquirePermit(ns, ugi, remoteMethod);
|
|
|
List<? extends FederationNamenodeContext> namenodes =
|
|
|
getNamenodesForNameservice(ns);
|
|
|
try {
|
|
@@ -966,6 +983,8 @@ public class RouterRpcClient {
|
|
|
IOException ioe = new IOException(
|
|
|
"Unexpected exception proxying API " + e.getMessage(), e);
|
|
|
thrownExceptions.add(ioe);
|
|
|
+ } finally {
|
|
|
+ releasePermit(ns, ugi, remoteMethod);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -1298,6 +1317,7 @@ public class RouterRpcClient {
|
|
|
// Shortcut, just one call
|
|
|
T location = locations.iterator().next();
|
|
|
String ns = location.getNameserviceId();
|
|
|
+ acquirePermit(ns, ugi, method);
|
|
|
final List<? extends FederationNamenodeContext> namenodes =
|
|
|
getNamenodesForNameservice(ns);
|
|
|
try {
|
|
@@ -1309,6 +1329,8 @@ public class RouterRpcClient {
|
|
|
} catch (IOException ioe) {
|
|
|
// Localize the exception
|
|
|
throw processException(ioe, location);
|
|
|
+ } finally {
|
|
|
+ releasePermit(ns, ugi, method);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -1355,6 +1377,7 @@ public class RouterRpcClient {
|
|
|
rpcMonitor.proxyOp();
|
|
|
}
|
|
|
|
|
|
+ acquirePermit(CONCURRENT_NS, ugi, method);
|
|
|
try {
|
|
|
List<Future<Object>> futures = null;
|
|
|
if (timeOutMs > 0) {
|
|
@@ -1411,6 +1434,8 @@ public class RouterRpcClient {
|
|
|
LOG.error("Unexpected error while invoking API: {}", ex.getMessage());
|
|
|
throw new IOException(
|
|
|
"Unexpected error while invoking API " + ex.getMessage(), ex);
|
|
|
+ } finally {
|
|
|
+ releasePermit(CONCURRENT_NS, ugi, method);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -1484,4 +1509,55 @@ public class RouterRpcClient {
|
|
|
FederationNamenodeContext namenode = namenodes.get(0);
|
|
|
return namenode.getNameserviceId();
|
|
|
}
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Acquire permit to continue processing the request for specific nsId.
|
|
|
+ *
|
|
|
+ * @param nsId Identifier of the block pool.
|
|
|
+ * @param ugi UserGroupIdentifier associated with the user.
|
|
|
+ * @param m Remote method that needs to be invoked.
|
|
|
+ * @throws IOException If permit could not be acquired for the nsId.
|
|
|
+ */
|
|
|
+ private void acquirePermit(
|
|
|
+ final String nsId, final UserGroupInformation ugi, final RemoteMethod m)
|
|
|
+ throws IOException {
|
|
|
+ if (routerRpcFairnessPolicyController != null
|
|
|
+ && !routerRpcFairnessPolicyController.acquirePermit(nsId)) {
|
|
|
+ // Throw StandByException,
|
|
|
+ // Clients could fail over and try another router.
|
|
|
+ if (rpcMonitor != null) {
|
|
|
+ rpcMonitor.getRPCMetrics().incrProxyOpPermitRejected();
|
|
|
+ }
|
|
|
+ LOG.debug("Permit denied for ugi: {} for method: {}",
|
|
|
+ ugi, m.getMethodName());
|
|
|
+ String msg =
|
|
|
+ "Router " + router.getRouterId() +
|
|
|
+ " is overloaded for NS: " + nsId;
|
|
|
+ throw new StandbyException(msg);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Release permit for specific nsId after processing against downstream
|
|
|
+ * nsId is completed.
|
|
|
+ *
|
|
|
+ * @param nsId Identifier of the block pool.
|
|
|
+ * @param ugi UserGroupIdentifier associated with the user.
|
|
|
+ * @param m Remote method that needs to be invoked.
|
|
|
+ */
|
|
|
+ private void releasePermit(
|
|
|
+ final String nsId, final UserGroupInformation ugi, final RemoteMethod m) {
|
|
|
+ if (routerRpcFairnessPolicyController != null) {
|
|
|
+ routerRpcFairnessPolicyController.releasePermit(nsId);
|
|
|
+ LOG.trace("Permit released for ugi: {} for method: {}", ugi,
|
|
|
+ m.getMethodName());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @VisibleForTesting
|
|
|
+ public AbstractRouterRpcFairnessPolicyController
|
|
|
+ getRouterRpcFairnessPolicyController() {
|
|
|
+ return (AbstractRouterRpcFairnessPolicyController
|
|
|
+ )routerRpcFairnessPolicyController;
|
|
|
+ }
|
|
|
}
|