|
@@ -18,47 +18,51 @@
|
|
|
|
|
|
package org.apache.hadoop.ipc;
|
|
|
|
|
|
-import java.net.InetAddress;
|
|
|
-import java.net.Socket;
|
|
|
-import java.net.InetSocketAddress;
|
|
|
-import java.net.SocketTimeoutException;
|
|
|
-import java.net.UnknownHostException;
|
|
|
-import java.io.IOException;
|
|
|
-import java.io.DataInputStream;
|
|
|
-import java.io.DataOutputStream;
|
|
|
import java.io.BufferedInputStream;
|
|
|
import java.io.BufferedOutputStream;
|
|
|
+import java.io.DataInputStream;
|
|
|
+import java.io.DataOutputStream;
|
|
|
import java.io.FilterInputStream;
|
|
|
+import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
|
+import java.io.InterruptedIOException;
|
|
|
import java.io.OutputStream;
|
|
|
-
|
|
|
+import java.net.InetAddress;
|
|
|
+import java.net.InetSocketAddress;
|
|
|
+import java.net.Socket;
|
|
|
+import java.net.SocketTimeoutException;
|
|
|
+import java.net.UnknownHostException;
|
|
|
import java.security.PrivilegedExceptionAction;
|
|
|
import java.util.Hashtable;
|
|
|
import java.util.Iterator;
|
|
|
+import java.util.Map.Entry;
|
|
|
import java.util.Random;
|
|
|
import java.util.Set;
|
|
|
-import java.util.Map.Entry;
|
|
|
+import java.util.concurrent.TimeUnit;
|
|
|
import java.util.concurrent.atomic.AtomicBoolean;
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
|
|
import javax.net.SocketFactory;
|
|
|
|
|
|
-import org.apache.commons.logging.*;
|
|
|
-
|
|
|
+import org.apache.commons.logging.Log;
|
|
|
+import org.apache.commons.logging.LogFactory;
|
|
|
import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
import org.apache.hadoop.classification.InterfaceStability;
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
|
|
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
|
|
+import org.apache.hadoop.io.DataOutputBuffer;
|
|
|
+import org.apache.hadoop.io.IOUtils;
|
|
|
+import org.apache.hadoop.io.Writable;
|
|
|
+import org.apache.hadoop.io.WritableUtils;
|
|
|
+import org.apache.hadoop.io.retry.RetryPolicies;
|
|
|
+import org.apache.hadoop.io.retry.RetryPolicy;
|
|
|
+import org.apache.hadoop.io.retry.RetryPolicy.RetryAction;
|
|
|
import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.IpcConnectionContextProto;
|
|
|
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadHeaderProto;
|
|
|
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcPayloadOperationProto;
|
|
|
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcResponseHeaderProto;
|
|
|
import org.apache.hadoop.ipc.protobuf.RpcPayloadHeaderProtos.RpcStatusProto;
|
|
|
-import org.apache.hadoop.io.IOUtils;
|
|
|
-import org.apache.hadoop.io.Writable;
|
|
|
-import org.apache.hadoop.io.WritableUtils;
|
|
|
-import org.apache.hadoop.io.DataOutputBuffer;
|
|
|
import org.apache.hadoop.net.NetUtils;
|
|
|
import org.apache.hadoop.security.KerberosInfo;
|
|
|
import org.apache.hadoop.security.SaslRpcClient;
|
|
@@ -67,8 +71,8 @@ import org.apache.hadoop.security.SecurityUtil;
|
|
|
import org.apache.hadoop.security.UserGroupInformation;
|
|
|
import org.apache.hadoop.security.token.Token;
|
|
|
import org.apache.hadoop.security.token.TokenIdentifier;
|
|
|
-import org.apache.hadoop.security.token.TokenSelector;
|
|
|
import org.apache.hadoop.security.token.TokenInfo;
|
|
|
+import org.apache.hadoop.security.token.TokenSelector;
|
|
|
import org.apache.hadoop.util.ProtoUtil;
|
|
|
import org.apache.hadoop.util.ReflectionUtils;
|
|
|
|
|
@@ -80,8 +84,8 @@ import org.apache.hadoop.util.ReflectionUtils;
|
|
|
*/
|
|
|
public class Client {
|
|
|
|
|
|
- public static final Log LOG =
|
|
|
- LogFactory.getLog(Client.class);
|
|
|
+ public static final Log LOG = LogFactory.getLog(Client.class);
|
|
|
+
|
|
|
private Hashtable<ConnectionId, Connection> connections =
|
|
|
new Hashtable<ConnectionId, Connection>();
|
|
|
|
|
@@ -228,8 +232,7 @@ public class Client {
|
|
|
private int rpcTimeout;
|
|
|
private int maxIdleTime; //connections will be culled if it was idle for
|
|
|
//maxIdleTime msecs
|
|
|
- private int maxRetries; //the max. no. of retries for socket connections
|
|
|
- // the max. no. of retries for socket connections on time out exceptions
|
|
|
+ private final RetryPolicy connectionRetryPolicy;
|
|
|
private int maxRetriesOnSocketTimeouts;
|
|
|
private boolean tcpNoDelay; // if T then disable Nagle's Algorithm
|
|
|
private boolean doPing; //do we need to send ping message
|
|
@@ -253,7 +256,7 @@ public class Client {
|
|
|
}
|
|
|
this.rpcTimeout = remoteId.getRpcTimeout();
|
|
|
this.maxIdleTime = remoteId.getMaxIdleTime();
|
|
|
- this.maxRetries = remoteId.getMaxRetries();
|
|
|
+ this.connectionRetryPolicy = remoteId.connectionRetryPolicy;
|
|
|
this.maxRetriesOnSocketTimeouts = remoteId.getMaxRetriesOnSocketTimeouts();
|
|
|
this.tcpNoDelay = remoteId.getTcpNoDelay();
|
|
|
this.doPing = remoteId.getDoPing();
|
|
@@ -488,7 +491,7 @@ public class Client {
|
|
|
if (updateAddress()) {
|
|
|
timeoutFailures = ioFailures = 0;
|
|
|
}
|
|
|
- handleConnectionFailure(ioFailures++, maxRetries, ie);
|
|
|
+ handleConnectionFailure(ioFailures++, ie);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -680,8 +683,36 @@ public class Client {
|
|
|
Thread.sleep(1000);
|
|
|
} catch (InterruptedException ignored) {}
|
|
|
|
|
|
- LOG.info("Retrying connect to server: " + server +
|
|
|
- ". Already tried " + curRetries + " time(s).");
|
|
|
+ LOG.info("Retrying connect to server: " + server + ". Already tried "
|
|
|
+ + curRetries + " time(s); maxRetries=" + maxRetries);
|
|
|
+ }
|
|
|
+
|
|
|
+ private void handleConnectionFailure(int curRetries, IOException ioe
|
|
|
+ ) throws IOException {
|
|
|
+ closeConnection();
|
|
|
+
|
|
|
+ final RetryAction action;
|
|
|
+ try {
|
|
|
+ action = connectionRetryPolicy.shouldRetry(ioe, curRetries, 0, true);
|
|
|
+ } catch(Exception e) {
|
|
|
+ throw e instanceof IOException? (IOException)e: new IOException(e);
|
|
|
+ }
|
|
|
+ if (action.action == RetryAction.RetryDecision.FAIL) {
|
|
|
+ if (action.reason != null) {
|
|
|
+ LOG.warn("Failed to connect to server: " + server + ": "
|
|
|
+ + action.reason, ioe);
|
|
|
+ }
|
|
|
+ throw ioe;
|
|
|
+ }
|
|
|
+
|
|
|
+ try {
|
|
|
+ Thread.sleep(action.delayMillis);
|
|
|
+ } catch (InterruptedException e) {
|
|
|
+ throw (IOException)new InterruptedIOException("Interrupted: action="
|
|
|
+ + action + ", retry policy=" + connectionRetryPolicy).initCause(e);
|
|
|
+ }
|
|
|
+ LOG.info("Retrying connect to server: " + server + ". Already tried "
|
|
|
+ + curRetries + " time(s); retry policy is " + connectionRetryPolicy);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -849,6 +880,10 @@ public class Client {
|
|
|
try {
|
|
|
RpcResponseHeaderProto response =
|
|
|
RpcResponseHeaderProto.parseDelimitedFrom(in);
|
|
|
+ if (response == null) {
|
|
|
+ throw new IOException("Response is null.");
|
|
|
+ }
|
|
|
+
|
|
|
int callId = response.getCallId();
|
|
|
if (LOG.isDebugEnabled())
|
|
|
LOG.debug(getName() + " got value #" + callId);
|
|
@@ -1287,7 +1322,7 @@ public class Client {
|
|
|
private final String serverPrincipal;
|
|
|
private final int maxIdleTime; //connections will be culled if it was idle for
|
|
|
//maxIdleTime msecs
|
|
|
- private final int maxRetries; //the max. no. of retries for socket connections
|
|
|
+ private final RetryPolicy connectionRetryPolicy;
|
|
|
// the max. no. of retries for socket connections on time out exceptions
|
|
|
private final int maxRetriesOnSocketTimeouts;
|
|
|
private final boolean tcpNoDelay; // if T then disable Nagle's Algorithm
|
|
@@ -1297,7 +1332,7 @@ public class Client {
|
|
|
ConnectionId(InetSocketAddress address, Class<?> protocol,
|
|
|
UserGroupInformation ticket, int rpcTimeout,
|
|
|
String serverPrincipal, int maxIdleTime,
|
|
|
- int maxRetries, int maxRetriesOnSocketTimeouts,
|
|
|
+ RetryPolicy connectionRetryPolicy, int maxRetriesOnSocketTimeouts,
|
|
|
boolean tcpNoDelay, boolean doPing, int pingInterval) {
|
|
|
this.protocol = protocol;
|
|
|
this.address = address;
|
|
@@ -1305,7 +1340,7 @@ public class Client {
|
|
|
this.rpcTimeout = rpcTimeout;
|
|
|
this.serverPrincipal = serverPrincipal;
|
|
|
this.maxIdleTime = maxIdleTime;
|
|
|
- this.maxRetries = maxRetries;
|
|
|
+ this.connectionRetryPolicy = connectionRetryPolicy;
|
|
|
this.maxRetriesOnSocketTimeouts = maxRetriesOnSocketTimeouts;
|
|
|
this.tcpNoDelay = tcpNoDelay;
|
|
|
this.doPing = doPing;
|
|
@@ -1336,10 +1371,6 @@ public class Client {
|
|
|
return maxIdleTime;
|
|
|
}
|
|
|
|
|
|
- int getMaxRetries() {
|
|
|
- return maxRetries;
|
|
|
- }
|
|
|
-
|
|
|
/** max connection retries on socket time outs */
|
|
|
public int getMaxRetriesOnSocketTimeouts() {
|
|
|
return maxRetriesOnSocketTimeouts;
|
|
@@ -1357,6 +1388,12 @@ public class Client {
|
|
|
return pingInterval;
|
|
|
}
|
|
|
|
|
|
+ static ConnectionId getConnectionId(InetSocketAddress addr,
|
|
|
+ Class<?> protocol, UserGroupInformation ticket, int rpcTimeout,
|
|
|
+ Configuration conf) throws IOException {
|
|
|
+ return getConnectionId(addr, protocol, ticket, rpcTimeout, null, conf);
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* Returns a ConnectionId object.
|
|
|
* @param addr Remote address for the connection.
|
|
@@ -1367,9 +1404,18 @@ public class Client {
|
|
|
* @return A ConnectionId instance
|
|
|
* @throws IOException
|
|
|
*/
|
|
|
- public static ConnectionId getConnectionId(InetSocketAddress addr,
|
|
|
+ static ConnectionId getConnectionId(InetSocketAddress addr,
|
|
|
Class<?> protocol, UserGroupInformation ticket, int rpcTimeout,
|
|
|
- Configuration conf) throws IOException {
|
|
|
+ RetryPolicy connectionRetryPolicy, Configuration conf) throws IOException {
|
|
|
+
|
|
|
+ if (connectionRetryPolicy == null) {
|
|
|
+ final int max = conf.getInt(
|
|
|
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
|
|
|
+ CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT);
|
|
|
+ connectionRetryPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(
|
|
|
+ max, 1, TimeUnit.SECONDS);
|
|
|
+ }
|
|
|
+
|
|
|
String remotePrincipal = getRemotePrincipal(conf, addr, protocol);
|
|
|
boolean doPing =
|
|
|
conf.getBoolean(CommonConfigurationKeys.IPC_CLIENT_PING_KEY, true);
|
|
@@ -1377,8 +1423,7 @@ public class Client {
|
|
|
rpcTimeout, remotePrincipal,
|
|
|
conf.getInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
|
|
|
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_DEFAULT),
|
|
|
- conf.getInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
|
|
|
- CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT),
|
|
|
+ connectionRetryPolicy,
|
|
|
conf.getInt(
|
|
|
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
|
|
|
CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT),
|
|
@@ -1421,7 +1466,7 @@ public class Client {
|
|
|
return isEqual(this.address, that.address)
|
|
|
&& this.doPing == that.doPing
|
|
|
&& this.maxIdleTime == that.maxIdleTime
|
|
|
- && this.maxRetries == that.maxRetries
|
|
|
+ && isEqual(this.connectionRetryPolicy, that.connectionRetryPolicy)
|
|
|
&& this.pingInterval == that.pingInterval
|
|
|
&& isEqual(this.protocol, that.protocol)
|
|
|
&& this.rpcTimeout == that.rpcTimeout
|
|
@@ -1434,11 +1479,10 @@ public class Client {
|
|
|
|
|
|
@Override
|
|
|
public int hashCode() {
|
|
|
- int result = 1;
|
|
|
+ int result = connectionRetryPolicy.hashCode();
|
|
|
result = PRIME * result + ((address == null) ? 0 : address.hashCode());
|
|
|
result = PRIME * result + (doPing ? 1231 : 1237);
|
|
|
result = PRIME * result + maxIdleTime;
|
|
|
- result = PRIME * result + maxRetries;
|
|
|
result = PRIME * result + pingInterval;
|
|
|
result = PRIME * result + ((protocol == null) ? 0 : protocol.hashCode());
|
|
|
result = PRIME * result + rpcTimeout;
|