|
@@ -17,8 +17,18 @@
|
|
|
*/
|
|
|
package org.apache.hadoop.metrics2.sink.timeline;
|
|
|
|
|
|
+import com.google.common.base.Supplier;
|
|
|
+import com.google.common.base.Suppliers;
|
|
|
+import com.google.common.reflect.TypeToken;
|
|
|
+import com.google.gson.Gson;
|
|
|
+import com.google.gson.JsonSyntaxException;
|
|
|
+import org.apache.commons.io.IOUtils;
|
|
|
import org.apache.commons.logging.Log;
|
|
|
import org.apache.commons.logging.LogFactory;
|
|
|
+import org.apache.hadoop.metrics2.sink.timeline.availability.MetricCollectorHAHelper;
|
|
|
+import org.apache.hadoop.metrics2.sink.timeline.availability.MetricCollectorUnavailableException;
|
|
|
+import org.apache.hadoop.metrics2.sink.timeline.availability.MetricSinkWriteShardHostnameHashingStrategy;
|
|
|
+import org.apache.hadoop.metrics2.sink.timeline.availability.MetricSinkWriteShardStrategy;
|
|
|
import org.codehaus.jackson.map.AnnotationIntrospector;
|
|
|
import org.codehaus.jackson.map.ObjectMapper;
|
|
|
import org.codehaus.jackson.map.annotate.JsonSerialize;
|
|
@@ -35,9 +45,17 @@ import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
|
import java.io.InputStreamReader;
|
|
|
import java.io.OutputStream;
|
|
|
+import java.io.StringWriter;
|
|
|
import java.net.HttpURLConnection;
|
|
|
import java.net.URL;
|
|
|
import java.security.KeyStore;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.Collection;
|
|
|
+import java.util.HashSet;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Random;
|
|
|
+import java.util.Set;
|
|
|
+import java.util.concurrent.TimeUnit;
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
|
|
|
public abstract class AbstractTimelineMetricsSink {
|
|
@@ -46,20 +64,22 @@ public abstract class AbstractTimelineMetricsSink {
|
|
|
public static final String METRICS_SEND_INTERVAL = "sendInterval";
|
|
|
public static final String METRICS_POST_TIMEOUT_SECONDS = "timeout";
|
|
|
public static final String COLLECTOR_PROPERTY = "collector";
|
|
|
+ public static final String COLLECTOR_PROTOCOL = "protocol";
|
|
|
+ public static final String COLLECTOR_PORT = "port";
|
|
|
+ public static final String ZOOKEEPER_QUORUM = "zookeeper.quorum";
|
|
|
public static final int DEFAULT_POST_TIMEOUT_SECONDS = 10;
|
|
|
public static final String SKIP_COUNTER_TRANSFROMATION = "skipCounterDerivative";
|
|
|
public static final String RPC_METRIC_PREFIX = "metric.rpc";
|
|
|
- public static final String RPC_METRIC_NAME_SUFFIX = "suffix";
|
|
|
- public static final String RPC_METRIC_PORT_SUFFIX = "port";
|
|
|
-
|
|
|
public static final String WS_V1_TIMELINE_METRICS = "/ws/v1/timeline/metrics";
|
|
|
-
|
|
|
public static final String SSL_KEYSTORE_PATH_PROPERTY = "truststore.path";
|
|
|
public static final String SSL_KEYSTORE_TYPE_PROPERTY = "truststore.type";
|
|
|
public static final String SSL_KEYSTORE_PASSWORD_PROPERTY = "truststore.password";
|
|
|
+ public static final String COLLECTOR_LIVE_NODES_PATH = "/ws/v1/timeline/metrics/livenodes";
|
|
|
|
|
|
protected static final AtomicInteger failedCollectorConnectionsCounter = new AtomicInteger(0);
|
|
|
public static int NUMBER_OF_SKIPPED_COLLECTOR_EXCEPTIONS = 100;
|
|
|
+ public int ZK_CONNECT_TRY_TIME = 10000;
|
|
|
+ public int ZK_SLEEP_BETWEEN_RETRY_TIME = 2000;
|
|
|
|
|
|
private SSLSocketFactory sslSocketFactory;
|
|
|
|
|
@@ -67,6 +87,28 @@ public abstract class AbstractTimelineMetricsSink {
|
|
|
|
|
|
protected static ObjectMapper mapper;
|
|
|
|
|
|
+ protected MetricCollectorHAHelper collectorHAHelper;
|
|
|
+
|
|
|
+ protected MetricSinkWriteShardStrategy metricSinkWriteShardStrategy;
|
|
|
+
|
|
|
+ // Single element cache with fixed expiration - Helps adjacent Sinks as
|
|
|
+ // well as timed refresh
|
|
|
+ protected Supplier targetCollectorHostSupplier;
|
|
|
+
|
|
|
+ protected final List<String> allKnownLiveCollectors = new ArrayList<>();
|
|
|
+
|
|
|
+ private volatile boolean isInitializedForHA = false;
|
|
|
+
|
|
|
+ @SuppressWarnings("all")
|
|
|
+ private final int RETRY_COUNT_BEFORE_COLLECTOR_FAILOVER = 5;
|
|
|
+
|
|
|
+ private final Gson gson = new Gson();
|
|
|
+
|
|
|
+ private final Random rand = new Random();
|
|
|
+
|
|
|
+ private static final int COLLECTOR_HOST_CACHE_MAX_EXPIRATION_MINUTES = 75;
|
|
|
+ private static final int COLLECTOR_HOST_CACHE_MIN_EXPIRATION_MINUTES = 60;
|
|
|
+
|
|
|
static {
|
|
|
mapper = new ObjectMapper();
|
|
|
AnnotationIntrospector introspector = new JaxbAnnotationIntrospector();
|
|
@@ -79,6 +121,16 @@ public abstract class AbstractTimelineMetricsSink {
|
|
|
LOG = LogFactory.getLog(this.getClass());
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Initialize Sink write strategy with respect to HA Collector
|
|
|
+ */
|
|
|
+ protected void init() {
|
|
|
+ metricSinkWriteShardStrategy = new MetricSinkWriteShardHostnameHashingStrategy(getHostname());
|
|
|
+ collectorHAHelper = new MetricCollectorHAHelper(getZookeeperQuorum(),
|
|
|
+ ZK_CONNECT_TRY_TIME, ZK_SLEEP_BETWEEN_RETRY_TIME);
|
|
|
+ isInitializedForHA = true;
|
|
|
+ }
|
|
|
+
|
|
|
protected boolean emitMetricsJson(String connectUrl, String jsonData) {
|
|
|
int timeout = getTimeoutSeconds() * 1000;
|
|
|
HttpURLConnection connection = null;
|
|
@@ -113,7 +165,7 @@ public abstract class AbstractTimelineMetricsSink {
|
|
|
}
|
|
|
}
|
|
|
cleanupInputStream(connection.getInputStream());
|
|
|
- //reset failedCollectorConnectionsCounter to "0"
|
|
|
+ // reset failedCollectorConnectionsCounter to "0"
|
|
|
failedCollectorConnectionsCounter.set(0);
|
|
|
return true;
|
|
|
} catch (IOException ioe) {
|
|
@@ -146,7 +198,20 @@ public abstract class AbstractTimelineMetricsSink {
|
|
|
}
|
|
|
|
|
|
protected boolean emitMetrics(TimelineMetrics metrics) {
|
|
|
- String connectUrl = getCollectorUri();
|
|
|
+ String collectorHost;
|
|
|
+ // Get cached target
|
|
|
+ if (targetCollectorHostSupplier != null) {
|
|
|
+ collectorHost = (String) targetCollectorHostSupplier.get();
|
|
|
+ // Last X attempts have failed - force refresh
|
|
|
+ if (failedCollectorConnectionsCounter.get() > RETRY_COUNT_BEFORE_COLLECTOR_FAILOVER) {
|
|
|
+ targetCollectorHostSupplier = null;
|
|
|
+ collectorHost = findPreferredCollectHost();
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ collectorHost = findPreferredCollectHost();
|
|
|
+ }
|
|
|
+
|
|
|
+ String connectUrl = getCollectorUri(collectorHost);
|
|
|
String jsonData = null;
|
|
|
try {
|
|
|
jsonData = mapper.writeValueAsString(metrics);
|
|
@@ -196,8 +261,7 @@ public abstract class AbstractTimelineMetricsSink {
|
|
|
protected HttpsURLConnection getSSLConnection(String spec)
|
|
|
throws IOException, IllegalStateException {
|
|
|
|
|
|
- HttpsURLConnection connection = (HttpsURLConnection) (new URL(spec)
|
|
|
- .openConnection());
|
|
|
+ HttpsURLConnection connection = (HttpsURLConnection) (new URL(spec).openConnection());
|
|
|
|
|
|
connection.setSSLSocketFactory(sslSocketFactory);
|
|
|
|
|
@@ -208,11 +272,7 @@ public abstract class AbstractTimelineMetricsSink {
|
|
|
String trustStorePassword) {
|
|
|
if (sslSocketFactory == null) {
|
|
|
if (trustStorePath == null || trustStorePassword == null) {
|
|
|
-
|
|
|
- String msg =
|
|
|
- String.format("Can't load TrustStore. " +
|
|
|
- "Truststore path or password is not set.");
|
|
|
-
|
|
|
+ String msg = "Can't load TrustStore. Truststore path or password is not set.";
|
|
|
LOG.error(msg);
|
|
|
throw new IllegalStateException(msg);
|
|
|
}
|
|
@@ -242,7 +302,191 @@ public abstract class AbstractTimelineMetricsSink {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- abstract protected String getCollectorUri();
|
|
|
+ /**
|
|
|
+ * Find appropriate write shard for this sink using the {@link org.apache.hadoop.metrics2.sink.timeline.availability.MetricSinkWriteShardStrategy}
|
|
|
+ *
|
|
|
+ * 1. Use configured collector(s) to discover available collectors
|
|
|
+ * 2. If configured collector(s) are unresponsive check Zookeeper to find live hosts
|
|
|
+ * 3. Refresh known collector list using ZK
|
|
|
+ * 4. Default: Return configured collector with no side effect due to discovery.
|
|
|
+ *
|
|
|
+ * throws {#link MetricsSinkInitializationException} if called before
|
|
|
+ * initialization, not other side effect
|
|
|
+ *
|
|
|
+ * @return String Collector hostname
|
|
|
+ */
|
|
|
+ protected synchronized String findPreferredCollectHost() {
|
|
|
+ if (!isInitializedForHA) {
|
|
|
+ init();
|
|
|
+ }
|
|
|
+
|
|
|
+ // Auto expire and re-calculate after 1 hour
|
|
|
+ if (targetCollectorHostSupplier != null) {
|
|
|
+ Object targetCollector = targetCollectorHostSupplier.get();
|
|
|
+ if (targetCollector != null) {
|
|
|
+ return (String) targetCollector;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ String configuredCollectors = getConfiguredCollectors();
|
|
|
+ // Reach out to all configured collectors before Zookeeper
|
|
|
+ if (configuredCollectors != null && !configuredCollectors.isEmpty()) {
|
|
|
+ String collectorHosts = getConfiguredCollectors();
|
|
|
+ if (!collectorHosts.isEmpty()) {
|
|
|
+ String[] hosts = collectorHosts.split(",");
|
|
|
+ for (String hostPortStr : hosts) {
|
|
|
+ if (hostPortStr != null && !hostPortStr.isEmpty()) {
|
|
|
+ String[] hostPortPair = hostPortStr.split(":");
|
|
|
+ if (hostPortPair.length < 2) {
|
|
|
+ LOG.warn("Collector port is missing from the configuration.");
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ String hostStr = hostPortPair[0].trim();
|
|
|
+ String portStr = hostPortPair[1].trim();
|
|
|
+ // Check liveliness and get known instances
|
|
|
+ try {
|
|
|
+ Collection<String> liveHosts = findLiveCollectorHostsFromKnownCollector(hostStr, portStr);
|
|
|
+ // Update live Hosts - current host will already be a part of this
|
|
|
+ for (String host : liveHosts) {
|
|
|
+ allKnownLiveCollectors.add(host);
|
|
|
+ }
|
|
|
+ } catch (MetricCollectorUnavailableException e) {
|
|
|
+ allKnownLiveCollectors.remove(hostStr);
|
|
|
+ LOG.info("Collector " + hostStr + " is not longer live. Removing " +
|
|
|
+ "it from list of know live collector hosts : " + allKnownLiveCollectors);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Lookup Zookeeper for live hosts - max 10 seconds wait time
|
|
|
+ if (allKnownLiveCollectors.size() == 0) {
|
|
|
+ allKnownLiveCollectors.addAll(collectorHAHelper.findLiveCollectorHostsFromZNode());
|
|
|
+ }
|
|
|
+
|
|
|
+ if (allKnownLiveCollectors.size() != 0) {
|
|
|
+ targetCollectorHostSupplier = Suppliers.memoizeWithExpiration(
|
|
|
+ new Supplier() {
|
|
|
+ @Override
|
|
|
+ public Object get() {
|
|
|
+ return metricSinkWriteShardStrategy.findCollectorShard(new ArrayList<>(allKnownLiveCollectors));
|
|
|
+ }
|
|
|
+ }, // random.nextInt(max - min + 1) + min # (60 to 75 minutes)
|
|
|
+ rand.nextInt(COLLECTOR_HOST_CACHE_MAX_EXPIRATION_MINUTES
|
|
|
+ - COLLECTOR_HOST_CACHE_MIN_EXPIRATION_MINUTES + 1)
|
|
|
+ + COLLECTOR_HOST_CACHE_MIN_EXPIRATION_MINUTES,
|
|
|
+ TimeUnit.MINUTES
|
|
|
+ );
|
|
|
+
|
|
|
+ return (String) targetCollectorHostSupplier.get();
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ Collection<String> findLiveCollectorHostsFromKnownCollector(String host, String port) throws MetricCollectorUnavailableException {
|
|
|
+ List<String> collectors = new ArrayList<>();
|
|
|
+ HttpURLConnection connection = null;
|
|
|
+ StringBuilder sb = new StringBuilder(getCollectorProtocol());
|
|
|
+ sb.append("://");
|
|
|
+ sb.append(host);
|
|
|
+ sb.append(":");
|
|
|
+ sb.append(port);
|
|
|
+ sb.append(COLLECTOR_LIVE_NODES_PATH);
|
|
|
+ String connectUrl = sb.toString();
|
|
|
+ LOG.debug("Requesting live collector nodes : " + connectUrl);
|
|
|
+ try {
|
|
|
+ connection = getCollectorProtocol().startsWith("https") ?
|
|
|
+ getSSLConnection(connectUrl) : getConnection(connectUrl);
|
|
|
+
|
|
|
+ connection.setRequestMethod("GET");
|
|
|
+ // 5 seconds for this op is plenty of wait time
|
|
|
+ connection.setConnectTimeout(3000);
|
|
|
+ connection.setReadTimeout(2000);
|
|
|
+
|
|
|
+ int responseCode = connection.getResponseCode();
|
|
|
+ if (responseCode == 200) {
|
|
|
+ try (InputStream in = connection.getInputStream()) {
|
|
|
+ StringWriter writer = new StringWriter();
|
|
|
+ IOUtils.copy(in, writer);
|
|
|
+ try {
|
|
|
+ collectors = gson.fromJson(writer.toString(), new TypeToken<List<String>>(){}.getType());
|
|
|
+ } catch (JsonSyntaxException jse) {
|
|
|
+ // Swallow this at the behest of still trying to POST
|
|
|
+ LOG.debug("Exception deserializing the json data on live " +
|
|
|
+ "collector nodes.", jse);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ } catch (IOException ioe) {
|
|
|
+ StringBuilder errorMessage =
|
|
|
+ new StringBuilder("Unable to connect to collector, " + connectUrl);
|
|
|
+ try {
|
|
|
+ if ((connection != null)) {
|
|
|
+ errorMessage.append(cleanupInputStream(connection.getErrorStream()));
|
|
|
+ }
|
|
|
+ } catch (IOException e) {
|
|
|
+ //NOP
|
|
|
+ }
|
|
|
+ LOG.debug(errorMessage);
|
|
|
+ LOG.debug(ioe);
|
|
|
+ String warnMsg = "Unable to connect to collector to find live nodes.";
|
|
|
+ LOG.warn(warnMsg, ioe);
|
|
|
+ throw new MetricCollectorUnavailableException(warnMsg);
|
|
|
+ }
|
|
|
+ return collectors;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Constructing without UriBuilder to avoid unfavorable httpclient
|
|
|
+ // dependencies
|
|
|
+ protected String constructTimelineMetricUri(String protocol, String host, String port) {
|
|
|
+ StringBuilder sb = new StringBuilder(protocol);
|
|
|
+ sb.append("://");
|
|
|
+ sb.append(host);
|
|
|
+ sb.append(":");
|
|
|
+ sb.append(port);
|
|
|
+ sb.append(WS_V1_TIMELINE_METRICS);
|
|
|
+ return sb.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ protected String constructContainerMetricUri(String protocol, String host, String port) {
|
|
|
+ StringBuilder sb = new StringBuilder(protocol);
|
|
|
+ sb.append("://");
|
|
|
+ sb.append(host);
|
|
|
+ sb.append(":");
|
|
|
+ sb.append(port);
|
|
|
+ sb.append(WS_V1_TIMELINE_METRICS);
|
|
|
+ return sb.toString();
|
|
|
+ }
|
|
|
|
|
|
+ /**
|
|
|
+ * Get a pre-formatted URI for the collector
|
|
|
+ */
|
|
|
+ abstract protected String getCollectorUri(String host);
|
|
|
+
|
|
|
+ abstract protected String getCollectorProtocol();
|
|
|
+
|
|
|
+ /**
|
|
|
+ * How soon to timeout on the emit calls in seconds.
|
|
|
+ */
|
|
|
abstract protected int getTimeoutSeconds();
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Get the zookeeper quorum for the cluster used to find collector
|
|
|
+ * @return String "host1:port1,host2:port2"
|
|
|
+ */
|
|
|
+ abstract protected String getZookeeperQuorum();
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Get pre-configured list of collectors available
|
|
|
+ * @return String "host1:port,host2:port"
|
|
|
+ */
|
|
|
+ abstract protected String getConfiguredCollectors();
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Get hostname used for calculating write shard.
|
|
|
+ * @return String "host1"
|
|
|
+ */
|
|
|
+ abstract protected String getHostname();
|
|
|
}
|