|
@@ -0,0 +1,435 @@
|
|
|
+/**
|
|
|
+ * Licensed to the Apache Software Foundation (ASF) under one
|
|
|
+ * or more contributor license agreements. See the NOTICE file
|
|
|
+ * distributed with this work for additional information
|
|
|
+ * regarding copyright ownership. The ASF licenses this file
|
|
|
+ * to you under the Apache License, Version 2.0 (the
|
|
|
+ * "License"); you may not use this file except in compliance
|
|
|
+ * with the License. You may obtain a copy of the License at
|
|
|
+ *
|
|
|
+ * http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+ *
|
|
|
+ * Unless required by applicable law or agreed to in writing, software
|
|
|
+ * distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+ * See the License for the specific language governing permissions and
|
|
|
+ * limitations under the License.
|
|
|
+ */
|
|
|
+package org.apache.ambari.server.alerts;
|
|
|
+
|
|
|
+import java.text.MessageFormat;
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.Collections;
|
|
|
+import java.util.HashSet;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Set;
|
|
|
+
|
|
|
+import org.apache.ambari.server.AmbariException;
|
|
|
+import org.apache.ambari.server.actionmanager.ActionManager;
|
|
|
+import org.apache.ambari.server.actionmanager.RequestStatus;
|
|
|
+import org.apache.ambari.server.api.services.BaseRequest;
|
|
|
+import org.apache.ambari.server.controller.AmbariManagementController;
|
|
|
+import org.apache.ambari.server.controller.internal.AbstractControllerResourceProvider;
|
|
|
+import org.apache.ambari.server.controller.internal.ClusterResourceProvider;
|
|
|
+import org.apache.ambari.server.controller.spi.Predicate;
|
|
|
+import org.apache.ambari.server.controller.spi.Request;
|
|
|
+import org.apache.ambari.server.controller.spi.Resource.Type;
|
|
|
+import org.apache.ambari.server.controller.spi.ResourceProvider;
|
|
|
+import org.apache.ambari.server.controller.utilities.PredicateBuilder;
|
|
|
+import org.apache.ambari.server.controller.utilities.PropertyHelper;
|
|
|
+import org.apache.ambari.server.orm.dao.HostRoleCommandDAO;
|
|
|
+import org.apache.ambari.server.orm.entities.AlertDefinitionEntity;
|
|
|
+import org.apache.ambari.server.state.Alert;
|
|
|
+import org.apache.ambari.server.state.AlertState;
|
|
|
+import org.apache.ambari.server.state.Cluster;
|
|
|
+import org.apache.ambari.server.state.alert.AlertDefinition;
|
|
|
+import org.apache.ambari.server.state.alert.AlertDefinitionFactory;
|
|
|
+import org.apache.ambari.server.state.alert.ParameterizedSource.AlertParameter;
|
|
|
+import org.apache.ambari.server.state.alert.ServerSource;
|
|
|
+import org.apache.ambari.server.state.services.AmbariServerAlertService;
|
|
|
+import org.apache.commons.lang.StringUtils;
|
|
|
+import org.apache.commons.lang.math.NumberUtils;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
+
|
|
|
+import com.google.inject.Inject;
|
|
|
+import com.google.inject.Provider;
|
|
|
+
|
|
|
+/**
|
|
|
+ * The {@link AmbariPerformanceRunnable} is used by the
|
|
|
+ * {@link AmbariServerAlertService} to ensure that certain areas of Ambari are
|
|
|
+ * responsive. It performs the following checks:
|
|
|
+ * <ul>
|
|
|
+ * <li>A GET request against the cluster endpoint.</li>
|
|
|
+ * <li>A query against {@link HostRoleCommandDAO} to get a summary of request
|
|
|
+ * statuses</li>
|
|
|
+ * </ul>
|
|
|
+ */
|
|
|
+public class AmbariPerformanceRunnable extends AlertRunnable {
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Logger.
|
|
|
+ */
|
|
|
+ private final static Logger LOG = LoggerFactory.getLogger(AmbariPerformanceRunnable.class);
|
|
|
+
|
|
|
+ /**
|
|
|
+ * <pre>
|
|
|
+ * Performance Overview:
|
|
|
+ * Database Access (Request By Status): 330ms (OK)
|
|
|
+ * REST API (Cluster): 5,456ms (WARNING)
|
|
|
+ * </pre>
|
|
|
+ */
|
|
|
+ private static final String PERFORMANCE_OVERVIEW_TEMPLATE = "Performance Overview:"
|
|
|
+ + System.lineSeparator() + "{0}";
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Example: {@code Database Access (Request By Status): 330ms (OK)}
|
|
|
+ */
|
|
|
+ private static final String PERFORMANCE_AREA_TEMPLATE = " {0}: {1}ms ({2})";
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Example:
|
|
|
+ * {@code Unable to execute performance alert area REQUEST_BY_STATUS: (UNKNOWN)}
|
|
|
+ */
|
|
|
+ private static final String PERFORMANCE_AREA_FAILURE_TEMPLATE = " Unable to execute performance alert area {0}: ({1})";
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Used for converting {@link AlertDefinitionEntity} into
|
|
|
+ * {@link AlertDefinition} instances.
|
|
|
+ */
|
|
|
+ @Inject
|
|
|
+ private AlertDefinitionFactory m_definitionFactory;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * The {@link PerformanceArea} enumeration represents logical areas of
|
|
|
+ * functionality to test for performance.
|
|
|
+ */
|
|
|
+ private enum PerformanceArea {
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Query for requests by {@link RequestStatus#IN_PROGRESS}.
|
|
|
+ */
|
|
|
+ REQUEST_BY_STATUS("Database Access (Request By Status)",
|
|
|
+ "request.by.status.warning.threshold", 3000, "request.by.status.critical.threshold", 5000) {
|
|
|
+ /**
|
|
|
+ * {@inheritDoc}
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ void execute(AmbariPerformanceRunnable runnable, Cluster cluster) throws Exception {
|
|
|
+ runnable.m_actionManager.get().getRequestsByStatus(RequestStatus.IN_PROGRESS,
|
|
|
+ BaseRequest.DEFAULT_PAGE_SIZE, false);
|
|
|
+ }
|
|
|
+ },
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Query through the REST API framework for a cluster.
|
|
|
+ */
|
|
|
+ REST_API_GET_CLUSTER("REST API (Cluster)",
|
|
|
+ "rest.api.cluster.warning.threshold",
|
|
|
+ 5000, "rest.api.cluster.critical.threshold", 7000) {
|
|
|
+ /**
|
|
|
+ * {@inheritDoc}
|
|
|
+ */
|
|
|
+ @Override
|
|
|
+ void execute(AmbariPerformanceRunnable runnable, Cluster cluster) throws Exception {
|
|
|
+ Type type = Type.Cluster;
|
|
|
+ ResourceProvider provider = AbstractControllerResourceProvider.getResourceProvider(
|
|
|
+ type, PropertyHelper.getPropertyIds(type), PropertyHelper.getKeyPropertyIds(type),
|
|
|
+ runnable.m_amc.get());
|
|
|
+
|
|
|
+ Set<String> propertyIds = new HashSet<String>();
|
|
|
+
|
|
|
+ propertyIds.add(ClusterResourceProvider.CLUSTER_ID_PROPERTY_ID);
|
|
|
+ propertyIds.add(ClusterResourceProvider.CLUSTER_NAME_PROPERTY_ID);
|
|
|
+
|
|
|
+ // create the request
|
|
|
+ Request request = PropertyHelper.getReadRequest(propertyIds);
|
|
|
+
|
|
|
+ // build the predicate for this cluster
|
|
|
+ Predicate predicate = new PredicateBuilder().property(
|
|
|
+ ClusterResourceProvider.CLUSTER_NAME_PROPERTY_ID).equals(
|
|
|
+ cluster.getClusterName()).toPredicate();
|
|
|
+
|
|
|
+ provider.getResources(request, predicate);
|
|
|
+ }
|
|
|
+ };
|
|
|
+
|
|
|
+ /**
|
|
|
+ * The label for the performance area.
|
|
|
+ */
|
|
|
+ private final String m_label;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * The name of the parameter on the alert definition which represents the
|
|
|
+ * {@link AlertState#WARNING} threshold value.
|
|
|
+ */
|
|
|
+ private final String m_warningParameter;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * A default {@link AlertState#WARNING} threshold value of the definition
|
|
|
+ * doesn't have {@link #m_warningParameter} defined.
|
|
|
+ */
|
|
|
+ private final int m_defaultWarningThreshold;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * The name of the parameter on the alert definition which represents the
|
|
|
+ * {@link AlertState#CRITICAL} threshold value.
|
|
|
+ */
|
|
|
+ private final String m_criticalParameter;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * A default {@link AlertState#WARNING} threshold value of the definition
|
|
|
+ * doesn't have {@link #m_criticalParameter} defined.
|
|
|
+ */
|
|
|
+ private final int m_defaultCriticalThreshold;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Constructor.
|
|
|
+ *
|
|
|
+ * @param label
|
|
|
+ * the display label for this performance area (not {@code null}).
|
|
|
+ * @param warningParameter
|
|
|
+ * the definition parameter name for the warning threshold (not
|
|
|
+ * {@code null})
|
|
|
+ * @param defaultWarningThreshold
|
|
|
+ * the default value to use if the definition does not have a
|
|
|
+ * warning threshold paramter.
|
|
|
+ * @param criticalParameter
|
|
|
+ * the definition parameter name for the critical threshold (not
|
|
|
+ * {@code null})
|
|
|
+ * @param defaultCriticalThreshold
|
|
|
+ * the default value to use if the definition does not have a
|
|
|
+ * critical threshold paramter.
|
|
|
+ */
|
|
|
+ private PerformanceArea(String label, String warningParameter, int defaultWarningThreshold,
|
|
|
+ String criticalParameter, int defaultCriticalThreshold) {
|
|
|
+ m_label = label;
|
|
|
+ m_warningParameter = warningParameter;
|
|
|
+ m_defaultWarningThreshold = defaultWarningThreshold;
|
|
|
+ m_criticalParameter = criticalParameter;
|
|
|
+ m_defaultCriticalThreshold = defaultCriticalThreshold;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Runs the {@link PerformanceArea}.
|
|
|
+ *
|
|
|
+ * @param runnable
|
|
|
+ * a reference to the parent {@link AlertRunnable} which has
|
|
|
+ * injected members for use.
|
|
|
+ * @return a result of running the performance area (never {@code null}).
|
|
|
+ */
|
|
|
+ abstract void execute(AmbariPerformanceRunnable runnable, Cluster cluster) throws Exception;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Used for querying for requests by status.
|
|
|
+ */
|
|
|
+ @Inject
|
|
|
+ private Provider<ActionManager> m_actionManager;
|
|
|
+
|
|
|
+ @Inject
|
|
|
+ private Provider<AmbariManagementController> m_amc;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Constructor.
|
|
|
+ *
|
|
|
+ * @param definitionName
+ * the definition name, handed unchanged to the {@link AlertRunnable}
+ * super constructor.
|
|
|
+ */
|
|
|
+ public AmbariPerformanceRunnable(String definitionName) {
|
|
|
+ super(definitionName);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
+  * {@inheritDoc}
+  * <p/>
+  * Times every {@link PerformanceArea}, folds the individual states into one
+  * overall state and returns a single alert whose text lists each area's
+  * result.
+  */
+ @Override
+ List<Alert> execute(Cluster cluster, AlertDefinitionEntity entity) throws AmbariException {
+   // the threshold parameters live on the definition's source, so the entity
+   // is coerced into its business object form first
+   AlertDefinition definition = m_definitionFactory.coerce(entity);
+   ServerSource source = (ServerSource) definition.getSource();
+   List<AlertParameter> thresholdParameters = source.getParameters();
+
+   List<String> areaSummaries = new ArrayList<>();
+   AlertState overallState = AlertState.OK;
+
+   for (PerformanceArea area : PerformanceArea.values()) {
+     PerformanceResult areaResult;
+     try {
+       long begin = System.currentTimeMillis();
+       area.execute(this, cluster);
+       long elapsed = System.currentTimeMillis() - begin;
+
+       areaResult = calculatePerformanceResult(area, elapsed, thresholdParameters);
+     } catch (Exception exception) {
+       // an area which cannot run is reported as UNKNOWN; the remaining areas
+       // are still executed
+       String failureText = MessageFormat.format(PERFORMANCE_AREA_FAILURE_TEMPLATE, area,
+           AlertState.UNKNOWN);
+
+       LOG.error(failureText, exception);
+       areaResult = new PerformanceResult(failureText, AlertState.UNKNOWN);
+     }
+
+     // remember the rendered line for the overview text
+     areaSummaries.add(areaResult.getResult());
+
+     // fold this area into the overall state: CRITICAL always wins, WARNING
+     // replaces anything except CRITICAL, UNKNOWN only replaces OK, and every
+     // other state leaves the overall state untouched
+     AlertState areaState = areaResult.getAlertState();
+     if (areaState == AlertState.CRITICAL) {
+       overallState = AlertState.CRITICAL;
+     } else if (areaState == AlertState.WARNING) {
+       if (overallState != AlertState.CRITICAL) {
+         overallState = AlertState.WARNING;
+       }
+     } else if (areaState == AlertState.UNKNOWN) {
+       if (overallState == AlertState.OK) {
+         overallState = AlertState.UNKNOWN;
+       }
+     }
+   }
+
+   // one line per area underneath the overview header
+   String overview = MessageFormat.format(PERFORMANCE_OVERVIEW_TEMPLATE,
+       StringUtils.join(areaSummaries, System.lineSeparator()));
+
+   Alert alert = new Alert(entity.getDefinitionName(), null, entity.getServiceName(),
+       entity.getComponentName(), null, overallState);
+
+   alert.setLabel(entity.getLabel());
+   alert.setText(overview);
+   alert.setTimestamp(System.currentTimeMillis());
+   alert.setCluster(cluster.getClusterName());
+
+   return Collections.singletonList(alert);
+ }
|
|
|
+
|
|
|
+ /**
+  * Calculates the state based on the threshold values for a
+  * {@link PerformanceArea} and an actual run time. The thresholds start out
+  * as the area's defaults and are replaced by the values of any definition
+  * parameters whose names match the area's warning or critical parameter.
+  * <p/>
+  * A time at or above the critical threshold is {@link AlertState#CRITICAL}.
+  * Otherwise, a time at or above the warning threshold is
+  * {@link AlertState#WARNING}. Any other time is {@link AlertState#OK}.
+  *
+  * @param area
+  *          the area to calculate the result for (not {@code null}).
+  * @param time
+  *          the time taken, in milliseconds, to run the test.
+  * @param parameters
+  *          a list of parameters from the alert definition which contain the
+  *          threshold values. If {@code null}, the area's default thresholds
+  *          are used.
+  * @return a result of running the performance area (never {@code null}).
+  */
+ PerformanceResult calculatePerformanceResult(PerformanceArea area, long time,
+     List<AlertParameter> parameters) {
+   int warningThreshold = area.m_defaultWarningThreshold;
+   int criticalThreshold = area.m_defaultCriticalThreshold;
+
+   // a definition without parameters simply keeps the defaults
+   if (null != parameters) {
+     for (AlertParameter parameter : parameters) {
+       String name = parameter.getName();
+       Object value = parameter.getValue();
+
+       if (StringUtils.equals(name, area.m_warningParameter)) {
+         warningThreshold = getThresholdValue(value, warningThreshold);
+       }
+
+       if (StringUtils.equals(name, area.m_criticalParameter)) {
+         criticalThreshold = getThresholdValue(value, criticalThreshold);
+       }
+     }
+   }
+
+   // the critical threshold is checked first so that it takes precedence;
+   // exceeding it must produce CRITICAL, not WARNING
+   AlertState alertState = AlertState.OK;
+   if (time >= criticalThreshold) {
+     alertState = AlertState.CRITICAL;
+   } else if (time >= warningThreshold) {
+     alertState = AlertState.WARNING;
+   }
+
+   String resultLabel = MessageFormat.format(PERFORMANCE_AREA_TEMPLATE, area.m_label, time,
+       alertState);
+
+   return new PerformanceResult(resultLabel, alertState);
+ }
|
|
|
+
|
|
|
+ /**
+  * Converts the given value to an integer safely. A {@link Number} is
+  * narrowed with {@link Number#intValue()}. Any other value is converted to
+  * its string form and parsed if {@link NumberUtils#isNumber(String)} accepts
+  * it.
+  *
+  * @param value
+  *          the raw threshold value from the alert definition (may be
+  *          {@code null}).
+  * @param defaultValue
+  *          the value to return when {@code value} is {@code null} or is not
+  *          recognized as a number.
+  * @return the threshold as an integer, or {@code defaultValue}.
+  */
+ int getThresholdValue(Object value, int defaultValue) {
+   if (value == null) {
+     return defaultValue;
+   }
+
+   if (value instanceof Number) {
+     return ((Number) value).intValue();
+   }
+
+   // everything which is not already numeric is handled through its text form
+   String text = (value instanceof String) ? (String) value : value.toString();
+
+   return NumberUtils.isNumber(text) ? NumberUtils.createNumber(text).intValue() : defaultValue;
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * The {@link PerformanceResult} class is used to wrap the result of a
|
|
|
+ * {@link PerformanceArea}.
|
|
|
+ */
|
|
|
+ private static final class PerformanceResult {
|
|
|
+ private final String m_result;
|
|
|
+ private final AlertState m_alertState;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Constructor.
|
|
|
+ *
|
|
|
+ * @param result
|
|
|
+ * the text of the result (not {@code null}).
|
|
|
+ * @param alertState
|
|
|
+ * the result state (not {@code null}).
|
|
|
+ */
|
|
|
+ private PerformanceResult(String result, AlertState alertState) {
|
|
|
+ m_result = result;
|
|
|
+ m_alertState = alertState;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Gets the fully-rendered result text, such as:
|
|
|
+ * {@code Database Access (Request By Status): 330ms (OK)}
|
|
|
+ *
|
|
|
+ * @return the result
|
|
|
+ */
|
|
|
+ public String getResult() {
|
|
|
+ return m_result;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * The state of the result as calculated by the threshold parameters.
|
|
|
+ *
|
|
|
+ * @return the state
|
|
|
+ */
|
|
|
+ public AlertState getAlertState() {
|
|
|
+ return m_alertState;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|