Просмотр исходного кода

AMBARI-15647 - Alerts Using the SKIPPED State Cause Stale Alert Notification (jonathanhurley)

Jonathan Hurley 9 лет назад
Родитель
Сommit
449be0f5c8

+ 0 - 9
ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py

@@ -113,15 +113,6 @@ class BaseAlert(object):
       result_state = res[0]
       reporting_state = result_state.lower()
 
-      # if the alert reports that it should be SKIPPED, then skip it
-      # this is useful for cases where the alert might run on multiple hosts
-      # but only 1 host should report the data
-      if result_state == BaseAlert.RESULT_SKIPPED:
-        logger.debug('[Alert][{0}] Skipping UUID {1}.'.format(self.get_name(),
-          self.get_uuid()))
-
-        return
-
       # it's possible that the alert definition doesn't have reporting; safely
       # check for it and fallback to default text if it doesn't exist
       if ('reporting' in self.alert_source_meta) and \

+ 5 - 2
ambari-agent/src/test/python/ambari_agent/TestAlerts.py

@@ -700,13 +700,16 @@ class TestAlerts(TestCase):
     alert.set_helpers(collector, cluster_configuration )
     alert.set_cluster("c1", "c6401.ambari.apache.org")
 
+    alert.collect()
+
     self.assertEquals(definition_json['source']['path'], alert.path)
     self.assertEquals(definition_json['source']['stacks_directory'], alert.stacks_dir)
     self.assertEquals(definition_json['source']['common_services_directory'], alert.common_services_dir)
     self.assertEquals(definition_json['source']['host_scripts_directory'], alert.host_scripts_dir)
 
-    # ensure that it was skipped
-    self.assertEquals(0,len(collector.alerts()))
+    # ensure that the skipped alert was still placed into the collector; it's up to
+    # the server to decide how to handle skipped alerts
+    self.assertEquals(1,len(collector.alerts()))
 
 
   def test_default_reporting_text(self):

+ 17 - 3
ambari-server/src/main/java/org/apache/ambari/server/events/listeners/alerts/AlertReceivedListener.java

@@ -165,6 +165,7 @@ public class AlertReceivedListener {
       }
 
       AlertCurrentEntity current;
+      AlertState alertState = alert.getState();
 
       if (StringUtils.isBlank(alert.getHostName()) || definition.isHostIgnored()) {
         current = m_alertsDao.findCurrentByNameNoHost(clusterId, alert.getName());
@@ -174,6 +175,12 @@ public class AlertReceivedListener {
       }
 
       if (null == current) {
+        // if there is no current alert and the state is skipped, then simple
+        // skip over this one as there is nothing to update in the databse
+        if (alertState == AlertState.SKIPPED) {
+          continue;
+        }
+
         AlertHistoryEntity history = createHistory(clusterId, definition, alert);
 
         // this new alert must reflect the correct MM state for the
@@ -194,11 +201,18 @@ public class AlertReceivedListener {
 
         toCreate.put(alert, current);
 
-      } else if (alert.getState() == current.getAlertHistory().getAlertState()) {
+      } else if (alertState == current.getAlertHistory().getAlertState()
+          || alertState == AlertState.SKIPPED) {
+
+        // update the timestamp
         current.setLatestTimestamp(alert.getTimestamp());
-        current.setLatestText(alert.getText());
-        toMerge.put(alert, current);
 
+        // only update the text if the alert isn't being skipped
+        if (alertState != AlertState.SKIPPED) {
+          current.setLatestText(alert.getText());
+        }
+
+        toMerge.put(alert, current);
       } else {
         if (LOG.isDebugEnabled()) {
           LOG.debug(

+ 10 - 1
ambari-server/src/main/java/org/apache/ambari/server/state/AlertState.java

@@ -25,17 +25,26 @@ public enum AlertState {
    * Alert does not need to be distributed.  Normal Operation.
    */
   OK,
+
   /**
    * Alert indicates there may be an issue.  The component may be operating
    * normally but may be in danger of becoming <code>CRITICAL</code>.
    */
   WARNING,
+
   /**
    * Indicates there is a critical situation that needs to be addressed.
    */
   CRITICAL,
+
   /**
    * The state of the alert is not known.
    */
-  UNKNOWN
+  UNKNOWN,
+
+  /**
+   * Indicates that the state of the alert should be discarded, but the alert
+   * timestamps should be updated so that it is not considered stale.
+   */
+  SKIPPED;
 }

+ 116 - 44
ambari-server/src/test/java/org/apache/ambari/server/state/alerts/AlertReceivedListenerTest.java

@@ -169,25 +169,24 @@ public class AlertReceivedListenerTest {
     String definitionName = ALERT_DEFINITION + "1";
     String componentName = "DATANODE";
 
-    Alert alert1 = new Alert(definitionName, null, "HDFS", componentName,
+    Alert alert = new Alert(definitionName, null, "HDFS", componentName,
         HOST1, AlertState.OK);
 
-    alert1.setCluster(m_cluster.getClusterName());
-    alert1.setLabel(ALERT_LABEL);
-    alert1.setText("HDFS " + componentName + " is OK");
-    alert1.setTimestamp(1L);
+    alert.setCluster(m_cluster.getClusterName());
+    alert.setLabel(ALERT_LABEL);
+    alert.setText("HDFS " + componentName + " is OK");
+    alert.setTimestamp(1L);
 
     // verify that the listener works with a regular alert
     AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class);
-    AlertReceivedEvent event1 = new AlertReceivedEvent(
-        m_cluster.getClusterId(), alert1);
-    listener.onAlertEvent(event1);
+    AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert);
+    listener.onAlertEvent(event);
 
     List<AlertCurrentEntity> allCurrent = m_dao.findCurrent();
     assertEquals(1, allCurrent.size());
 
     // invalid host
-    alert1.setHostName("INVALID");
+    alert.setHostName("INVALID");
 
     // remove all
     m_dao.removeCurrentByHost(HOST1);
@@ -195,7 +194,7 @@ public class AlertReceivedListenerTest {
     assertEquals(0, allCurrent.size());
 
     // verify no new alerts for disabled
-    listener.onAlertEvent(event1);
+    listener.onAlertEvent(event);
     allCurrent = m_dao.findCurrent();
     assertEquals(0, allCurrent.size());
   }
@@ -207,17 +206,17 @@ public class AlertReceivedListenerTest {
   public void testInvalidAlertDefinition() {
     String componentName = "DATANODE";
 
-    Alert alert1 = new Alert("missing_alert_definition_name", null, "HDFS",
+    Alert alert = new Alert("missing_alert_definition_name", null, "HDFS",
         componentName, HOST1, AlertState.OK);
 
-    alert1.setLabel(ALERT_LABEL);
-    alert1.setText("HDFS " + componentName + " is OK");
-    alert1.setTimestamp(1L);
+    alert.setLabel(ALERT_LABEL);
+    alert.setText("HDFS " + componentName + " is OK");
+    alert.setTimestamp(1L);
 
     // bad alert definition name means no current alerts
     AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class);
     AlertReceivedEvent event1 = new AlertReceivedEvent(
-        m_cluster.getClusterId(), alert1);
+        m_cluster.getClusterId(), alert);
     listener.onAlertEvent(event1);
 
     List<AlertCurrentEntity> allCurrent = m_dao.findCurrent();
@@ -232,25 +231,25 @@ public class AlertReceivedListenerTest {
     String definitionName = ALERT_DEFINITION + "1";
     String componentName = "DATANODE";
 
-    Alert alert1 = new Alert(definitionName, null, "HDFS", componentName,
+    Alert alert = new Alert(definitionName, null, "HDFS", componentName,
         HOST1, AlertState.OK);
 
-    alert1.setCluster(m_cluster.getClusterName());
-    alert1.setLabel(ALERT_LABEL);
-    alert1.setText("HDFS " + componentName + " is OK");
-    alert1.setTimestamp(1L);
+    alert.setCluster(m_cluster.getClusterName());
+    alert.setLabel(ALERT_LABEL);
+    alert.setText("HDFS " + componentName + " is OK");
+    alert.setTimestamp(1L);
 
     // verify that the listener works with a regular alert
     AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class);
     AlertReceivedEvent event1 = new AlertReceivedEvent(
-        m_cluster.getClusterId(), alert1);
+        m_cluster.getClusterId(), alert);
     listener.onAlertEvent(event1);
 
     List<AlertCurrentEntity> allCurrent = m_dao.findCurrent();
     assertEquals(1, allCurrent.size());
 
     // invalid host
-    alert1.setComponent("INVALID");
+    alert.setComponent("INVALID");
 
     // remove all
     m_dao.removeCurrentByHost(HOST1);
@@ -318,24 +317,24 @@ public class AlertReceivedListenerTest {
     String serviceName = Services.AMBARI.name();
     String componentName = Components.AMBARI_AGENT.name();
 
-    Alert alert1 = new Alert(definitionName, null, serviceName, componentName, HOST1,
+    Alert alert = new Alert(definitionName, null, serviceName, componentName, HOST1,
         AlertState.OK);
 
-    alert1.setCluster(m_cluster.getClusterName());
-    alert1.setLabel(ALERT_LABEL);
-    alert1.setText(serviceName + " " + componentName + " is OK");
-    alert1.setTimestamp(1L);
+    alert.setCluster(m_cluster.getClusterName());
+    alert.setLabel(ALERT_LABEL);
+    alert.setText(serviceName + " " + componentName + " is OK");
+    alert.setTimestamp(1L);
 
     // verify that the listener works with a regular alert
     AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class);
-    AlertReceivedEvent event1 = new AlertReceivedEvent(m_cluster.getClusterId(), alert1);
-    listener.onAlertEvent(event1);
+    AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert);
+    listener.onAlertEvent(event);
 
     List<AlertCurrentEntity> allCurrent = m_dao.findCurrent();
     assertEquals(1, allCurrent.size());
 
     // invalid host
-    alert1.setHostName("INVALID");
+    alert.setHostName("INVALID");
 
     // remove all
     m_dao.removeCurrentByHost(HOST1);
@@ -343,7 +342,7 @@ public class AlertReceivedListenerTest {
     assertEquals(0, allCurrent.size());
 
     // verify no new alerts received
-    listener.onAlertEvent(event1);
+    listener.onAlertEvent(event);
     allCurrent = m_dao.findCurrent();
     assertEquals(0, allCurrent.size());
   }
@@ -357,25 +356,25 @@ public class AlertReceivedListenerTest {
     String serviceName = Services.AMBARI.name();
     String componentName = Components.AMBARI_SERVER.name();
 
-    Alert alert1 = new Alert(definitionName, null, serviceName, componentName, HOST1,
+    Alert alert = new Alert(definitionName, null, serviceName, componentName, HOST1,
         AlertState.OK);
 
-    alert1.setCluster(m_cluster.getClusterName());
-    alert1.setLabel(ALERT_LABEL);
-    alert1.setText(serviceName + " " + componentName + " is OK");
-    alert1.setTimestamp(1L);
+    alert.setCluster(m_cluster.getClusterName());
+    alert.setLabel(ALERT_LABEL);
+    alert.setText(serviceName + " " + componentName + " is OK");
+    alert.setTimestamp(1L);
 
     // verify that the listener works with a regular alert
     AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class);
-    AlertReceivedEvent event1 = new AlertReceivedEvent(m_cluster.getClusterId(), alert1);
-    listener.onAlertEvent(event1);
+    AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert);
+    listener.onAlertEvent(event);
 
     List<AlertCurrentEntity> allCurrent = m_dao.findCurrent();
     assertEquals(1, allCurrent.size());
 
     // invalid host, invalid cluster
-    alert1.setHostName("INVALID");
-    alert1.setCluster("INVALID");
+    alert.setHostName("INVALID");
+    alert.setCluster("INVALID");
 
     // remove all
     m_dao.removeCurrentByHost(HOST1);
@@ -383,7 +382,7 @@ public class AlertReceivedListenerTest {
     assertEquals(0, allCurrent.size());
 
     // verify that the alert was still received
-    listener.onAlertEvent(event1);
+    listener.onAlertEvent(event);
     allCurrent = m_dao.findCurrent();
     assertEquals(1, allCurrent.size());
   }
@@ -408,8 +407,8 @@ public class AlertReceivedListenerTest {
 
     // verify that the listener works with a regular alert
     AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class);
-    AlertReceivedEvent event1 = new AlertReceivedEvent(m_cluster.getClusterId(), alert1);
-    listener.onAlertEvent(event1);
+    AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert1);
+    listener.onAlertEvent(event);
 
     List<AlertCurrentEntity> allCurrent = m_dao.findCurrent();
     assertEquals(1, allCurrent.size());
@@ -424,8 +423,81 @@ public class AlertReceivedListenerTest {
     assertEquals(0, allCurrent.size());
 
     // verify no new alerts received
-    listener.onAlertEvent(event1);
+    listener.onAlertEvent(event);
     allCurrent = m_dao.findCurrent();
     assertEquals(0, allCurrent.size());
   }
+
+  /**
+   * Tests that receiving and alert with {@link AlertState#SKIPPED} does not create an entry
+   * if there is currently no current alert.
+   */
+  @Test
+  public void testSkippedAlertWithNoCurrentAlert() {
+    String definitionName = ALERT_DEFINITION + "1";
+    String serviceName = "HDFS";
+    String componentName = "NAMENODE";
+
+    Alert alert = new Alert(definitionName, null, serviceName, componentName, HOST1, AlertState.SKIPPED);
+
+    alert.setCluster(m_cluster.getClusterName());
+    alert.setLabel(ALERT_LABEL);
+    alert.setText(serviceName + " " + componentName + " is OK");
+    alert.setTimestamp(1L);
+
+    // fire the alert, and check that nothing gets created
+    AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class);
+    AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert);
+    listener.onAlertEvent(event);
+
+    List<AlertCurrentEntity> allCurrent = m_dao.findCurrent();
+    assertEquals(0, allCurrent.size());
+  }
+
+  /**
+   * Tests that receiving and alert with {@link AlertState#SKIPPED} does not
+   * create an entry if there is currently no current alert.
+   */
+  @Test
+  public void testSkippedAlertUpdatesTimestamp() {
+    String definitionName = ALERT_DEFINITION + "1";
+    String serviceName = "HDFS";
+    String componentName = "NAMENODE";
+    String text = serviceName + " " + componentName + " is OK";
+
+    Alert alert = new Alert(definitionName, null, serviceName, componentName, HOST1, AlertState.OK);
+
+    alert.setCluster(m_cluster.getClusterName());
+    alert.setLabel(ALERT_LABEL);
+    alert.setText(text);
+    alert.setTimestamp(1L);
+
+    // fire the alert, and check that the new entry was created with the right
+    // timestamp
+    AlertReceivedListener listener = m_injector.getInstance(AlertReceivedListener.class);
+    AlertReceivedEvent event = new AlertReceivedEvent(m_cluster.getClusterId(), alert);
+    listener.onAlertEvent(event);
+
+    List<AlertCurrentEntity> allCurrent = m_dao.findCurrent();
+    assertEquals(1, allCurrent.size());
+
+    // check timestamp
+    assertEquals(1L, (long) allCurrent.get(0).getOriginalTimestamp());
+    assertEquals(1L, (long) allCurrent.get(0).getLatestTimestamp());
+
+    // update the timestamp and the state
+    alert.setState(AlertState.SKIPPED);
+    alert.setTimestamp(2L);
+
+    // the logic we have does NOT update the text, so make sure this does not
+    // change
+    alert.setText("INVALID");
+
+    // get the current make sure the fields were updated
+    listener.onAlertEvent(event);
+    allCurrent = m_dao.findCurrent();
+    assertEquals(1L, (long) allCurrent.get(0).getOriginalTimestamp());
+    assertEquals(2L, (long) allCurrent.get(0).getLatestTimestamp());
+    assertEquals(text, allCurrent.get(0).getLatestText());
+  }
 }