AMBARI-8143 - Alerts: NameNode Health HA Alert Check (jonathanhurley)

Jonathan Hurley, 10 years ago
Parent commit: 90478a89df
22 changed files with 610 additions and 38 deletions
  1. +17 -2
      ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py
  2. +1 -1
      ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py
  3. +34 -0
      ambari-agent/src/test/python/ambari_agent/TestAlerts.py
  4. +4 -1
      ambari-agent/src/test/python/ambari_agent/dummy_files/test_script.py
  5. +13 -0
      ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProvider.java
  6. +29 -18
      ambari-server/src/main/java/org/apache/ambari/server/events/listeners/AlertReceivedListener.java
  7. +28 -0
      ambari-server/src/main/java/org/apache/ambari/server/orm/entities/AlertDefinitionEntity.java
  8. +20 -0
      ambari-server/src/main/java/org/apache/ambari/server/state/alert/AlertDefinition.java
  9. +2 -0
      ambari-server/src/main/java/org/apache/ambari/server/state/alert/AlertDefinitionFactory.java
  10. +16 -10
      ambari-server/src/main/java/org/apache/ambari/server/upgrade/SchemaUpgradeHelper.java
  11. +91 -0
      ambari-server/src/main/java/org/apache/ambari/server/upgrade/UpgradeCatalog200.java
  12. +1 -0
      ambari-server/src/main/resources/Ambari-DDL-MySQL-CREATE.sql
  13. +1 -0
      ambari-server/src/main/resources/Ambari-DDL-Oracle-CREATE.sql
  14. +1 -0
      ambari-server/src/main/resources/Ambari-DDL-Postgres-CREATE.sql
  15. +1 -0
      ambari-server/src/main/resources/Ambari-DDL-Postgres-EMBEDDED-CREATE.sql
  16. +12 -0
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json
  17. +166 -0
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/package/files/alert_ha_namenode_health.py
  18. +15 -3
      ambari-server/src/test/java/org/apache/ambari/server/api/services/AmbariMetaInfoTest.java
  19. +6 -0
      ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProviderTest.java
  20. +1 -1
      ambari-server/src/test/java/org/apache/ambari/server/state/alerts/AlertEventPublisherTest.java
  21. +137 -0
      ambari-server/src/test/java/org/apache/ambari/server/upgrade/UpgradeCatalog200Test.java
  22. +14 -2
      ambari-server/src/test/resources/stacks/HDP/2.0.5/services/HDFS/alerts.json

+ 17 - 2
ambari-agent/src/main/python/ambari_agent/alerts/base_alert.py

@@ -30,6 +30,7 @@ class BaseAlert(object):
   RESULT_WARNING = 'WARNING'
   RESULT_CRITICAL = 'CRITICAL'
   RESULT_UNKNOWN = 'UNKNOWN'
+  RESULT_SKIPPED = 'SKIPPED'
   
   def __init__(self, alert_meta, alert_source_meta):
     self.alert_meta = alert_meta
@@ -89,7 +90,18 @@ class BaseAlert(object):
     
     try:
       res = self._collect()
-      reporting_state = res[0].lower()
+      result_state = res[0]
+      reporting_state = result_state.lower()
+
+      # if the alert reports that it should be SKIPPED, then skip it
+      # this is useful for cases where the alert might run on multiple hosts
+      # but only 1 host should report the data
+      if result_state == BaseAlert.RESULT_SKIPPED:
+        logger.debug('Alert {0} with UUID {1} was skipped.'.format(self.get_name(),
+          self.get_uuid()))
+
+        return
+
 
       if reporting_state in self.alert_source_meta['reporting']:
         res_base_text = self.alert_source_meta['reporting'][reporting_state]['text']
@@ -149,7 +161,10 @@ class BaseAlert(object):
     keys = re.findall("{{([\S]+)}}", key)
     
     if len(keys) > 0:
-      logger.debug("Found parameterized key {0} for {1}".format(str(keys), str(self)))
+      if logger.isEnabledFor(logging.DEBUG):
+        logger.debug("Found parameterized key {0} for {1}".format(
+          str(keys), str(self)))
+
       self._lookup_keys.append(keys[0])
       return keys[0]
       
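The new RESULT_SKIPPED state lets a script that runs on every host elect a single reporter. A minimal sketch of the pattern follows; it uses the same execute(parameters, host_name) contract as the agent's script alerts, but the '{{foo-site/reporting_host}}' key is a made-up illustration, not part of this patch.

def execute(parameters=None, host_name=None):
  # hypothetical election key; any configuration-driven rule works here
  reporting_host = (parameters or {}).get('{{foo-site/reporting_host}}', '')

  if host_name and reporting_host.startswith(host_name):
    return ('OK', ['Reported from {0}'.format(host_name)])

  # BaseAlert.collect() drops SKIPPED results instead of forwarding them,
  # so only the elected host's result ever reaches the collector
  return ('SKIPPED', ['Another host will report this alert'])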

+ 1 - 1
ambari-agent/src/main/python/ambari_agent/alerts/script_alert.py

@@ -73,7 +73,7 @@ class ScriptAlert(BaseAlert):
     if cmd_module is not None:
       # convert the dictionary from 
       # {'foo-site/bar': 'baz'} into 
-      # {'{{foo-site/bar}}': 'baz'}1
+      # {'{{foo-site/bar}}': 'baz'}
       parameters = {}
       for key in self.config_value_dict:
         parameters['{{' + key + '}}'] = self.config_value_dict[key]

+ 34 - 0
ambari-agent/src/test/python/ambari_agent/TestAlerts.py

@@ -569,3 +569,37 @@ class TestAlerts(TestCase):
     # execute the alert immediately and verify that the collector has the result
     ash.execute_alert(execution_commands)
     self.assertEquals(1, len(ash._collector.alerts()))
+
+
+  def test_skipped_alert(self):
+    json = {
+      "name": "namenode_process",
+      "service": "HDFS",
+      "component": "NAMENODE",
+      "label": "NameNode process",
+      "interval": 6,
+      "scope": "host",
+      "enabled": True,
+      "uuid": "c1f73191-4481-4435-8dae-fd380e4c0be1",
+      "source": {
+        "type": "SCRIPT",
+        "path": "test_script.py",
+      }
+    }
+
+    # normally set by AlertSchedulerHandler
+    json['source']['stacks_directory'] = os.path.join('ambari_agent', 'dummy_files')
+    json['source']['host_scripts_directory'] = os.path.join('ambari_agent', 'host_scripts')
+
+    collector = AlertCollector()
+    sa = ScriptAlert(json, json['source'])
+
+    # instruct the test alert script to be skipped
+    sa.set_helpers(collector, {'foo-site/skip': 'true'} )
+
+    self.assertEquals(json['source']['path'], sa.path)
+    self.assertEquals(json['source']['stacks_directory'], sa.stacks_dir)
+    self.assertEquals(json['source']['host_scripts_directory'], sa.host_scripts_dir)
+
+    # ensure that it was skipped
+    self.assertEquals(0,len(collector.alerts()))

+ 4 - 1
ambari-agent/src/test/python/ambari_agent/dummy_files/test_script.py

@@ -36,6 +36,9 @@ def execute(parameters=None, host_name=None):
     
     if '{{foo-site/baz}}' in parameters:
       baz = parameters['{{foo-site/baz}}']
+
+    if '{{foo-site/skip}}' in parameters:
+      return ('SKIPPED', ['This alert is skipped and will not be in the collector'])
   
   label = "bar is {0}, baz is {1}".format(bar, baz)  
-  return (('WARNING', [label]))
+  return ('WARNING', [label])

+ 13 - 0
ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProvider.java

@@ -76,6 +76,7 @@ public class AlertDefinitionResourceProvider extends AbstractControllerResourceP
   protected static final String ALERT_DEF_COMPONENT_NAME = "AlertDefinition/component_name";
   protected static final String ALERT_DEF_ENABLED = "AlertDefinition/enabled";
   protected static final String ALERT_DEF_SCOPE = "AlertDefinition/scope";
+  protected static final String ALERT_DEF_IGNORE_HOST = "AlertDefinition/ignore_host";
 
   protected static final String ALERT_DEF_SOURCE = "AlertDefinition/source";
   protected static final String ALERT_DEF_SOURCE_TYPE = "AlertDefinition/source/type";
@@ -124,6 +125,7 @@ public class AlertDefinitionResourceProvider extends AbstractControllerResourceP
     PROPERTY_IDS.add(ALERT_DEF_INTERVAL);
     PROPERTY_IDS.add(ALERT_DEF_ENABLED);
     PROPERTY_IDS.add(ALERT_DEF_SCOPE);
+    PROPERTY_IDS.add(ALERT_DEF_IGNORE_HOST);
     PROPERTY_IDS.add(ALERT_DEF_SOURCE);
     PROPERTY_IDS.add(ALERT_DEF_ACTION_RUN_NOW);
 
@@ -394,6 +396,13 @@ public class AlertDefinitionResourceProvider extends AbstractControllerResourceP
       enabled = Boolean.TRUE;
     }
 
+    Boolean ignoreHost = null;
+    if (requestMap.containsKey(ALERT_DEF_IGNORE_HOST)) {
+      ignoreHost = Boolean.parseBoolean((String) requestMap.get(ALERT_DEF_IGNORE_HOST));
+    } else if (bCreate) {
+      ignoreHost = Boolean.FALSE;
+    }
+
     Scope scope = null;
     if (null != desiredScope && desiredScope.length() > 0) {
       scope = Scope.valueOf(desiredScope);
@@ -499,6 +508,10 @@ public class AlertDefinitionResourceProvider extends AbstractControllerResourceP
       entity.setEnabled(enabled.booleanValue());
     }
 
+    if (null != ignoreHost) {
+      entity.setHostIgnored(ignoreHost.booleanValue());
+    }
+
     if (null != interval) {
       entity.setScheduleInterval(interval);
     }
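Once the property id is registered, ignore_host rides through the normal create/update request flow. A hedged Python 2 sketch of flipping it over the REST API (the host, cluster name c1, and definition id 42 are assumptions; only the AlertDefinition/ignore_host key comes from this patch, and authentication is omitted for brevity):

import json
import urllib2

# the provider reads the value with Boolean.parseBoolean on a String,
# so send "true"/"false" as strings rather than JSON booleans
body = json.dumps({'AlertDefinition': {'ignore_host': 'true'}})

request = urllib2.Request(
    'http://ambari.example.com:8080/api/v1/clusters/c1/alert_definitions/42',
    data=body)
request.add_header('X-Requested-By', 'ambari')  # header Ambari expects on writes
request.get_method = lambda: 'PUT'
urllib2.urlopen(request)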

+ 29 - 18
ambari-server/src/main/java/org/apache/ambari/server/events/listeners/AlertReceivedListener.java

@@ -86,12 +86,23 @@ public class AlertReceivedListener {
       LOG.debug(event.toString());
     }
 
-    long clusterId = event.getClusterId();
     Alert alert = event.getAlert();
+    long clusterId = event.getClusterId();
+
+    AlertDefinitionEntity definition = m_definitionDao.findByName(clusterId,
+        alert.getName());
+
+    if (null == definition) {
+      LOG.warn(
+          "Received an alert for {} which is a definition that does not exist anymore",
+          alert.getName());
+
+      return;
+    }
 
     AlertCurrentEntity current = null;
 
-    if (null == alert.getHost()) {
+    if (null == alert.getHost() || definition.isHostIgnored()) {
       current = m_alertsDao.findCurrentByNameNoHost(clusterId, alert.getName());
     } else {
       current = m_alertsDao.findCurrentByHostAndName(clusterId, alert.getHost(),
@@ -99,17 +110,6 @@ public class AlertReceivedListener {
     }
 
     if (null == current) {
-      AlertDefinitionEntity definition = m_definitionDao.findByName(clusterId,
-          alert.getName());
-
-      if (null == definition) {
-        LOG.warn(
-            "Received an alert for {} which is a definition that does not exist anymore",
-            alert.getName());
-
-        return;
-      }
-
       AlertHistoryEntity history = createHistory(clusterId, definition, alert);
 
       current = new AlertCurrentEntity();
@@ -166,12 +166,17 @@ public class AlertReceivedListener {
 
   /**
    * Convenience to create a new alert.
-   * @param clusterId the cluster id
-   * @param definition the definition
-   * @param alert the alert data
+   *
+   * @param clusterId
+   *          the cluster id
+   * @param definition
+   *          the definition
+   * @param alert
+   *          the alert data
    * @return the new history record
    */
-  private AlertHistoryEntity createHistory(long clusterId, AlertDefinitionEntity definition, Alert alert) {
+  private AlertHistoryEntity createHistory(long clusterId,
+      AlertDefinitionEntity definition, Alert alert) {
     AlertHistoryEntity history = new AlertHistoryEntity();
     history.setAlertDefinition(definition);
     history.setAlertInstance(alert.getInstance());
@@ -181,9 +186,15 @@ public class AlertReceivedListener {
     history.setAlertTimestamp(Long.valueOf(alert.getTimestamp()));
     history.setClusterId(Long.valueOf(clusterId));
     history.setComponentName(alert.getComponent());
-    history.setHostName(alert.getHost());
     history.setServiceName(alert.getService());
 
+    // only set a host for the history item if the alert definition says to
+    if (definition.isHostIgnored()) {
+      history.setHostName(null);
+    } else {
+      history.setHostName(alert.getHost());
+    }
+
     return history;
   }
 }

+ 28 - 0
ambari-server/src/main/java/org/apache/ambari/server/orm/entities/AlertDefinitionEntity.java

@@ -112,6 +112,9 @@ public class AlertDefinitionEntity {
   @Enumerated(value = EnumType.STRING)
   private SourceType sourceType;
 
+  @Column(name = "ignore_host", nullable = false)
+  private Integer ignoreHost = Integer.valueOf(0);
+
   /**
    * Bi-directional many-to-many association to {@link AlertGroupEntity}
    */
@@ -293,6 +296,31 @@ public class AlertDefinitionEntity {
     this.enabled = enabled ? Integer.valueOf(1) : Integer.valueOf(0);
   }
 
+  /**
+   * Gets whether this alert definition will ignore the hosts reporting the
+   * alert and combine them all into a single alert entry.
+   *
+   * @return {@code true} if this alert definition is to ignore hosts and
+   *         combine all alert instances into a single entry, {@code false}
+   *         otherwise.
+   */
+  public boolean isHostIgnored() {
+    return ignoreHost == Integer.valueOf(0) ? false : true;
+  }
+
+  /**
+   * Sets whether this alert definition will ignore the hosts reporting the
+   * alert and combine them all into a single alert entry.
+   *
+   * @param ignoreHost
+   *          {@code true} if this alert definition is to ignore hosts and
+   *          combine all alert instances into a single entry, {@code false}
+   *          otherwise.
+   */
+  public void setHostIgnored(boolean ignoreHost) {
+    this.ignoreHost = ignoreHost ? Integer.valueOf(1) : Integer.valueOf(0);
+  }
+
   /**
    * Gets the unique hash for the current state of this definition. If a
    * property of this definition changes, a new hash is calculated.

+ 20 - 0
ambari-server/src/main/java/org/apache/ambari/server/state/alert/AlertDefinition.java

@@ -19,6 +19,8 @@ package org.apache.ambari.server.state.alert;
 
 import java.util.HashSet;
 
+import com.google.gson.annotations.SerializedName;
+
 /**
  * The {@link AlertDefinition} class represents all of the necessary information
  * to schedule, run, and collect alerts.
@@ -46,6 +48,9 @@ public class AlertDefinition {
   private String label = null;
   private String uuid = null;
 
+  @SerializedName("ignore_host")
+  private boolean ignoreHost = false;
+
   /**
    * Gets the cluster ID for this definition.
    *
@@ -145,6 +150,17 @@ public class AlertDefinition {
     enabled = definitionEnabled;
   }
 
+  /**
+   * @return {@code true} if the host is ignored.
+   */
+  public boolean isHostIgnored() {
+    return ignoreHost;
+  }
+
+  public void setHostIgnored(boolean definitionHostIgnored) {
+    ignoreHost = definitionHostIgnored;
+  }
+
   public Source getSource() {
     return source;
   }
@@ -221,6 +237,10 @@ public class AlertDefinition {
       return false;
     }
 
+    if (ignoreHost != other.ignoreHost) {
+      return false;
+    }
+
     if (interval != other.interval) {
       return false;
     }

+ 2 - 0
ambari-server/src/main/java/org/apache/ambari/server/state/alert/AlertDefinitionFactory.java

@@ -180,6 +180,7 @@ public class AlertDefinitionFactory {
     definition.setClusterId(entity.getClusterId());
     definition.setComponentName(entity.getComponentName());
     definition.setEnabled(entity.getEnabled());
+    definition.setHostIgnored(entity.isHostIgnored());
     definition.setInterval(entity.getScheduleInterval());
     definition.setName(entity.getDefinitionName());
     definition.setScope(entity.getScope());
@@ -244,6 +245,7 @@ public class AlertDefinitionFactory {
     entity.setComponentName(definition.getComponentName());
     entity.setDefinitionName(definition.getName());
     entity.setEnabled(definition.isEnabled());
+    entity.setHostIgnored(definition.isHostIgnored());
     entity.setHash(UUID.randomUUID().toString());
     entity.setLabel(definition.getLabel());
     entity.setScheduleInterval(definition.getInterval());

+ 16 - 10
ambari-server/src/main/java/org/apache/ambari/server/upgrade/SchemaUpgradeHelper.java

@@ -17,11 +17,16 @@
  */
 package org.apache.ambari.server.upgrade;
 
-import com.google.inject.Guice;
-import com.google.inject.Inject;
-import com.google.inject.Injector;
-import com.google.inject.multibindings.Multibinder;
-import com.google.inject.persist.PersistService;
+import java.io.File;
+import java.io.IOException;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+
 import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.configuration.Configuration;
 import org.apache.ambari.server.controller.ControllerModule;
@@ -31,11 +36,11 @@ import org.apache.commons.io.FileUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
-import java.io.IOException;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.*;
+import com.google.inject.Guice;
+import com.google.inject.Inject;
+import com.google.inject.Injector;
+import com.google.inject.multibindings.Multibinder;
+import com.google.inject.persist.PersistService;
 
 public class SchemaUpgradeHelper {
   private static final Logger LOG = LoggerFactory.getLogger
@@ -167,6 +172,7 @@ public class SchemaUpgradeHelper {
       catalogBinder.addBinding().to(UpgradeCatalog160.class);
       catalogBinder.addBinding().to(UpgradeCatalog161.class);
       catalogBinder.addBinding().to(UpgradeCatalog170.class);
+      catalogBinder.addBinding().to(UpgradeCatalog200.class);
     }
   }
 

+ 91 - 0
ambari-server/src/main/java/org/apache/ambari/server/upgrade/UpgradeCatalog200.java

@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ambari.server.upgrade;
+
+import java.sql.SQLException;
+
+import org.apache.ambari.server.AmbariException;
+import org.apache.ambari.server.orm.DBAccessor.DBColumnInfo;
+import org.apache.ambari.server.orm.dao.DaoUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.inject.Inject;
+import com.google.inject.Injector;
+
+/**
+ * Upgrade catalog for version 2.0.0.
+ */
+public class UpgradeCatalog200 extends AbstractUpgradeCatalog {
+
+  private static final String ALERT_TABLE_DEFINITION = "alert_definition";
+
+  @Inject
+  private DaoUtils daoUtils;
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public String getSourceVersion() {
+    return "1.7.0";
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public String getTargetVersion() {
+    return "1.2.0";
+  }
+
+  /**
+   * Logger.
+   */
+  private static final Logger LOG = LoggerFactory.getLogger
+      (UpgradeCatalog200.class);
+
+  /**
+   * Constructor.
+   *
+   * @param injector
+   */
+  @Inject
+  public UpgradeCatalog200(Injector injector) {
+    super(injector);
+    this.injector = injector;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  protected void executeDDLUpdates() throws AmbariException, SQLException {
+    // add ignore_host column to alert_definition
+    dbAccessor.addColumn(ALERT_TABLE_DEFINITION, new DBColumnInfo(
+        "ignore_host", Short.class, 1, 0, false));
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  protected void executeDMLUpdates() throws AmbariException, SQLException {
+  }
+}

+ 1 - 0
ambari-server/src/main/resources/Ambari-DDL-MySQL-CREATE.sql

@@ -161,6 +161,7 @@ CREATE TABLE alert_definition (
   source_type VARCHAR(255) NOT NULL,
   alert_source TEXT NOT NULL,
   hash VARCHAR(64) NOT NULL,
+  ignore_host SMALLINT DEFAULT 0 NOT NULL,
   PRIMARY KEY (definition_id),
   FOREIGN KEY (cluster_id) REFERENCES clusters(cluster_id),
   CONSTRAINT uni_alert_def_name UNIQUE(cluster_id,definition_name)

+ 1 - 0
ambari-server/src/main/resources/Ambari-DDL-Oracle-CREATE.sql

@@ -152,6 +152,7 @@ CREATE TABLE alert_definition (
   source_type VARCHAR2(255) NOT NULL,
   alert_source CLOB NOT NULL,
   hash VARCHAR2(64) NOT NULL,
+  ignore_host NUMBER(1) DEFAULT 0 NOT NULL,
   PRIMARY KEY (definition_id),
   FOREIGN KEY (cluster_id) REFERENCES clusters(cluster_id),
   CONSTRAINT uni_alert_def_name UNIQUE(cluster_id,definition_name)

+ 1 - 0
ambari-server/src/main/resources/Ambari-DDL-Postgres-CREATE.sql

@@ -184,6 +184,7 @@ CREATE TABLE alert_definition (
   source_type VARCHAR(255) NOT NULL,
   alert_source TEXT NOT NULL,
   hash VARCHAR(64) NOT NULL,
+  ignore_host SMALLINT DEFAULT 0 NOT NULL,
   PRIMARY KEY (definition_id),
   FOREIGN KEY (cluster_id) REFERENCES clusters(cluster_id),
   CONSTRAINT uni_alert_def_name UNIQUE(cluster_id,definition_name)

+ 1 - 0
ambari-server/src/main/resources/Ambari-DDL-Postgres-EMBEDDED-CREATE.sql

@@ -248,6 +248,7 @@ CREATE TABLE ambari.alert_definition (
   source_type VARCHAR(255) NOT NULL,
   alert_source TEXT NOT NULL,
   hash VARCHAR(64) NOT NULL,
+  ignore_host SMALLINT DEFAULT 0 NOT NULL,
   PRIMARY KEY (definition_id),
   FOREIGN KEY (cluster_id) REFERENCES ambari.clusters(cluster_id),
   CONSTRAINT uni_alert_def_name UNIQUE(cluster_id,definition_name)

+ 12 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/alerts.json

@@ -311,6 +311,18 @@
           "type": "SCRIPT",
           "path": "HDP/2.0.6/services/HDFS/package/files/alert_checkpoint_time.py"
         }
+      },
+      {
+        "name": "namenode_ha_health",
+        "label": "NameNode High Availability Health",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "ignore_host": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": "HDP/2.0.6/services/HDFS/package/files/alert_ha_namenode_health.py"
+        }
       }
     ],
     "SECONDARY_NAMENODE": [

+ 166 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HDFS/package/files/alert_ha_namenode_health.py

@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import urllib2
+import json
+
+RESULT_STATE_OK = 'OK'
+RESULT_STATE_CRITICAL = 'CRITICAL'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+RESULT_STATE_SKIPPED = 'SKIPPED'
+
+HDFS_NN_STATE_ACTIVE = 'active'
+HDFS_NN_STATE_STANDBY = 'standby'
+
+HDFS_SITE_KEY = '{{hdfs-site}}'
+NAMESERVICE_KEY = '{{hdfs-site/dfs.nameservices}}'
+NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
+NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
+DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY,
+  NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+  if parameters is None:
+    return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.'])
+
+  # if not in HA mode, then SKIP
+  if not NAMESERVICE_KEY in parameters:
+    return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
+
+  # hdfs-site is required
+  if not HDFS_SITE_KEY in parameters:
+    return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)])
+
+  # determine whether or not SSL is enabled
+  is_ssl_enabled = False
+  if DFS_POLICY_KEY in parameters:
+    dfs_policy = parameters[DFS_POLICY_KEY]
+    if dfs_policy == "HTTPS_ONLY":
+      is_ssl_enabled = True
+
+  name_service = parameters[NAMESERVICE_KEY]
+  hdfs_site = parameters[HDFS_SITE_KEY]
+
+  # look for dfs.ha.namenodes.foo
+  nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
+  if not nn_unique_ids_key in hdfs_site:
+    return (RESULT_STATE_UNKNOWN, ['Unable to find unique namenode alias key {0}'.format(nn_unique_ids_key)])
+
+  namenode_http_fragment = 'dfs.namenode.http-address.{0}.{1}'
+  jmx_uri_fragment = "http://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+
+  if is_ssl_enabled:
+    namenode_http_fragment = 'dfs.namenode.https-address.{0}.{1}'
+    jmx_uri_fragment = "https://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"
+
+
+  active_namenodes = []
+  standby_namenodes = []
+  unknown_namenodes = []
+
+  # now we have something like 'nn1,nn2,nn3,nn4'
+  # turn it into dfs.namenode.[property].[dfs.nameservices].[nn_unique_id]
+  # ie dfs.namenode.http-address.hacluster.nn1
+  nn_unique_ids = hdfs_site[nn_unique_ids_key].split(',')
+  for nn_unique_id in nn_unique_ids:
+    key = namenode_http_fragment.format(name_service,nn_unique_id)
+
+    if key in hdfs_site:
+      # use str() to ensure that unicode strings do not have the u' in them
+      value = str(hdfs_site[key])
+
+      try:
+        jmx_uri = jmx_uri_fragment.format(value)
+        state = get_value_from_jmx(jmx_uri,'State')
+
+        if state == HDFS_NN_STATE_ACTIVE:
+          active_namenodes.append(value)
+        elif state == HDFS_NN_STATE_STANDBY:
+          standby_namenodes.append(value)
+        else:
+          unknown_namenodes.append(value)
+      except:
+        unknown_namenodes.append(value)
+
+  # now that the request is done, determine if this host is the host that
+  # should report the status of the HA topology
+  is_active_namenode = False
+  for active_namenode in active_namenodes:
+    if active_namenode.startswith(host_name):
+      is_active_namenode = True
+
+  # there's only one scenario here; there is exactly 1 active and 1 standby
+  is_topology_healthy = len(active_namenodes) == 1 and len(standby_namenodes) == 1
+
+  result_label = 'Active{0}, Standby{1}, Unknown{2}'.format(str(active_namenodes),
+    str(standby_namenodes), str(unknown_namenodes))
+
+  # Healthy Topology:
+  #   - Active NN reports the alert, standby does not
+  #
+  # Unhealthy Topology:
+  #   - Report the alert if this is the first named host
+  #   - Report the alert if not the first named host, but the other host
+  #   could not report its status
+  if is_topology_healthy:
+    if is_active_namenode is True:
+      return (RESULT_STATE_OK, [result_label])
+    else:
+      return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+  else:
+    # dfs.namenode.rpc-address.service.alias is guaranteed in HA mode
+    first_listed_host_key = 'dfs.namenode.rpc-address.{0}.{1}'.format(
+      name_service, nn_unique_ids[0])
+
+    first_listed_host = ''
+    if first_listed_host_key in hdfs_site:
+      first_listed_host = hdfs_site[first_listed_host_key]
+
+    is_first_listed_host = False
+    if first_listed_host.startswith(host_name):
+      is_first_listed_host = True
+
+    if is_first_listed_host:
+      return (RESULT_STATE_CRITICAL, [result_label])
+    else:
+      # not the first listed host, but the first host might be in the unknown
+      return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+
+
+def get_value_from_jmx(qry, property):
+  response = urllib2.urlopen(qry)
+  data=response.read()
+  data_dict = json.loads(data)
+  return data_dict["beans"][0][property]
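A quick smoke test for the script above; a hedged sketch, since the host names and the hacluster nameservice are invented while the hdfs-site keys mirror the {{site/property}} tokens the agent would inject:

params = {
  '{{hdfs-site/dfs.nameservices}}': 'hacluster',
  '{{hdfs-site}}': {
    'dfs.ha.namenodes.hacluster': 'nn1,nn2',
    'dfs.namenode.http-address.hacluster.nn1': 'c6401.ambari.apache.org:50070',
    'dfs.namenode.http-address.hacluster.nn2': 'c6402.ambari.apache.org:50070',
    'dfs.namenode.rpc-address.hacluster.nn1': 'c6401.ambari.apache.org:8020'
  }
}

# with neither JMX endpoint reachable, both NameNodes land in the unknown
# bucket, so the first listed host reports CRITICAL and all others SKIP
print execute(parameters=params, host_name='c6401.ambari.apache.org')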

+ 15 - 3
ambari-server/src/test/java/org/apache/ambari/server/api/services/AmbariMetaInfoTest.java

@@ -1577,6 +1577,7 @@ public class AmbariMetaInfoTest {
     AlertDefinition nameNodeProcess = null;
     AlertDefinition nameNodeCpu = null;
     AlertDefinition datanodeStorage = null;
+    AlertDefinition ignoreHost = null;
 
     Iterator<AlertDefinition> iterator = set.iterator();
     while (iterator.hasNext()) {
@@ -1592,14 +1593,20 @@ public class AmbariMetaInfoTest {
       if (definition.getName().equals("datanode_storage")) {
         datanodeStorage = definition;
       }
+
+      if (definition.getName().equals("hdfs_ignore_host_test")) {
+        ignoreHost = definition;
+      }
     }
 
     assertNotNull(nameNodeProcess);
     assertNotNull(nameNodeCpu);
+    assertNotNull(ignoreHost);
 
     assertEquals("NameNode Host CPU Utilization", nameNodeCpu.getLabel());
 
     // test namenode_process
+    assertFalse(nameNodeProcess.isHostIgnored());
     Source source = nameNodeProcess.getSource();
     assertNotNull(source);
     assertNotNull(((PortSource) source).getPort());
@@ -1614,6 +1621,7 @@ public class AmbariMetaInfoTest {
     assertNull(reporting.getWarning());
 
     // test namenode_cpu
+    assertFalse(nameNodeCpu.isHostIgnored());
     source = nameNodeCpu.getSource();
     assertNotNull(source);
     reporting = source.getReporting();
@@ -1630,6 +1638,7 @@ public class AmbariMetaInfoTest {
 
     // test a metric alert
     assertNotNull(datanodeStorage);
+    assertFalse(datanodeStorage.isHostIgnored());
     MetricSource metricSource = (MetricSource) datanodeStorage.getSource();
     assertNotNull( metricSource.getUri() );
     assertNotNull( metricSource.getUri().getHttpsProperty() );
@@ -1637,6 +1646,9 @@ public class AmbariMetaInfoTest {
     assertNotNull( metricSource.getUri().getHttpsUri() );
     assertNotNull( metricSource.getUri().getHttpUri() );
     assertEquals(12345, metricSource.getUri().getDefaultPort());
+
+    // ignore host
+    assertTrue(ignoreHost.isHostIgnored());
   }
 
   /**
@@ -1658,7 +1670,7 @@ public class AmbariMetaInfoTest {
 
     AlertDefinitionDAO dao = injector.getInstance(AlertDefinitionDAO.class);
     List<AlertDefinitionEntity> definitions = dao.findAll();
-    assertEquals(6, definitions.size());
+    assertEquals(7, definitions.size());
 
     // figure out how many of these alerts were merged into from the
     // non-stack alerts.json
@@ -1671,7 +1683,7 @@ public class AmbariMetaInfoTest {
     }
 
     assertEquals(1, hostAlertCount);
-    assertEquals(5, definitions.size() - hostAlertCount);
+    assertEquals(6, definitions.size() - hostAlertCount);
 
     for (AlertDefinitionEntity definition : definitions) {
       definition.setScheduleInterval(28);
@@ -1681,7 +1693,7 @@ public class AmbariMetaInfoTest {
     metaInfo.reconcileAlertDefinitions(clusters);
 
     definitions = dao.findAll();
-    assertEquals(6, definitions.size());
+    assertEquals(7, definitions.size());
 
     for (AlertDefinitionEntity definition : definitions) {
       assertEquals(28, definition.getScheduleInterval().intValue());

+ 6 - 0
ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProviderTest.java

@@ -356,6 +356,7 @@ public class AlertDefinitionResourceProviderTest {
     Assert.assertEquals("HDFS", entity.getServiceName());
     Assert.assertEquals(SourceType.METRIC, entity.getSourceType());
     Assert.assertEquals("Mock Label (Create)", entity.getLabel());
+    Assert.assertEquals(false, entity.isHostIgnored());
 
     // verify Source
     Assert.assertNotNull(entity.getSource());
@@ -472,6 +473,7 @@ public class AlertDefinitionResourceProviderTest {
     String oldHash = entity.getHash();
     Integer oldInterval = entity.getScheduleInterval();
     boolean oldEnabled = entity.getEnabled();
+    boolean oldHostIgnore = entity.isHostIgnored();
     String oldSource = entity.getSource();
 
     resetToStrict(dao);
@@ -501,6 +503,9 @@ public class AlertDefinitionResourceProviderTest {
     requestProps.put(AlertDefinitionResourceProvider.ALERT_DEF_ENABLED,
         Boolean.FALSE.toString());
 
+    requestProps.put(AlertDefinitionResourceProvider.ALERT_DEF_IGNORE_HOST,
+        Boolean.TRUE.toString());
+
     request = PropertyHelper.getUpdateRequest(requestProps, null);
 
     provider.updateResources(request, p);
@@ -509,6 +514,7 @@ public class AlertDefinitionResourceProviderTest {
     Assert.assertFalse(oldName.equals(entity.getDefinitionName()));
     Assert.assertFalse(oldInterval.equals(entity.getScheduleInterval()));
     Assert.assertFalse(oldEnabled == entity.getEnabled());
+    Assert.assertFalse(oldHostIgnore == entity.isHostIgnored());
     Assert.assertFalse(oldSource.equals(entity.getSource()));
     Assert.assertTrue(entity.getSource().contains("_foobarbaz"));
 

+ 1 - 1
ambari-server/src/test/java/org/apache/ambari/server/state/alerts/AlertEventPublisherTest.java

@@ -141,7 +141,7 @@ public class AlertEventPublisherTest {
   public void testAlertDefinitionInsertion() throws Exception {
     Assert.assertEquals(0, definitionDao.findAll().size());
     installHdfsService();
-    Assert.assertEquals(5, definitionDao.findAll().size());
+    Assert.assertEquals(6, definitionDao.findAll().size());
   }
 
   /**

+ 137 - 0
ambari-server/src/test/java/org/apache/ambari/server/upgrade/UpgradeCatalog200Test.java

@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ambari.server.upgrade;
+
+import static org.easymock.EasyMock.capture;
+import static org.easymock.EasyMock.createNiceMock;
+import static org.easymock.EasyMock.createStrictMock;
+import static org.easymock.EasyMock.eq;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.reset;
+import static org.easymock.EasyMock.verify;
+
+import java.lang.reflect.Field;
+import java.sql.ResultSet;
+
+import javax.persistence.EntityManager;
+
+import org.apache.ambari.server.configuration.Configuration;
+import org.apache.ambari.server.orm.DBAccessor;
+import org.apache.ambari.server.orm.DBAccessor.DBColumnInfo;
+import org.apache.ambari.server.orm.GuiceJpaInitializer;
+import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
+import org.easymock.Capture;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.inject.Binder;
+import com.google.inject.Guice;
+import com.google.inject.Injector;
+import com.google.inject.Module;
+import com.google.inject.Provider;
+import com.google.inject.persist.PersistService;
+
+/**
+ * {@link UpgradeCatalog200} unit tests.
+ */
+public class UpgradeCatalog200Test {
+
+  private Injector injector;
+  private Provider<EntityManager> entityManagerProvider = createStrictMock(Provider.class);
+  private EntityManager entityManager = createNiceMock(EntityManager.class);
+
+  @Before
+  public void init() {
+    reset(entityManagerProvider);
+    expect(entityManagerProvider.get()).andReturn(entityManager).anyTimes();
+    replay(entityManagerProvider);
+    injector = Guice.createInjector(new InMemoryDefaultTestModule());
+    injector.getInstance(GuiceJpaInitializer.class);
+  }
+
+  @After
+  public void tearDown() {
+    injector.getInstance(PersistService.class).stop();
+  }
+
+  @Test
+  public void testExecuteDDLUpdates() throws Exception {
+    final DBAccessor dbAccessor = createNiceMock(DBAccessor.class);
+    Configuration configuration = createNiceMock(Configuration.class);
+    ResultSet resultSet = createNiceMock(ResultSet.class);
+
+    expect(configuration.getDatabaseUrl()).andReturn(Configuration.JDBC_IN_MEMORY_URL).anyTimes();
+
+    Capture<DBAccessor.DBColumnInfo> alertDefinitionIgnoreColumnCapture = new Capture<DBAccessor.DBColumnInfo>();
+
+    dbAccessor.addColumn(eq("alert_definition"),
+        capture(alertDefinitionIgnoreColumnCapture));
+
+    replay(dbAccessor, configuration, resultSet);
+
+    AbstractUpgradeCatalog upgradeCatalog = getUpgradeCatalog(dbAccessor);
+    Class<?> c = AbstractUpgradeCatalog.class;
+    Field f = c.getDeclaredField("configuration");
+    f.setAccessible(true);
+    f.set(upgradeCatalog, configuration);
+
+    upgradeCatalog.executeDDLUpdates();
+    verify(dbAccessor, configuration, resultSet);
+
+    // verify ignore column for alert_definition
+    verifyAlertDefinitionIgnoreColumn(alertDefinitionIgnoreColumnCapture);
+  }
+
+  @Test
+  public void testExecuteDMLUpdates() throws Exception {
+  }
+
+  /**
+   * @param dbAccessor
+   * @return
+   */
+  private AbstractUpgradeCatalog getUpgradeCatalog(final DBAccessor dbAccessor) {
+    Module module = new Module() {
+      @Override
+      public void configure(Binder binder) {
+        binder.bind(DBAccessor.class).toInstance(dbAccessor);
+      }
+    };
+
+    Injector injector = Guice.createInjector(module);
+    return injector.getInstance(UpgradeCatalog200.class);
+  }
+
+  /**
+   * Verifies new ignore column.
+   *
+   * @param alertDefinitionIgnoreColumnCapture
+   */
+  private void verifyAlertDefinitionIgnoreColumn(
+      Capture<DBAccessor.DBColumnInfo> alertDefinitionIgnoreColumnCapture) {
+    DBColumnInfo column = alertDefinitionIgnoreColumnCapture.getValue();
+    Assert.assertEquals(Integer.valueOf(0), column.getDefaultValue());
+    Assert.assertEquals(Integer.valueOf(1), column.getLength());
+    Assert.assertEquals(Short.class, column.getType());
+    Assert.assertEquals("ignore_host", column.getName());
+  }
+}

+ 14 - 2
ambari-server/src/test/resources/stacks/HDP/2.0.5/services/HDFS/alerts.json

@@ -62,13 +62,25 @@
         "name": "hdfs_last_checkpoint",
         "label": "Last Checkpoint Time",
         "interval": 1,
-        "SCOPE": "service",
+        "scope": "SERVICE",
         "enabled": false,
         "source": {
           "type": "SCRIPT",
           "path": "scripts/alerts/last_checkpoint.py"
         }
-      }
+      },
+      {
+        "name": "hdfs_ignore_host_test",
+        "label": "Ignore Host Test",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "ignore_host": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": "scripts/alerts/last_checkpoint.py"
+        }
+      }      
     ],
     "SECONDARY_NAMENODE": [
       {