Browse Source

AMBARI-4528. Suppress Nagios alerts for passive host components (ncole)

Nate Cole, cách đây 11 năm
mục cha
commit
7f8d215cc8
33 tập tin đã thay đổi, với 926 bổ sung và 169 xóa
  1. 15 0
      ambari-server/src/main/java/org/apache/ambari/server/agent/ExecutionCommand.java
  2. 2 5
      ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
  3. 15 6
      ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariActionExecutionHelper.java
  4. 5 0
      ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariCustomCommandExecutionHelper.java
  5. 45 4
      ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java
  6. 64 2
      ambari-server/src/main/java/org/apache/ambari/server/controller/PassiveStateHelper.java
  7. 11 5
      ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java
  8. 10 1
      ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ServiceResourceProvider.java
  9. 10 1
      ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosAlert.java
  10. 43 14
      ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java
  11. 11 1
      ambari-server/src/main/resources/custom_action_definitions/system_action_definitions.xml
  12. 87 0
      ambari-server/src/main/resources/custom_actions/nagios_update_ignore.py
  13. 5 1
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_aggregate.php
  14. 56 0
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_wrapper.sh
  15. 5 0
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios.py
  16. 25 0
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server.py
  17. 2 1
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py
  18. 21 16
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
  19. 20 20
      ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2
  20. 5 1
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_aggregate.php
  21. 59 0
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_wrapper.sh
  22. 5 0
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios.py
  23. 26 0
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py
  24. 2 1
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py
  25. 21 15
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2
  26. 1 1
      ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2
  27. 9 1
      ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java
  28. 195 0
      ambari-server/src/test/java/org/apache/ambari/server/controller/PassiveStateHelperTest.java
  29. 126 69
      ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java
  30. 11 1
      ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py
  31. 11 2
      ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py
  32. 1 0
      ambari-server/src/test/resources/nagios_alerts.txt
  33. 2 1
      contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php

+ 15 - 0
ambari-server/src/main/java/org/apache/ambari/server/agent/ExecutionCommand.java

@@ -55,6 +55,7 @@ public class ExecutionCommand extends AgentCommand {
   private Map<String, String> commandParams;
   private String serviceName;
   private String componentName;
+  private Set<Map<String,String>> passiveInfo;
 
   @JsonProperty("commandId")
   public String getCommandId() {
@@ -226,6 +227,20 @@ public class ExecutionCommand extends AgentCommand {
   public Map<String, Map<String, String>> getConfigurationTags() {
     return configurationTags;
   }
+  
+  /**
+   * Returns the passive-state information attached to this command.
+   *
+   * @return the passive info for the cluster, as last passed to
+   *         {@link #setPassiveInfo(Set)}; may be {@code null} when nothing
+   *         has been set
+   */
+  public Set<Map<String, String>> getPassiveInfo() {
+    return passiveInfo;
+  }
+  
+  /**
+   * Replaces the passive-state information attached to this command.
+   *
+   * @param info the passive info for the cluster; the reference is stored as
+   *             given, without copying
+   */
+  public void setPassiveInfo(Set<Map<String,String>> info) {
+    passiveInfo = info;
+  }
 
 
   /**

+ 2 - 5
ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java

@@ -909,11 +909,8 @@ public class Configuration {
   }
 
   public String getCustomActionDefinitionPath() {
-    String value = configsMap.get(CUSTOM_ACTION_DEFINITION_KEY);
-    if (value == null) {
-      value = CUSTOM_ACTION_DEFINITION_DEF_VALUE;
-    }
-    return value;
+    return properties.getProperty(CUSTOM_ACTION_DEFINITION_KEY,
+        CUSTOM_ACTION_DEFINITION_DEF_VALUE);
   }
 
   /**

+ 15 - 6
ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariActionExecutionHelper.java

@@ -18,6 +18,16 @@
 
 package org.apache.ambari.server.controller;
 
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_TIMEOUT;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCHEMA_VERSION;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCRIPT;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCRIPT_TYPE;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+
 import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.Role;
 import org.apache.ambari.server.RoleCommand;
@@ -41,12 +51,6 @@ import org.apache.ambari.server.utils.StageUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.COMMAND_TIMEOUT;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCHEMA_VERSION;
 import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.SCRIPT;
@@ -318,6 +322,11 @@ public class AmbariActionExecutionHelper {
       // Generate cluster host info
       execCmd.setClusterHostInfo(
           StageUtils.getClusterHostInfo(clusters.getHostsForCluster(clusterName), cluster));
+      
+      // cluster passive map
+      execCmd.setPassiveInfo(
+          PassiveStateHelper.getPassiveHostComponents(clusters, cluster));
+          
     }
   }
 }

+ 5 - 0
ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariCustomCommandExecutionHelper.java

@@ -769,6 +769,11 @@ public class AmbariCustomCommandExecutionHelper {
       hostParams.put(DB_DRIVER_FILENAME, configs.getMySQLJarName());
     }
     execCmd.setHostLevelParams(hostParams);
+
+    Map<String, String> roleParams = new TreeMap<String, String>();
+    execCmd.setRoleParams(roleParams);
+    
+    execCmd.setPassiveInfo(PassiveStateHelper.getPassiveHostComponents(clusters, cluster));
   }
 
   private String getRepoInfo(Cluster cluster, Host host) throws AmbariException {

+ 45 - 4
ambari-server/src/main/java/org/apache/ambari/server/controller/AmbariManagementControllerImpl.java

@@ -18,6 +18,17 @@
 
 package org.apache.ambari.server.controller;
 
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.AMBARI_DB_RCA_DRIVER;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.AMBARI_DB_RCA_PASSWORD;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.AMBARI_DB_RCA_URL;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.AMBARI_DB_RCA_USERNAME;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JAVA_HOME;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JCE_NAME;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JDK_LOCATION;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.JDK_NAME;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.STACK_NAME;
+import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.STACK_VERSION;
+
 import java.io.File;
 import java.io.IOException;
 import java.net.InetAddress;
@@ -46,7 +57,12 @@ import org.apache.ambari.server.ServiceComponentHostNotFoundException;
 import org.apache.ambari.server.ServiceComponentNotFoundException;
 import org.apache.ambari.server.ServiceNotFoundException;
 import org.apache.ambari.server.StackAccessException;
-import org.apache.ambari.server.actionmanager.*;
+import org.apache.ambari.server.actionmanager.ActionManager;
+import org.apache.ambari.server.actionmanager.HostRoleCommand;
+import org.apache.ambari.server.actionmanager.Request;
+import org.apache.ambari.server.actionmanager.RequestFactory;
+import org.apache.ambari.server.actionmanager.Stage;
+import org.apache.ambari.server.actionmanager.StageFactory;
 import org.apache.ambari.server.api.services.AmbariMetaInfo;
 import org.apache.ambari.server.configuration.Configuration;
 import org.apache.ambari.server.controller.internal.URLStreamProvider;
@@ -57,7 +73,29 @@ import org.apache.ambari.server.security.authorization.AuthorizationHelper;
 import org.apache.ambari.server.security.authorization.User;
 import org.apache.ambari.server.security.authorization.Users;
 import org.apache.ambari.server.stageplanner.RoleGraph;
-import org.apache.ambari.server.state.*;
+import org.apache.ambari.server.state.Cluster;
+import org.apache.ambari.server.state.Clusters;
+import org.apache.ambari.server.state.ComponentInfo;
+import org.apache.ambari.server.state.Config;
+import org.apache.ambari.server.state.ConfigFactory;
+import org.apache.ambari.server.state.ConfigHelper;
+import org.apache.ambari.server.state.Host;
+import org.apache.ambari.server.state.HostState;
+import org.apache.ambari.server.state.OperatingSystemInfo;
+import org.apache.ambari.server.state.PassiveState;
+import org.apache.ambari.server.state.PropertyInfo;
+import org.apache.ambari.server.state.RepositoryInfo;
+import org.apache.ambari.server.state.Service;
+import org.apache.ambari.server.state.ServiceComponent;
+import org.apache.ambari.server.state.ServiceComponentFactory;
+import org.apache.ambari.server.state.ServiceComponentHost;
+import org.apache.ambari.server.state.ServiceComponentHostEvent;
+import org.apache.ambari.server.state.ServiceComponentHostFactory;
+import org.apache.ambari.server.state.ServiceFactory;
+import org.apache.ambari.server.state.ServiceInfo;
+import org.apache.ambari.server.state.StackId;
+import org.apache.ambari.server.state.StackInfo;
+import org.apache.ambari.server.state.State;
 import org.apache.ambari.server.state.configgroup.ConfigGroupFactory;
 import org.apache.ambari.server.state.fsm.InvalidStateTransitionException;
 import org.apache.ambari.server.state.scheduler.RequestExecutionFactory;
@@ -80,8 +118,6 @@ import com.google.inject.Injector;
 import com.google.inject.Singleton;
 import com.google.inject.persist.Transactional;
 
-import static org.apache.ambari.server.agent.ExecutionCommand.KeyNames.*;
-
 @Singleton
 public class AmbariManagementControllerImpl implements
     AmbariManagementController {
@@ -1501,6 +1537,11 @@ public class AmbariManagementControllerImpl implements
               "passive state to one of " + EnumSet.of(PassiveState.ACTIVE, PassiveState.PASSIVE));
           } else {
             sch.setPassiveState(newPassive);
+            try {
+              PassiveStateHelper.createRequest(this, sch.getClusterName(), sch.getServiceComponentName());
+            } catch (AmbariException e) {
+              LOG.warn("Could not send passive status to Nagios (" + e.getMessage() + ")");
+            }
           }
         }
       }

+ 64 - 2
ambari-server/src/main/java/org/apache/ambari/server/controller/PassiveStateHelper.java

@@ -17,15 +17,21 @@
  */
 package org.apache.ambari.server.controller;
 
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.HostNotFoundException;
+import org.apache.ambari.server.RoleCommand;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Clusters;
 import org.apache.ambari.server.state.Host;
 import org.apache.ambari.server.state.PassiveState;
 import org.apache.ambari.server.state.Service;
+import org.apache.ambari.server.state.ServiceComponent;
 import org.apache.ambari.server.state.ServiceComponentHost;
 
 import com.google.inject.Inject;
@@ -35,7 +41,10 @@ import com.google.inject.Injector;
  * Used to help manage passive state checks.
  */
 public class PassiveStateHelper {
-
+  private static final String NAGIOS_SERVICE = "NAGIOS";
+  private static final String NAGIOS_COMPONENT = "NAGIOS_SERVER";
+  private static final String NAGIOS_ACTION_NAME = "nagios_update_ignore";
+  
   @Inject
   private Clusters clusters;
   
@@ -62,14 +71,67 @@ public class PassiveStateHelper {
     if (null == host) // better not
       throw new HostNotFoundException(cluster.getClusterName(), sch.getHostName());
     
+    return getEffectiveState(cluster.getClusterId(), service, host, sch);
+  }
+  
+  private static PassiveState getEffectiveState(long clusterId, Service service,
+      Host host, ServiceComponentHost sch) {
     if (PassiveState.PASSIVE == sch.getPassiveState())
       return PassiveState.PASSIVE;
 
     if (PassiveState.ACTIVE != service.getPassiveState() ||
-        PassiveState.ACTIVE != host.getPassiveState(cluster.getClusterId()))
+        PassiveState.ACTIVE != host.getPassiveState(clusterId))
       return PassiveState.IMPLIED;
     
     return sch.getPassiveState();
   }
+
+  /**
+   * Collects every non-client host component of the cluster whose effective
+   * passive state is anything other than {@link PassiveState#ACTIVE}.
+   *
+   * @param clusters the clusters instance used to look up the cluster's hosts
+   * @param cluster  the cluster whose services and components are inspected
+   * @return a set holding one map per matching host component, each with the
+   *         keys {@code "host"}, {@code "service"} and {@code "component"};
+   *         never {@code null}, empty when nothing is passive
+   * @throws HostNotFoundException if a host component refers to a host that
+   *         is not part of the cluster
+   * @throws AmbariException if the hosts of the cluster cannot be looked up
+   */
+  public static Set<Map<String, String>> getPassiveHostComponents(Clusters clusters,
+      Cluster cluster) throws AmbariException {
+
+    Set<Map<String, String>> set = new HashSet<Map<String, String>>();
+
+    // The host map is the same for every component, so look it up only once
+    // instead of once per host component.
+    Map<String, Host> hosts = clusters.getHostsForCluster(cluster.getClusterName());
+
+    for (Service service : cluster.getServices().values()) {
+      for (ServiceComponent sc : service.getServiceComponents().values()) {
+        if (sc.isClientComponent()) {
+          continue;
+        }
+
+        for (ServiceComponentHost sch : sc.getServiceComponentHosts().values()) {
+          Host host = hosts.get(sch.getHostName());
+
+          // Fail with a descriptive error rather than a NullPointerException
+          // inside getEffectiveState when the host is unknown to the cluster.
+          if (null == host) {
+            throw new HostNotFoundException(cluster.getClusterName(), sch.getHostName());
+          }
+
+          if (PassiveState.ACTIVE != getEffectiveState(cluster.getClusterId(),
+              service, host, sch)) {
+            Map<String, String> map = new HashMap<String, String>();
+            map.put("host", sch.getHostName());
+            map.put("service", sch.getServiceName());
+            map.put("component", sch.getServiceComponentName());
+            set.add(map);
+          }
+        }
+      }
+    }
+
+    return set;
+  }
+  
+  /**
+   * Submits the Nagios "update ignore" action for the given cluster.
+   *
+   * @param amc         the controller used to create the action
+   * @param clusterName the name of the cluster the action is created for
+   * @param desc        text placed into the request context, between
+   *                    "Update " and " passive state"
+   * @return the response returned by the controller for the created action
+   * @throws AmbariException if the controller cannot create the action
+   */
+  public static RequestStatusResponse createRequest(AmbariManagementController amc,
+      String clusterName, String desc) throws AmbariException {
+
+    // Request-level properties: only the human readable context is set.
+    Map<String, String> requestProperties = new HashMap<String, String>();
+    requestProperties.put("context", "Update " + desc + " passive state");
+
+    // The action itself takes no parameters.
+    Map<String, String> actionParameters = new HashMap<String, String>();
+    String commandName = RoleCommand.ACTIONEXECUTE.name();
+
+    ExecuteActionRequest request = new ExecuteActionRequest(clusterName, commandName,
+        NAGIOS_ACTION_NAME, NAGIOS_SERVICE, NAGIOS_COMPONENT, null, actionParameters);
+
+    return amc.createAction(request, requestProperties);
+  }
   
 }

+ 11 - 5
ambari-server/src/main/java/org/apache/ambari/server/controller/internal/HostResourceProvider.java

@@ -37,6 +37,7 @@ import org.apache.ambari.server.controller.AmbariManagementController;
 import org.apache.ambari.server.controller.ConfigurationRequest;
 import org.apache.ambari.server.controller.HostRequest;
 import org.apache.ambari.server.controller.HostResponse;
+import org.apache.ambari.server.controller.PassiveStateHelper;
 import org.apache.ambari.server.controller.spi.NoSuchParentResourceException;
 import org.apache.ambari.server.controller.spi.NoSuchResourceException;
 import org.apache.ambari.server.controller.spi.Predicate;
@@ -566,14 +567,19 @@ public class HostResourceProvider extends AbstractControllerResourceProvider {
       
       if (null != request.getClusterName() && null != request.getPassiveState()) {
         Cluster c = clusters.getCluster(request.getClusterName());
-        PassiveState newStatus = PassiveState.valueOf(request.getPassiveState());
-        PassiveState oldStatus = h.getPassiveState(c.getClusterId());
-        if (!newStatus.equals(oldStatus)) {
-          if (newStatus.equals(PassiveState.IMPLIED)) {
+        PassiveState newState = PassiveState.valueOf(request.getPassiveState());
+        PassiveState oldState = h.getPassiveState(c.getClusterId());
+        if (!newState.equals(oldState)) {
+          if (newState.equals(PassiveState.IMPLIED)) {
             throw new IllegalArgumentException("Invalid arguments, can only set " +
               "passive state to one of " + EnumSet.of(PassiveState.ACTIVE, PassiveState.PASSIVE));
           } else {
-            h.setPassiveState(c.getClusterId(), newStatus);
+            h.setPassiveState(c.getClusterId(), newState);
+            try {
+              PassiveStateHelper.createRequest(controller, c.getClusterName(), h.getHostName());
+            } catch (Exception e) {
+              LOG.warn("Could not send passive status to Nagios (" + e.getMessage() + ")");
+            }
           }
         }
       }

+ 10 - 1
ambari-server/src/main/java/org/apache/ambari/server/controller/internal/ServiceResourceProvider.java

@@ -37,6 +37,7 @@ import org.apache.ambari.server.ParentObjectNotFoundException;
 import org.apache.ambari.server.ServiceNotFoundException;
 import org.apache.ambari.server.api.services.AmbariMetaInfo;
 import org.apache.ambari.server.controller.AmbariManagementController;
+import org.apache.ambari.server.controller.PassiveStateHelper;
 import org.apache.ambari.server.controller.RequestStatusResponse;
 import org.apache.ambari.server.controller.ServiceComponentHostRequest;
 import org.apache.ambari.server.controller.ServiceComponentHostResponse;
@@ -559,6 +560,12 @@ public class ServiceResourceProvider extends AbstractControllerResourceProvider
               "passive state to one of " + EnumSet.of(PassiveState.ACTIVE, PassiveState.PASSIVE));
           } else {
             s.setPassiveState(newPassive);
+            try {
+              PassiveStateHelper.createRequest(controller, cluster.getClusterName(),
+                  s.getName());
+            } catch (Exception e) {
+              LOG.warn("Could not send passive status to Nagios (" + e.getMessage() + ")");
+            }
           }
         }
       }
@@ -974,6 +981,8 @@ public class ServiceResourceProvider extends AbstractControllerResourceProvider
           LOG.error("Can't determine service state.", e);
         }
       }
-      return State.UNKNOWN;    }
+      return State.UNKNOWN;
+    }
   }
+  
 }

+ 10 - 1
ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosAlert.java

@@ -35,7 +35,9 @@ public class NagiosAlert {
   private String last_time_critical = null;
 //  private String is_flapping = null;
 //  private String last_check = null;
-  private String service_type = null;  
+  private String service_type = null;
+  private String long_plugin_output = null;
+
   
   public NagiosAlert() {
   }
@@ -157,6 +159,13 @@ public class NagiosAlert {
     }
   }
   
+  /**
+   * Returns the value of the {@code long_plugin_output} field of this alert.
+   *
+   * NOTE(review): no setter for the field is visible here; presumably it is
+   * filled in when the alert is deserialized - confirm.
+   *
+   * @return the long output, if any; {@code null} when the field was never
+   *         populated
+   */
+  public String getLongPluginOutput() {
+    return long_plugin_output;
+  }
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();

+ 43 - 14
ambari-server/src/main/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProvider.java

@@ -71,6 +71,8 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
   private static final String ALERT_SUMMARY_OK_PROPERTY_ID = "alerts/summary/OK";
   private static final String ALERT_SUMMARY_WARNING_PROPERTY_ID = "alerts/summary/WARNING";
   private static final String ALERT_SUMMARY_CRITICAL_PROPERTY_ID = "alerts/summary/CRITICAL";
+  private static final String ALERT_SUMMARY_PASSIVE_PROPERTY_ID = "alerts/summary/PASSIVE";
+  private static final String PASSIVE_TOKEN = "AMBARIPASSIVE=";
   
   private static final List<String> IGNORABLE_FOR_SERVICES = new ArrayList<String>(
       Arrays.asList("NodeManager health", "NodeManager process", "TaskTracker process",
@@ -196,6 +198,7 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
     int ok = 0;
     int warning = 0;
     int critical = 0;
+    int passive = 0;
     
     List<Map<String, Object>> alerts = new ArrayList<Map<String, Object>>();
     
@@ -208,7 +211,7 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
           if (match && null != alert.getDescription() &&
               IGNORABLE_FOR_SERVICES.contains(alert.getDescription())) {
             match = false;
-          }
+          }          
           break;
         case Host:
           match = alert.getHost().equals(matchValue);
@@ -226,7 +229,38 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
       }
       
       if (match) {
-        switch (alert.getStatus()) {
+
+        // status = the return code from the plugin that controls
+        // whether an alert is sent out (0 when using wrapper)
+        // actual_status = the actual process result
+        
+        Map<String, Object> map = new LinkedHashMap<String, Object>();
+        
+        map.put("description", alert.getDescription());
+        map.put("host_name", alert.getHost());
+        map.put("last_status", NagiosAlert.getStatusString(alert.getLastStatus()));
+        map.put("last_status_time", Long.valueOf(alert.getLastStatusTime()));
+        map.put("service_name", alert.getService());
+        map.put("status", NagiosAlert.getStatusString(alert.getStatus()));
+        map.put("status_time", Long.valueOf(alert.getStatusTime()));
+        map.put("output", alert.getOutput());
+        map.put("actual_status", NagiosAlert.getStatusString(alert.getStatus()));
+        
+        String longOut = alert.getLongPluginOutput();
+        if (null != longOut && longOut.startsWith(PASSIVE_TOKEN)) {
+          int actualStatus = 3;
+          try {
+            int len = PASSIVE_TOKEN.length();
+            actualStatus = Integer.parseInt(longOut.substring(len, len+1));
+          } catch (Exception e) {
+            // do nothing
+          }
+          
+          map.put("status", "PASSIVE");
+          map.put("actual_status", NagiosAlert.getStatusString(actualStatus));
+          passive++;
+        } else {
+          switch (alert.getStatus()) {
           case 0:
             ok++;
             break;
@@ -238,19 +272,9 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
             break;
           default:
             break;
+          }
         }
         
-        Map<String, Object> map = new LinkedHashMap<String, Object>();
-
-        map.put("description", alert.getDescription());
-        map.put("host_name", alert.getHost());
-        map.put("last_status", NagiosAlert.getStatusString(alert.getLastStatus()));
-        map.put("last_status_time", Long.valueOf(alert.getLastStatusTime()));
-        map.put("service_name", alert.getService());
-        map.put("status", NagiosAlert.getStatusString(alert.getStatus()));
-        map.put("status_time", Long.valueOf(alert.getStatusTime()));
-        map.put("output", alert.getOutput());
-        
         alerts.add(map);
       }
     }
@@ -258,6 +282,7 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
     setResourceProperty(res, ALERT_SUMMARY_OK_PROPERTY_ID, Integer.valueOf(ok), requestedIds);
     setResourceProperty(res, ALERT_SUMMARY_WARNING_PROPERTY_ID, Integer.valueOf(warning), requestedIds);
     setResourceProperty(res, ALERT_SUMMARY_CRITICAL_PROPERTY_ID, Integer.valueOf(critical), requestedIds);
+    setResourceProperty(res, ALERT_SUMMARY_PASSIVE_PROPERTY_ID, Integer.valueOf(passive), requestedIds);
     
     if (!alerts.isEmpty())
       setResourceProperty(res, ALERT_DETAIL_PROPERTY_ID, alerts, requestedIds);
@@ -302,7 +327,11 @@ public class NagiosPropertyProvider extends BaseProvider implements PropertyProv
         Collections.sort(alerts.alerts, new Comparator<NagiosAlert>() {
           @Override
           public int compare(NagiosAlert o1, NagiosAlert o2) {
-            return o2.getStatus()-o1.getStatus();
+            if (o2.getStatus() != o1.getStatus())
+              return o2.getStatus()-o1.getStatus();
+            else {
+              return (int)(o2.getLastStatusTime()-o1.getLastStatusTime());
+            }
           }
         });
         

+ 11 - 1
ambari-server/src/main/resources/custom_action_definitions/system_action_definitions.xml

@@ -29,4 +29,14 @@
     <description>HDFS Rebalance</description>
     <targetType>ANY</targetType>
   </actionDefinition>
-</actionDefinitions>
+  <actionDefinition>
+    <actionName>nagios_update_ignore</actionName>
+    <actionType>SYSTEM</actionType>
+    <inputs>[nagios_ignore]</inputs>
+    <targetService>NAGIOS</targetService>
+    <targetComponent>NAGIOS_SERVER</targetComponent>
+    <defaultTimeout>60</defaultTimeout>
+    <description>Used to create an alert blackout</description>
+    <targetType>ANY</targetType>
+  </actionDefinition>
+</actionDefinitions>

+ 87 - 0
ambari-server/src/main/resources/custom_actions/nagios_update_ignore.py

@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+import json
+import sys
+#import traceback
+from resource_management import *
+
+
+class NagiosIgnore(Script):
+  def actionexecute(self, env):
+    config = Script.get_config()
+
+    ignores = None
+
+    structured_output_example = {
+      'result': 'Ignore table updated.'
+    }
+
+    try:
+      if (config.has_key('passiveInfo')):
+        ignores = config['passiveInfo']
+      else:
+        structured_output_example['result'] = "Key 'passiveInfo' not found, skipping"
+        self.put_structured_out(structured_output_example)
+    except Exception:
+      structured_output_example['result'] = "Error accessing passiveInfo"
+      self.put_structured_out(structured_output_example)
+      return
+
+    if ignores is None:
+      return
+    
+    new_file_entries = []
+
+    if ignores is not None:
+      for define in ignores:
+        try:
+          host = str(define['host'])
+          service = str(define['service'])
+          component = str(define['component'])
+          key = host + " " + service + " " + component
+
+          new_file_entries.append(key)
+        except KeyError:
+          pass
+
+    writeFile(new_file_entries)
+
+    self.put_structured_out(structured_output_example)
+
+def writeFile(entries):
+  buf = ""
+  for entry in entries:
+    buf += entry + "\n"
+
+  f = None
+  try:
+    f = open('/var/nagios/ignore.dat', 'w')
+    f.write(buf)
+  except:
+    pass
+  finally:
+    if f is not None:
+      f.close()
+
+if __name__ == "__main__":
+  NagiosIgnore().execute()

+ 5 - 1
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_aggregate.php

@@ -16,6 +16,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+define("PASSIVE_MODE_STR", "AMBARIPASSIVE=");
 
   $options = getopt ("f:s:n:w:c:t:");
   if (!array_key_exists('t', $options) || !array_key_exists('f', $options) || !array_key_exists('w', $options)
@@ -94,7 +95,10 @@
     $total_alerts=0;
     $alerts=0;
     foreach ($matches[0] as $object) {
-      if (getParameter($object, "service_description") == $service_name) {
+      $long_out = getParameter($object, "long_plugin_output");
+      $skip_if_match=!strncmp($long_out, PASSIVE_MODE_STR, strlen(PASSIVE_MODE_STR));
+
+      if (getParameter($object, "service_description") == $service_name && !$skip_if_match) {
         $total_alerts++;
         if (getParameter($object, "current_state") >= $status_code) {
           $alerts++;

+ 56 - 0
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_wrapper.sh

@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+#
+# Wraps a Nagios check so that failures of passive host components do not
+# raise alerts.
+#
+# Usage: check_wrapper.sh <check command> [check arguments...]
+#
+# The wrapped check is run with the given arguments.  When it fails and the
+# ignore file holds a line matching the current host, service and component,
+# the wrapper exits with 0 and appends "AMBARIPASSIVE=<real exit code>" to
+# the check output.  In every other case the output and exit code of the
+# check are passed through unchanged.
+#
+# Reads NAGIOS_HOSTNAME, NAGIOS_SERVICEGROUPNAME and NAGIOS_SERVICEDESC from
+# the environment.  bash is required (pattern substitution, here-strings).
+
+ignore_file=/var/nagios/ignore.dat
+
+# The service is the Nagios service group name.
+real_service_var="$NAGIOS_SERVICEGROUPNAME"
+
+# The component is the first word of the service description once every "::"
+# has been replaced by a blank.
+read -r comp_name _ <<< "${NAGIOS_SERVICEDESC//::/ }"
+
+# Map the names used in the service descriptions to the component names
+# written to the ignore file.
+case "$comp_name" in
+  HBASEMASTER)
+    real_comp_var="HBASE_MASTER"
+  ;;
+  REGIONSERVER)
+    real_comp_var="HBASE_REGIONSERVER"
+  ;;
+  *)
+    real_comp_var="$comp_name"
+  ;;
+esac
+
+# "$@" hands the check its arguments exactly as received: no re-splitting,
+# no globbing and no limit on their number.
+wrapper_output=$("$@")
+wrapper_result=$?
+
+# Only a failed check can be suppressed.  An empty host, service or component
+# never matches; an empty pattern would otherwise match every line.
+ignored=0
+if [ "$wrapper_result" -ne 0 ] && [ -f "$ignore_file" ] \
+    && [ -n "$NAGIOS_HOSTNAME" ] && [ -n "$real_service_var" ] \
+    && [ -n "$real_comp_var" ]; then
+  # -F: match the names literally, so the dots of a host name are not
+  # treated as regular expression wildcards.
+  ignored=$(grep -F -- "$NAGIOS_HOSTNAME" "$ignore_file" \
+    | grep -F -- "$real_service_var" \
+    | grep -c -F -- "$real_comp_var")
+fi
+
+if [ "$ignored" -ne 0 ]; then
+  # NOTE(review): bash echo prints the two characters "\n" here, not a line
+  # break - confirm Nagios turns that into long_plugin_output as intended.
+  echo "$wrapper_output\nAMBARIPASSIVE=${wrapper_result}"
+  exit 0
+fi
+
+echo "$wrapper_output"
+exit "$wrapper_result"
+

+ 5 - 0
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios.py

@@ -72,6 +72,11 @@ def nagios():
     owner = params.nagios_user,
     group = params.nagios_group
   )
+
+  File(format("{nagios_var_dir}/ignore.dat"),
+    owner = params.nagios_user,
+    group = params.nagios_group,
+    mode = 0664)
   
   
 def set_web_permisssions():

+ 25 - 0
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server.py

@@ -42,6 +42,8 @@ class NagiosServer(Script):
     import params
     env.set_params(params)
 
+    update_ignorable(params)
+
     self.configure(env) # done for updating configs after Security enabled
     nagios_service(action='start')
 
@@ -72,6 +74,29 @@ def remove_conflicting_packages():
     ignore_failures = True 
   )
 
+def update_ignorable(params):
+  if not params.config.has_key('passiveInfo'):
+    return
+  else:
+    buf = ""
+    for define in params.config['passiveInfo']:
+      try:
+        host = str(define['host'])
+        service = str(define['service'])
+        component = str(define['component'])
+        buf += host + " " + service + " " + component + "\n"
+      except KeyError:
+        pass
+
+    f = None
+    try:
+      f = open('/var/nagios/ignore.dat', 'w')
+      f.write(buf)
+    except:
+      pass
+    finally:
+      if f is not None:
+        f.close()
 def main():
   command_type = sys.argv[1] if len(sys.argv)>1 else "install"
   print "Running "+command_type

+ 2 - 1
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py

@@ -63,6 +63,7 @@ def nagios_server_config():
   nagios_server_check( 'check_mapred_local_dir_used.sh')
   nagios_server_check( 'check_nodemanager_health.sh')
   nagios_server_check( 'check_namenodes_ha.sh')
+  nagios_server_check( 'check_wrapper.sh')
   nagios_server_check( 'hdp_nagios_init.php')
 
 
@@ -88,4 +89,4 @@ def nagios_server_check(name):
   File( format("{plugins_dir}/{name}"),
     content = StaticFile(name), 
     mode = 0755
-  )
+  )

+ 21 - 16
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2

@@ -24,24 +24,24 @@
 # 'check_cpu' check remote cpu load
 define command {
         command_name    check_cpu
-        command_line    $USER1$/check_cpu.pl -H $HOSTADDRESS$ -C hadoop -w $ARG1$ -c $ARG2$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_cpu.pl -H $HOSTADDRESS$ -C hadoop -w $ARG1$ -c $ARG2$
        }
 {% endif %}
 
 # Check data node storage full 
 define command {
         command_name    check_datanode_storage
-        command_line    php $USER1$/check_datanode_storage.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -s $ARG8$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_datanode_storage.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -s $ARG8$
        }
 
 define command{
         command_name    check_hdfs_blocks
-        command_line    php $USER1$/check_hdfs_blocks.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -s $ARG5$ -e $ARG6$ -k $ARG7$ -r $ARG8$ -t $ARG9$ -u $ARG10$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_hdfs_blocks.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -s $ARG5$ -e $ARG6$ -k $ARG7$ -r $ARG8$ -t $ARG9$ -u $ARG10$
        }
 
 define command{
         command_name    check_hdfs_capacity
-        command_line    php $USER1$/check_hdfs_capacity.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_hdfs_capacity.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
        }
 
 define command{
@@ -51,56 +51,56 @@ define command{
 
 define command{
         command_name    check_rpcq_latency
-        command_line    php $USER1$/check_rpcq_latency.php -h $HOSTADDRESS$ -p $ARG2$ -n $ARG1$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_rpcq_latency.php -h $HOSTADDRESS$ -p $ARG2$ -n $ARG1$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
        }
 
 define command{
         command_name    check_nagios
-        command_line    $USER1$/check_nagios -e $ARG1$ -F $ARG2$ -C $ARG3$ 
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_nagios -e $ARG1$ -F $ARG2$ -C $ARG3$ 
        }
 
 define command{
         command_name    check_webui
-        command_line    $USER1$/check_webui.sh $ARG1$ $HOSTADDRESS$ $ARG2$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_webui.sh $ARG1$ $HOSTADDRESS$ $ARG2$
        }
 
 define command{
         command_name    check_name_dir_status
-        command_line    php $USER1$/check_name_dir_status.php -h $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ -k $ARG3$ -r $ARG4$ -t $ARG5$ -s $ARG6$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_name_dir_status.php -h $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ -k $ARG3$ -r $ARG4$ -t $ARG5$ -s $ARG6$
        }
 
 define command{
         command_name    check_oozie_status
-        command_line    $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
 
 define command{
         command_name    check_templeton_status
-        command_line    $USER1$/check_templeton_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_templeton_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
 
 define command{
         command_name    check_hive_metastore_status
-        command_line    $USER1$/check_hive_metastore_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_hive_metastore_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
 define command{
         command_name    check_hue_status
-        command_line    $USER1$/check_hue_status.sh
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_hue_status.sh
        }
 
 define command{
        command_name    check_mapred_local_dir_used_space
-       command_line    $USER1$/check_mapred_local_dir_used.sh $ARG1$ $ARG2$
+       command_line    $USER1$/check_wrapper.sh $USER1$/check_mapred_local_dir_used.sh $ARG1$ $ARG2$
        }
 
 define command{
        command_name    check_namenodes_ha
-       command_line    $USER1$/check_namenodes_ha.sh $ARG1$ $ARG2$
+       command_line    $USER1$/check_wrapper.sh $USER1$/check_namenodes_ha.sh $ARG1$ $ARG2$
        }
 
 define command{
         command_name    check_nodemanager_health
-        command_line    $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
        }
 
 define command{
@@ -111,4 +111,9 @@ define command{
 define command{
         command_name    service_sys_logger
         command_line    $USER1$/sys_logger.py $SERVICESTATETYPE$ $SERVICEATTEMPT$ $SERVICESTATE$ "$SERVICEDESC$" "Event Host=$HOSTADDRESS$ Service Description=$SERVICEDESC$($SERVICESTATE$), $SERVICEOUTPUT$ $LONGSERVICEOUTPUT$"
-       }
+       }
+
+define command{
+  command_name check_tcp_wrapper
+  command_line  $USER1$/check_wrapper.sh $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
+}

+ 20 - 20
ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2

@@ -75,7 +75,7 @@ define service {
         use                     hadoop-service
         service_description     AMBARI::Ambari Agent process
         servicegroups           AMBARI
-        check_command           check_tcp!{{all_ping_ports[loop.index-1]}}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{all_ping_ports[loop.index-1]}}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.25
         max_check_attempts      4
@@ -121,7 +121,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Server process
         servicegroups           GANGLIA
-        check_command           check_tcp!{{ ganglia_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ ganglia_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -132,7 +132,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Monitor process for NameNode
         servicegroups           GANGLIA
-        check_command           check_tcp!{{ ganglia_collector_namenode_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ ganglia_collector_namenode_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -144,7 +144,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Monitor process for JobTracker
         servicegroups           GANGLIA
-        check_command           check_tcp!{{ ganglia_collector_jobtracker_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ ganglia_collector_jobtracker_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -157,7 +157,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Monitor process for HBase Master
         servicegroups           GANGLIA
-        check_command           check_tcp!{{ ganglia_collector_hbase_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ ganglia_collector_hbase_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -170,7 +170,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Monitor process for ResourceManager
         servicegroups           GANGLIA
-        check_command           check_tcp!{{ ganglia_collector_rm_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ ganglia_collector_rm_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -183,7 +183,7 @@ define service {
         use                     hadoop-service
         service_description     GANGLIA::Ganglia Monitor process for HistoryServer
         servicegroups           GANGLIA
-        check_command           check_tcp!{{ ganglia_collector_hs_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ ganglia_collector_hs_port }}!-w 1 -c 1
         normal_check_interval   0.25
         retry_check_interval    0.25
         max_check_attempts      4
@@ -199,7 +199,7 @@ define service {
         use                     hadoop-service
         service_description     NAMENODE::Secondary NameNode process
         servicegroups           HDFS
-        check_command           check_tcp!{{ snamenode_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ snamenode_port }}!-w 1 -c 1
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -251,7 +251,7 @@ define service {
         use                     hadoop-service
         service_description     NAMENODE::NameNode process on {{ namenode_hostname }}
         servicegroups           HDFS
-        check_command           check_tcp!{{ namenode_metadata_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ namenode_metadata_port }}!-w 1 -c 1
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -336,7 +336,7 @@ define service {
         use                     hadoop-service
         service_description     JOBTRACKER::JobTracker process
         servicegroups           MAPREDUCE
-        check_command           check_tcp!{{ jtnode_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ jtnode_port }}!-w 1 -c 1
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      4
@@ -371,7 +371,7 @@ define service {
         use                     hadoop-service
         service_description     TASKTRACKER::TaskTracker process
         servicegroups           MAPREDUCE
-        check_command           check_tcp!{{ tasktracker_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ tasktracker_port }}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -434,7 +434,7 @@ define service {
         use                     hadoop-service
         service_description     RESOURCEMANAGER::ResourceManager process
         servicegroups           YARN
-        check_command           check_tcp!{{ rm_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ rm_port }}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -448,7 +448,7 @@ define service {
         use                     hadoop-service
         service_description     NODEMANAGER::NodeManager process
         servicegroups           YARN
-        check_command           check_tcp!{{ nm_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ nm_port }}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -507,7 +507,7 @@ define service {
         use                     hadoop-service
         service_description     JOBHISTORY::HistoryServer process
         servicegroups           MAPREDUCE
-        check_command           check_tcp!{{ hs_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ hs_port }}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -522,7 +522,7 @@ define service {
         use                     hadoop-service
         service_description     JOURNALNODE::JournalNode process
         servicegroups           HDFS
-        check_command           check_tcp!{{ journalnode_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ journalnode_port }}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -549,7 +549,7 @@ define service {
         use                     hadoop-service
         service_description     DATANODE::DataNode process
         servicegroups           HDFS
-        check_command           check_tcp!{{datanode_port}}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{datanode_port}}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -575,7 +575,7 @@ define service {
         use                     hadoop-service
         service_description     FLUME::Flume Agent process
         servicegroups           FLUME
-        check_command           check_tcp!{{ flume_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ flume_port }}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -590,7 +590,7 @@ define service {
         use                     hadoop-service
         service_description     ZOOKEEPER::ZooKeeper Server process
         servicegroups           ZOOKEEPER
-        check_command           check_tcp!{{ clientPort }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ clientPort }}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -604,7 +604,7 @@ define service {
         use                     hadoop-service
         service_description     REGIONSERVER::RegionServer process
         servicegroups           HBASE
-        check_command           check_tcp!{{ hbase_rs_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ hbase_rs_port }}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3
@@ -639,7 +639,7 @@ define service {
         use                     hadoop-service
         service_description     HBASEMASTER::HBase Master process on {{ hbasemaster }}
         servicegroups           HBASE
-        check_command           check_tcp!{{ hbase_master_rpc_port }}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{ hbase_master_rpc_port }}!-w 1 -c 1
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      4

+ 5 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_aggregate.php

@@ -16,6 +16,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+define("PASSIVE_MODE_STR", "AMBARIPASSIVE=");
 
   $options = getopt ("f:s:n:w:c:t:");
   if (!array_key_exists('t', $options) || !array_key_exists('f', $options) || !array_key_exists('w', $options)
@@ -94,7 +95,10 @@
     $total_alerts=0;
     $alerts=0;
     foreach ($matches[0] as $object) {
-      if (getParameter($object, "service_description") == $service_name) {
+      $long_out = getParameter($object, "long_plugin_output");
+      $skip_if_match=!strncmp($long_out, PASSIVE_MODE_STR, strlen(PASSIVE_MODE_STR));
+
+      if (getParameter($object, "service_description") == $service_name && !$skip_if_match) {
         $total_alerts++;
         if (getParameter($object, "current_state") >= $status_code) {
           $alerts++;

+ 59 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_wrapper.sh

@@ -0,0 +1,59 @@
+#!/bin/sh
+
+function real_service() {
+  desc=$NAGIOS_SERVICEGROUPNAME
+  eval "$1='$NAGIOS_SERVICEGROUPNAME'"
+}
+
+function real_component() {
+  arrDesc=(${NAGIOS_SERVICEDESC//::/ })
+
+  compName="${arrDesc[0]}"
+
+  case "$compName" in
+    HBASEMASTER)
+      realCompName="HBASE_MASTER"
+    ;;
+    REGIONSERVER)
+      realCompName="HBASE_REGIONSERVER"
+    ;;
+    JOBHISTORY)
+      realCompName="MAPREDUCE2"
+    ;;
+    *)
+      realCompName=$compName
+    ;;
+  esac
+
+  eval "$1='$realCompName'"
+}
+
+real_service_var=""
+real_service real_service_var
+
+real_comp_var=""
+real_component real_comp_var
+
+
+wrapper_output=`exec $1 $2 $3 $4 $5 $6 $7 $8 $9 ${10} ${11} ${12} ${13} ${14} ${15} ${16} ${17} ${18} ${19} ${20}`
+wrapper_result=$?
+
+if [ "$wrapper_result" == "0" ]; then
+  echo "$wrapper_output"
+  exit $wrapper_result
+fi
+
+if [ ! -f /var/nagios/ignore.dat ]; then
+  echo "$wrapper_output"
+  exit $wrapper_result
+else
+  count=`grep $NAGIOS_HOSTNAME /var/nagios/ignore.dat | grep $real_service_var | grep $real_comp_var | wc -l`
+  if [ "$count" -ne "0" ]; then
+    echo "$wrapper_output\nAMBARIPASSIVE=${wrapper_result}"
+    exit 0
+  else
+    echo "$wrapper_output"
+    exit $wrapper_result
+  fi
+fi
+

+ 5 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios.py

@@ -72,6 +72,11 @@ def nagios():
     owner = params.nagios_user,
     group = params.nagios_group
   )
+
+  File(format("{nagios_var_dir}/ignore.dat"),
+    owner = params.nagios_user,
+    group = params.nagios_group,
+    mode = 0664)
   
   
 def set_web_permisssions():

+ 26 - 0
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server.py

@@ -42,6 +42,8 @@ class NagiosServer(Script):
     import params
     env.set_params(params)
 
+    update_ignorable(params)
+
     self.configure(env) # done for updating configs after Security enabled
     nagios_service(action='start')
 
@@ -72,6 +74,30 @@ def remove_conflicting_packages():
     ignore_failures = True 
   )
 
+def update_ignorable(params):
+  if not params.config.has_key('passiveInfo'):
+    return
+  else:
+    buf = ""
+    for define in params.config['passiveInfo']:
+      try:
+        host = str(define['host'])
+        service = str(define['service'])
+        component = str(define['component'])
+        buf += host + " " + service + " " + component + "\n"
+      except KeyError:
+        pass
+
+    f = None
+    try:
+      f = open('/var/nagios/ignore.dat', 'w')
+      f.write(buf)
+    except:
+      pass
+    finally:
+      if f is not None:
+        f.close()
+
 def main():
   command_type = sys.argv[1] if len(sys.argv)>1 else "install"
   print "Running "+command_type

+ 2 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py

@@ -63,6 +63,7 @@ def nagios_server_config():
   nagios_server_check( 'check_mapred_local_dir_used.sh')
   nagios_server_check( 'check_nodemanager_health.sh')
   nagios_server_check( 'check_namenodes_ha.sh')
+  nagios_server_check( 'check_wrapper.sh')
   nagios_server_check( 'hdp_nagios_init.php')
 
 
@@ -88,4 +89,4 @@ def nagios_server_check(name):
   File( format("{plugins_dir}/{name}"),
     content = StaticFile(name), 
     mode = 0755
-  )
+  )

+ 21 - 15
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2

@@ -24,24 +24,24 @@
 # 'check_cpu' check remote cpu load
 define command {
         command_name    check_cpu
-        command_line    $USER1$/check_cpu.pl -H $HOSTADDRESS$ -C hadoop -w $ARG1$ -c $ARG2$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_cpu.pl -H $HOSTADDRESS$ -C hadoop -w $ARG1$ -c $ARG2$
        }
 {% endif %}
 
 # Check data node storage full 
 define command {
         command_name    check_datanode_storage
-        command_line    php $USER1$/check_datanode_storage.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -s $ARG8$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_datanode_storage.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -s $ARG8$
        }
 
 define command{
         command_name    check_hdfs_blocks
-        command_line    php $USER1$/check_hdfs_blocks.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -s $ARG5$ -e $ARG6$ -k $ARG7$ -r $ARG8$ -t $ARG9$ -u $ARG10$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_hdfs_blocks.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -s $ARG5$ -e $ARG6$ -k $ARG7$ -r $ARG8$ -t $ARG9$ -u $ARG10$
        }
 
 define command{
         command_name    check_hdfs_capacity
-        command_line    php $USER1$/check_hdfs_capacity.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_hdfs_capacity.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
        }
 
 define command{
@@ -51,7 +51,7 @@ define command{
 
 define command{
         command_name    check_rpcq_latency
-        command_line    php $USER1$/check_rpcq_latency.php -h $HOSTADDRESS$ -p $ARG2$ -n $ARG1$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_rpcq_latency.php -h $HOSTADDRESS$ -p $ARG2$ -n $ARG1$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -s $ARG9$
        }
 
 define command{
@@ -61,46 +61,46 @@ define command{
 
 define command{
         command_name    check_webui
-        command_line    $USER1$/check_webui.sh $ARG1$ $HOSTADDRESS$ $ARG2$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_webui.sh $ARG1$ $HOSTADDRESS$ $ARG2$
        }
 
 define command{
         command_name    check_name_dir_status
-        command_line    php $USER1$/check_name_dir_status.php -h $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ -k $ARG3$ -r $ARG4$ -t $ARG5$ -s $ARG6$
+        command_line    $USER1$/check_wrapper.sh php $USER1$/check_name_dir_status.php -h $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ -k $ARG3$ -r $ARG4$ -t $ARG5$ -s $ARG6$
        }
 
 define command{
         command_name    check_oozie_status
-        command_line    $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_oozie_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
 
 define command{
         command_name    check_templeton_status
-        command_line    $USER1$/check_templeton_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_templeton_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
 
 define command{
         command_name    check_hive_metastore_status
-        command_line    $USER1$/check_hive_metastore_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_hive_metastore_status.sh $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$ $ARG6$ $ARG7$
        }
 define command{
         command_name    check_hue_status
-        command_line    $USER1$/check_hue_status.sh
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_hue_status.sh
        }
 
 define command{
        command_name    check_mapred_local_dir_used_space
-       command_line    $USER1$/check_mapred_local_dir_used.sh $ARG1$ $ARG2$
+       command_line    $USER1$/check_wrapper.sh $USER1$/check_mapred_local_dir_used.sh $ARG1$ $ARG2$
        }
 
 define command{
        command_name    check_namenodes_ha
-       command_line    $USER1$/check_namenodes_ha.sh $ARG1$ $ARG2$
+       command_line    $USER1$/check_wrapper.sh $USER1$/check_namenodes_ha.sh $ARG1$ $ARG2$
        }
 
 define command{
         command_name    check_nodemanager_health
-        command_line    $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
+        command_line    $USER1$/check_wrapper.sh $USER1$/check_nodemanager_health.sh $HOSTADDRESS$ $ARG1$
        }
 
 define command{
@@ -111,4 +111,10 @@ define command{
 define command{
         command_name    service_sys_logger
         command_line    $USER1$/sys_logger.py $SERVICESTATETYPE$ $SERVICEATTEMPT$ $SERVICESTATE$ "$SERVICEDESC$" "Event Host=$HOSTADDRESS$ Service Description=$SERVICEDESC$($SERVICESTATE$), $SERVICEOUTPUT$ $LONGSERVICEOUTPUT$"
-       }
+       }
+
+define command{
+  command_name check_tcp_wrapper
+  command_line  $USER1$/check_wrapper.sh $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
+}
+        

+ 1 - 1
ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2

@@ -478,7 +478,7 @@ define service {
         use                     hadoop-service
         service_description     DATANODE::DataNode process
         servicegroups           HDFS
-        check_command           check_tcp!{{datanode_port}}!-w 1 -c 1
+        check_command           check_tcp_wrapper!{{datanode_port}}!-w 1 -c 1
         normal_check_interval   1
         retry_check_interval    0.5
         max_check_attempts      3

+ 9 - 1
ambari-server/src/test/java/org/apache/ambari/server/controller/AmbariManagementControllerTest.java

@@ -3884,6 +3884,9 @@ public class AmbariManagementControllerTest {
     assertEquals(1, response.getTasks().size());
     taskStatus = response.getTasks().get(0);
     Assert.assertEquals("h3", taskStatus.getHostName());
+    
+    Assert.assertTrue(null != cmd.getPassiveInfo());
+    
   }
 
   @Test
@@ -9296,6 +9299,11 @@ public class AmbariManagementControllerTest {
       // manually change live state to stopped as no running action manager
       List<HostRoleCommand> commands = actionDB.getRequestTasks(rsr.getRequestId());
       for (HostRoleCommand cmd : commands) {
+        Assert.assertNotNull(cmd.getExecutionCommandWrapper().getExecutionCommand().getPassiveInfo());
+        Assert.assertEquals(Integer.valueOf(1),
+            Integer.valueOf(
+                cmd.getExecutionCommandWrapper().getExecutionCommand().getPassiveInfo().size()));
+        
         clusters.getCluster(clusterName).getService(serviceName).getServiceComponent(cmd.getRole().name())
             .getServiceComponentHost(cmd.getHostName()).setState(State.INSTALLED);
       }
@@ -9309,7 +9317,7 @@ public class AmbariManagementControllerTest {
       for (ServiceComponentHost sch : sc.getServiceComponentHosts().values()) {
         Assert.assertEquals(sch == targetSch ? State.INIT : State.INSTALLED, sch.getState());
       }
-    }    
+    }
 
   }
 

+ 195 - 0
ambari-server/src/test/java/org/apache/ambari/server/controller/PassiveStateHelperTest.java

@@ -0,0 +1,195 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ambari.server.controller;
+
+import static org.easymock.EasyMock.capture;
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ambari.server.state.Cluster;
+import org.apache.ambari.server.state.Host;
+import org.apache.ambari.server.state.PassiveState;
+import org.apache.ambari.server.state.Service;
+import org.apache.ambari.server.state.ServiceComponent;
+import org.apache.ambari.server.state.ServiceComponentHost;
+import org.easymock.Capture;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests {@link PassiveStateHelper}. For a service, a host and a host
+ * component, each in both passive states, the action handed to
+ * {@code AmbariManagementController.createAction} must be the
+ * {@code nagios_update_ignore} action addressed to NAGIOS / NAGIOS_SERVER.
+ */
+public class PassiveStateHelperTest {
+
+  /** Runs the service scenario once per passive state. */
+  @Test
+  public void testService() throws Exception {
+    testService(PassiveState.PASSIVE);
+    testService(PassiveState.ACTIVE);
+  }
+
+  /** Runs the host scenario once per passive state. */
+  @Test
+  public void testHost() throws Exception {
+    testHost(PassiveState.PASSIVE);
+    testHost(PassiveState.ACTIVE);
+  }
+
+  /** Runs the host-component scenario once per passive state. */
+  @Test
+  public void testHostComponent() throws Exception {
+    testHostComponent(PassiveState.PASSIVE);
+    testHostComponent(PassiveState.ACTIVE);
+  }
+
+  /**
+   * Calls {@code createRequest} with the cluster and component names read from
+   * a mocked host component, then checks the captured action.
+   *
+   * @param state the passive state the mocked host component is set up to report
+   */
+  private void testHostComponent(PassiveState state) throws Exception {
+    AmbariManagementController amc = createMock(AmbariManagementController.class);
+    Capture<ExecuteActionRequest> earCapture = new Capture<ExecuteActionRequest>();
+    Capture<Map<String, String>> rpCapture = new Capture<Map<String, String>>();
+    expect(amc.createAction(capture(earCapture), capture(rpCapture))).andReturn(null);
+
+    Cluster cluster = createMock(Cluster.class);
+    expect(cluster.getClusterName()).andReturn("c1").anyTimes();
+
+    ServiceComponentHost sch = createMock(ServiceComponentHost.class);
+    expect(sch.getClusterName()).andReturn("c1");
+    expect(sch.getPassiveState()).andReturn(state);
+    expect(sch.getServiceName()).andReturn("HDFS");
+    expect(sch.getServiceComponentName()).andReturn("NAMENODE").anyTimes();
+    expect(sch.getHostName()).andReturn("h1");
+
+    replay(amc, cluster, sch);
+
+    PassiveStateHelper.createRequest(amc, sch.getClusterName(), sch.getServiceComponentName());
+
+    assertNagiosIgnoreRequest(earCapture.getValue(), rpCapture.getValue());
+  }
+
+  /**
+   * Calls {@code createRequest} with the cluster name and the name of a mocked
+   * host, then checks the captured action.
+   *
+   * @param state the passive state the mocked host is set up to report
+   */
+  private void testHost(PassiveState state) throws Exception {
+    AmbariManagementController amc = createMock(AmbariManagementController.class);
+    Capture<ExecuteActionRequest> earCapture = new Capture<ExecuteActionRequest>();
+    Capture<Map<String, String>> rpCapture = new Capture<Map<String, String>>();
+    expect(amc.createAction(capture(earCapture), capture(rpCapture))).andReturn(null);
+
+    Cluster cluster = createMock(Cluster.class);
+
+    Service service = createMock(Service.class);
+
+    // NOTE(review): sc1 and sc2 receive expectations but are never passed to
+    // replay() below - presumably leftovers; confirm before removing them.
+    ServiceComponent sc1 = createMock(ServiceComponent.class);
+    ServiceComponent sc2 = createMock(ServiceComponent.class);
+    expect(sc1.isClientComponent()).andReturn(Boolean.FALSE).anyTimes();
+    expect(sc2.isClientComponent()).andReturn(Boolean.TRUE).anyTimes();
+
+    ServiceComponentHost sch1 = createMock(ServiceComponentHost.class);
+    Map<String, ServiceComponentHost> schMap = new HashMap<String, ServiceComponentHost>();
+    schMap.put("h1", sch1);
+    expect(sch1.getHostName()).andReturn("h1");
+    expect(sch1.getServiceName()).andReturn("HDFS").anyTimes();
+    expect(sch1.getServiceComponentName()).andReturn("NAMENODE").anyTimes();
+
+    List<ServiceComponentHost> schList = new ArrayList<ServiceComponentHost>(schMap.values());
+
+    expect(cluster.getClusterName()).andReturn("c1").anyTimes();
+    expect(cluster.getService("HDFS")).andReturn(service).anyTimes();
+    expect(cluster.getClusterId()).andReturn(Long.valueOf(1L));
+    expect(cluster.getServiceComponentHosts("h1")).andReturn(schList);
+    expect(service.getServiceComponent("NAMENODE")).andReturn(sc1);
+
+    Host host = createMock(Host.class);
+    expect(host.getHostName()).andReturn("h1").anyTimes();
+    expect(host.getPassiveState(1L)).andReturn(state);
+
+    replay(amc, cluster, service, sch1, host);
+
+    PassiveStateHelper.createRequest(amc, cluster.getClusterName(), host.getHostName());
+
+    assertNagiosIgnoreRequest(earCapture.getValue(), rpCapture.getValue());
+  }
+
+  /**
+   * Calls {@code createRequest} with cluster "c1" and the name of a mocked
+   * service, then checks the captured action.
+   *
+   * @param state the passive state the mocked service is set up to report
+   */
+  private void testService(PassiveState state) throws Exception {
+    AmbariManagementController amc = createMock(AmbariManagementController.class);
+    Capture<ExecuteActionRequest> earCapture = new Capture<ExecuteActionRequest>();
+    Capture<Map<String, String>> rpCapture = new Capture<Map<String, String>>();
+    expect(amc.createAction(capture(earCapture), capture(rpCapture))).andReturn(null);
+
+    Cluster cluster = createMock(Cluster.class);
+    Service service = createMock(Service.class);
+
+    // One non-client and one client component are registered on the service.
+    ServiceComponent sc1 = createMock(ServiceComponent.class);
+    ServiceComponent sc2 = createMock(ServiceComponent.class);
+    expect(sc1.isClientComponent()).andReturn(Boolean.FALSE).anyTimes();
+    expect(sc2.isClientComponent()).andReturn(Boolean.TRUE).anyTimes();
+
+    ServiceComponentHost sch1 = createMock(ServiceComponentHost.class);
+    Map<String, ServiceComponentHost> schMap = new HashMap<String, ServiceComponentHost>();
+    schMap.put("h1", sch1);
+    expect(sch1.getHostName()).andReturn("h1");
+    expect(sch1.getServiceName()).andReturn("HDFS");
+    expect(sch1.getServiceComponentName()).andReturn("NAMENODE");
+
+    expect(sc1.getServiceComponentHosts()).andReturn(schMap);
+
+    Map<String, ServiceComponent> scMap = new HashMap<String, ServiceComponent>();
+    scMap.put("NAMENODE", sc1);
+    scMap.put("HDFS_CLIENT", sc2);
+
+    expect(cluster.getClusterName()).andReturn("c1");
+    expect(service.getCluster()).andReturn(cluster);
+    expect(service.getServiceComponents()).andReturn(scMap);
+    expect(service.getPassiveState()).andReturn(state);
+    expect(service.getName()).andReturn("HDFS");
+
+    replay(amc, cluster, service, sc1, sc2, sch1);
+
+    PassiveStateHelper.createRequest(amc, "c1", service.getName());
+
+    assertNagiosIgnoreRequest(earCapture.getValue(), rpCapture.getValue());
+  }
+
+  /**
+   * Verifies the arguments captured from the mocked {@code createAction} call.
+   *
+   * @param ear the captured action request
+   * @param map the captured request properties
+   */
+  private static void assertNagiosIgnoreRequest(ExecuteActionRequest ear,
+      Map<String, String> map) {
+    Assert.assertEquals("nagios_update_ignore", ear.getActionName());
+    Assert.assertEquals("ACTIONEXECUTE", ear.getCommandName());
+    Assert.assertEquals("NAGIOS", ear.getServiceName());
+    Assert.assertEquals("NAGIOS_SERVER", ear.getComponentName());
+    Assert.assertEquals("c1", ear.getClusterName());
+    Assert.assertTrue(map.containsKey("context"));
+  }
+
+}

+ 126 - 69
ambari-server/src/test/java/org/apache/ambari/server/controller/nagios/NagiosPropertyProviderTest.java

@@ -17,12 +17,19 @@
  */
 package org.apache.ambari.server.controller.nagios;
 
+import static org.easymock.EasyMock.createMock;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.reset;
+
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.Set;
 
+import org.apache.ambari.server.AmbariException;
 import org.apache.ambari.server.configuration.Configuration;
 import org.apache.ambari.server.controller.ganglia.TestStreamProvider;
 import org.apache.ambari.server.controller.internal.ResourceImpl;
@@ -30,22 +37,19 @@ import org.apache.ambari.server.controller.spi.Request;
 import org.apache.ambari.server.controller.spi.Resource;
 import org.apache.ambari.server.controller.spi.TemporalInfo;
 import org.apache.ambari.server.controller.utilities.PropertyHelper;
-import org.apache.ambari.server.orm.GuiceJpaInitializer;
-import org.apache.ambari.server.orm.InMemoryDefaultTestModule;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Clusters;
-import org.apache.ambari.server.state.Host;
 import org.apache.ambari.server.state.Service;
 import org.apache.ambari.server.state.ServiceComponent;
-import org.apache.ambari.server.state.StackId;
-import org.junit.After;
+import org.apache.ambari.server.state.ServiceComponentHost;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+import com.google.inject.Binder;
 import com.google.inject.Guice;
 import com.google.inject.Injector;
-import com.google.inject.persist.PersistService;
+import com.google.inject.Module;
 
 /**
  * Tests the nagios property provider
@@ -54,40 +58,45 @@ public class NagiosPropertyProviderTest {
 
   private static final String HOST = "c6401.ambari.apache.org";
 
-  private InMemoryDefaultTestModule module = null;
+  private GuiceModule module = null;
   private Clusters clusters = null;
   private Injector injector = null;
 
   @Before
   public void setup() throws Exception {
-    module = new InMemoryDefaultTestModule();
 
+    module = new GuiceModule();
     injector = Guice.createInjector(module);
-    injector.getInstance(GuiceJpaInitializer.class);
     NagiosPropertyProvider.init(injector);
     
-    clusters = injector.getInstance(Clusters.class);
-    clusters.addCluster("c1");
     
+    clusters = injector.getInstance(Clusters.class);
+    Cluster cluster = createMock(Cluster.class);
+    expect(clusters.getCluster("c1")).andReturn(cluster).anyTimes();
+
+    Service nagiosService = createMock(Service.class);
+    expect(cluster.getService("NAGIOS")).andReturn(nagiosService).anyTimes();
     
-    Cluster cluster = clusters.getCluster("c1");
-    cluster.setDesiredStackVersion(new StackId("HDP-2.0.5"));
+    ServiceComponent nagiosServiceComponent = createMock(ServiceComponent.class);
+    expect(nagiosService.getServiceComponent("NAGIOS_SERVER")).andReturn(
+        nagiosServiceComponent).anyTimes();
     
-    clusters.addHost(HOST);
-    Host host = clusters.getHost(HOST);
-    host.setOsType("centos5");
-    host.persist();
+    ServiceComponentHost nagiosScHost = createMock(ServiceComponentHost.class);
+    Map<String, ServiceComponentHost> map1 = new HashMap<String, ServiceComponentHost>();
+    map1.put(HOST, nagiosScHost);
+    expect(nagiosServiceComponent.getServiceComponentHosts()).andReturn(
+        map1).anyTimes();
     
-    clusters.mapHostToCluster(HOST, "c1");
-  }
-  
-  @After
-  public void teardown() throws Exception {
-    injector.getInstance(PersistService.class).stop();    
+    replay(clusters, cluster, nagiosService, nagiosServiceComponent);
   }
   
   @Test
   public void testNoNagiosService() throws Exception {
+    Cluster cluster = clusters.getCluster("c1");
+    reset(cluster); // simulate the error raised when NAGIOS is not part of the cluster
+    expect(cluster.getService("NAGIOS")).andThrow(new AmbariException("No Service"));
+    replay(cluster);
+    
     TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt");
 
     NagiosPropertyProvider npp = new NagiosPropertyProvider(Resource.Type.Service,
@@ -114,10 +123,6 @@ public class NagiosPropertyProviderTest {
   
   @Test
   public void testNoNagiosServerCompoonent() throws Exception {
-    Cluster cluster = clusters.getCluster("c1");
-    Service service = cluster.addService("NAGIOS");
-    service.setDesiredStackVersion(new StackId("HDP-2.0.5"));
-    service.persist();
     
     TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt");
 
@@ -145,15 +150,7 @@ public class NagiosPropertyProviderTest {
   
   @Test
   public void testNagiosServiceAlerts() throws Exception {
-    Cluster cluster = clusters.getCluster("c1");
-    Service service = cluster.addService("NAGIOS");
-    service.setDesiredStackVersion(new StackId("HDP-2.0.5"));
-    service.persist();
-    
-    ServiceComponent sc = service.addServiceComponent("NAGIOS_SERVER");
-    sc.setDesiredStackVersion(new StackId("HDP-2.0.5"));
-    sc.addServiceComponentHost(HOST);
-    sc.persist();
+
     
     TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt");
 
@@ -196,6 +193,7 @@ public class NagiosPropertyProviderTest {
     Assert.assertTrue(summary.containsKey("OK"));
     Assert.assertTrue(summary.containsKey("WARNING"));
     Assert.assertTrue(summary.containsKey("CRITICAL"));
+    Assert.assertTrue(summary.containsKey("PASSIVE"));
     
     Assert.assertTrue(summary.get("OK").equals(Integer.valueOf(1)));
     Assert.assertTrue(summary.get("WARNING").equals(Integer.valueOf(0)));
@@ -204,17 +202,7 @@ public class NagiosPropertyProviderTest {
   
 
   @Test
-  public void testNagiosHostAlerts() throws Exception {
-    Cluster cluster = clusters.getCluster("c1");
-    Service service = cluster.addService("NAGIOS");
-    service.setDesiredStackVersion(new StackId("HDP-2.0.5"));
-    service.persist();
-    
-    ServiceComponent sc = service.addServiceComponent("NAGIOS_SERVER");
-    sc.setDesiredStackVersion(new StackId("HDP-2.0.5"));
-    sc.addServiceComponentHost(HOST);
-    sc.persist();
-    
+  public void testNagiosHostAlerts() throws Exception {    
     TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt");
 
     NagiosPropertyProvider npp = new NagiosPropertyProvider(Resource.Type.Host,
@@ -256,6 +244,7 @@ public class NagiosPropertyProviderTest {
     Assert.assertTrue(summary.containsKey("OK"));
     Assert.assertTrue(summary.containsKey("WARNING"));
     Assert.assertTrue(summary.containsKey("CRITICAL"));
+    Assert.assertTrue(summary.containsKey("PASSIVE"));
     
     Assert.assertTrue(summary.get("OK").equals(Integer.valueOf(6)));
     Assert.assertTrue(summary.get("WARNING").equals(Integer.valueOf(0)));
@@ -264,15 +253,6 @@ public class NagiosPropertyProviderTest {
   
   @Test
   public void testNagiosHostAlertsWithIgnore() throws Exception {
-    Cluster cluster = clusters.getCluster("c1");
-    Service service = cluster.addService("NAGIOS");
-    service.setDesiredStackVersion(new StackId("HDP-2.0.5"));
-    service.persist();
-    
-    ServiceComponent sc = service.addServiceComponent("NAGIOS_SERVER");
-    sc.setDesiredStackVersion(new StackId("HDP-2.0.5"));
-    sc.addServiceComponentHost(HOST);
-    sc.persist();
     
     TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt");
 
@@ -315,27 +295,19 @@ public class NagiosPropertyProviderTest {
     Assert.assertTrue(summary.containsKey("OK"));
     Assert.assertTrue(summary.containsKey("WARNING"));
     Assert.assertTrue(summary.containsKey("CRITICAL"));
+    Assert.assertTrue(summary.containsKey("PASSIVE"));
     
-    Assert.assertEquals(summary.get("OK"), Integer.valueOf(15));
+    Assert.assertEquals(summary.get("OK"), Integer.valueOf(14));
     Assert.assertEquals(summary.get("WARNING"), Integer.valueOf(0));
     Assert.assertEquals(summary.get("CRITICAL"), Integer.valueOf(1));
+    Assert.assertEquals(Integer.valueOf(1), summary.get("PASSIVE"));
   }  
   
   @Test
   public void testNagiosServiceAlertsAddIgnore() throws Exception {
-    module.getProperties().setProperty(Configuration.NAGIOS_IGNORE_FOR_SERVICES_KEY,
+    module.properties.setProperty(Configuration.NAGIOS_IGNORE_FOR_SERVICES_KEY,
         "HBase Master process on c6401.ambari.apache.org");
     
-    Cluster cluster = clusters.getCluster("c1");
-    Service service = cluster.addService("NAGIOS");
-    service.setDesiredStackVersion(new StackId("HDP-2.0.5"));
-    service.persist();
-    
-    ServiceComponent sc = service.addServiceComponent("NAGIOS_SERVER");
-    sc.setDesiredStackVersion(new StackId("HDP-2.0.5"));
-    sc.addServiceComponentHost(HOST);
-    sc.persist();
-    
     TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt");
 
     NagiosPropertyProvider npp = new NagiosPropertyProvider(Resource.Type.Service,
@@ -379,10 +351,95 @@ public class NagiosPropertyProviderTest {
     Assert.assertTrue(summary.containsKey("OK"));
     Assert.assertTrue(summary.containsKey("WARNING"));
     Assert.assertTrue(summary.containsKey("CRITICAL"));
+    Assert.assertTrue(summary.containsKey("PASSIVE"));
     
     Assert.assertTrue(summary.get("OK").equals(Integer.valueOf(1)));
     Assert.assertTrue(summary.get("WARNING").equals(Integer.valueOf(0)));
     Assert.assertTrue(summary.get("CRITICAL").equals(Integer.valueOf(1)));
-  }    
+  }
+  
+  /**
+   * Populates GANGLIA service alerts from nagios_alerts.txt against a freshly
+   * built injector and checks that the summary carries a PASSIVE bucket with
+   * one entry next to three OK entries, while all four alerts stay in the
+   * detail list. Presumably the passive entry is the fixture record whose
+   * long_plugin_output carries the AMBARIPASSIVE marker - confirm against
+   * NagiosPropertyProvider.
+   */
+  @Test
+  public void testNagiosServiceAlertsWithPassive() throws Exception {
+    Injector inj = Guice.createInjector(new GuiceModule());
+
+    // Named differently from the test-class field so the field is not shadowed.
+    Clusters mockClusters = inj.getInstance(Clusters.class);
+    Cluster cluster = createMock(Cluster.class);
+    expect(mockClusters.getCluster("c1")).andReturn(cluster);
+
+    Service nagiosService = createMock(Service.class);
+    expect(cluster.getService("NAGIOS")).andReturn(nagiosService);
+
+    ServiceComponent nagiosServiceComponent = createMock(ServiceComponent.class);
+    expect(nagiosService.getServiceComponent("NAGIOS_SERVER")).andReturn(nagiosServiceComponent);
+
+    ServiceComponentHost nagiosScHost = createMock(ServiceComponentHost.class);
+    Map<String, ServiceComponentHost> map1 = new HashMap<String, ServiceComponentHost>();
+    map1.put(HOST, nagiosScHost);
+    expect(nagiosServiceComponent.getServiceComponentHosts()).andReturn(map1);
+
+    replay(mockClusters, cluster, nagiosService, nagiosServiceComponent);
+
+    TestStreamProvider streamProvider = new TestStreamProvider("nagios_alerts.txt");
+
+    NagiosPropertyProvider npp = new NagiosPropertyProvider(Resource.Type.Service,
+        streamProvider,
+        "ServiceInfo/cluster_name",
+        "ServiceInfo/service_name");
+    npp.forceReset();
+    // Point the provider at this test's injector instead of the one from setup().
+    NagiosPropertyProvider.init(inj);
+
+    Resource resource = new ResourceImpl(Resource.Type.Service);
+    resource.setProperty("ServiceInfo/cluster_name", "c1");
+    resource.setProperty("ServiceInfo/service_name", "GANGLIA");
+
+    // request with an empty set should get all supported properties
+    Request request = PropertyHelper.getReadRequest(Collections.<String>emptySet(), new HashMap<String, TemporalInfo>());
+
+    Set<Resource> set = npp.populateResources(Collections.singleton(resource), request, null);
+    Assert.assertEquals(1, set.size());
+
+    Resource res = set.iterator().next();
+
+    Map<String, Map<String, Object>> values = res.getPropertiesMap();
+
+    Assert.assertTrue(values.containsKey("alerts"));
+    Assert.assertTrue(values.containsKey("alerts/summary"));
+    Assert.assertTrue(values.get("alerts").containsKey("detail"));
+    Assert.assertTrue(List.class.isInstance(values.get("alerts").get("detail")));
+
+    List<?> list = (List<?>) values.get("alerts").get("detail");
+    // every GANGLIA alert, including the passive one, is listed in the detail
+    Assert.assertEquals(4, list.size());
+    for (Object o : list) {
+      Assert.assertTrue(Map.class.isInstance(o));
+      Map<?, ?> map = (Map<?, ?>) o;
+      Assert.assertTrue(map.containsKey("service_name"));
+      String serviceName = map.get("service_name").toString();
+      // expected value first, so a failure message reads the right way round
+      Assert.assertEquals("GANGLIA", serviceName);
+    }
+
+    Map<String, Object> summary = values.get("alerts/summary");
+    Assert.assertTrue(summary.containsKey("OK"));
+    Assert.assertTrue(summary.containsKey("WARNING"));
+    Assert.assertTrue(summary.containsKey("CRITICAL"));
+    Assert.assertTrue(summary.containsKey("PASSIVE"));
+
+    Assert.assertEquals(Integer.valueOf(3), summary.get("OK"));
+    Assert.assertEquals(Integer.valueOf(0), summary.get("WARNING"));
+    Assert.assertEquals(Integer.valueOf(0), summary.get("CRITICAL"));
+    Assert.assertEquals(Integer.valueOf(1), summary.get("PASSIVE"));
+  }
+  
+  /**
+   * Minimal Guice module for these tests: binds {@link Clusters} to an
+   * EasyMock mock and {@link Configuration} to an instance constructed from
+   * {@link #properties}.
+   */
+  private static class GuiceModule implements Module {
+
+    // Never reassigned; tests add entries through module.properties.
+    private final Properties properties = new Properties();
+
+    @Override
+    public void configure(Binder binder) {
+      binder.bind(Clusters.class).toInstance(createMock(Clusters.class));
+      binder.bind(Configuration.class).toInstance(new Configuration(properties));
+    }
+  }
   
 }

+ 11 - 1
ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py

@@ -237,6 +237,11 @@ class TestNagiosServer(RMFTestCase):
                               content=StaticFile('check_namenodes_ha.sh'),
                               mode=0755
     )
+    self.assertResourceCalled('File',
+                              '/usr/lib64/nagios/plugins/check_wrapper.sh',
+                              content=StaticFile('check_wrapper.sh'),
+                              mode=0755
+    )
     self.assertResourceCalled('File',
                               '/usr/lib64/nagios/plugins/hdp_nagios_init.php',
                               content=StaticFile('hdp_nagios_init.php'),
@@ -258,4 +263,9 @@ class TestNagiosServer(RMFTestCase):
     self.assertResourceCalled('File', '/etc/nagios/command.cfg',
                               owner='nagios',
                               group='nagios'
-    )
+    )
+    self.assertResourceCalled('File', '/var/nagios/ignore.dat',
+                              owner='nagios',
+                              group='nagios',
+                              mode=0664
+    )

+ 11 - 2
ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py

@@ -32,7 +32,6 @@ class TestNagiosServer(RMFTestCase):
     self.assert_configure_default()
     self.assertNoMoreResources()
 
-
   def test_start_default(self):
     self.executeScript(
       "2.0.6/services/NAGIOS/package/scripts/nagios_service.py",
@@ -237,6 +236,11 @@ class TestNagiosServer(RMFTestCase):
                               content=StaticFile('check_namenodes_ha.sh'),
                               mode=0755
     )
+    self.assertResourceCalled('File',
+                              '/usr/lib64/nagios/plugins/check_wrapper.sh',
+                              content=StaticFile('check_wrapper.sh'),
+                              mode=0755
+    )
     self.assertResourceCalled('File',
                               '/usr/lib64/nagios/plugins/hdp_nagios_init.php',
                               content=StaticFile('hdp_nagios_init.php'),
@@ -256,4 +260,9 @@ class TestNagiosServer(RMFTestCase):
     self.assertResourceCalled('File', '/etc/nagios/command.cfg',
                               owner='nagios',
                               group='nagios'
-    )
+    )
+    self.assertResourceCalled('File', '/var/nagios/ignore.dat',
+                              owner='nagios',
+                              group='nagios',
+                              mode=0664
+    )

+ 1 - 0
ambari-server/src/test/resources/nagios_alerts.txt

@@ -39,6 +39,7 @@
             "current_state": "0",
             "plugin_output": "TCP OK - 0.001 second response time on port 8662",
             "last_hard_state_change": "1389125389",
+            "long_plugin_output": "AMBARIPASSIVE=2\n",
             "last_hard_state": "0",
             "last_time_ok": "1389288714",
             "last_time_warning": "0",

+ 2 - 1
contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php

@@ -286,7 +286,8 @@ function hdp_mon_generate_response( $response_data )
     $servicestatus_attributes = array ("service_description", "host_name", "current_attempt",
                                        "current_state", "plugin_output", "last_hard_state_change", "last_hard_state",
                                        "last_time_ok", "last_time_warning", "last_time_unknown",
-                                       "last_time_critical", "is_flapping", "last_check");
+                                       "last_time_critical", "is_flapping", "last_check",
+                                       "long_plugin_output");
 
     $num_matches = preg_match_all("/servicestatus \{([\S\s]*?)\}/", $status_file_content, $matches, PREG_PATTERN_ORDER);
     #echo $matches[0][0] . ", " . $matches[0][1] . "\n";