
(1) Faster retries from AM to HistoryServer (2) Correct diagnostics for containers. Contributed by Vinod Kumar Vavilapalli.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/MR-279@1141903 13f79535-47bb-0310-9956-ffa450edef68
Vinod Kumar Vavilapalli 14 years ago
parent
commit
3f39c99a3d
15 changed files with 186 additions and 104 deletions
  1. +3 -0
      mapreduce/CHANGES.txt
  2. +1 -2
      mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
  3. +4 -4
      mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java
  4. +2 -2
      mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
  5. +4 -1
      mapreduce/mr-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java
  6. +117 -59
      mapreduce/mr-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java
  7. +2 -6
      mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
  8. +3 -8
      mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
  9. +2 -6
      mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java
  10. +5 -1
      mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
  11. +35 -0
      mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java
  12. +2 -9
      mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
  13. +1 -2
      mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java
  14. +1 -2
      mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
  15. +4 -2
      mapreduce/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

+ 3 - 0
mapreduce/CHANGES.txt

@@ -5,6 +5,9 @@ Trunk (unreleased changes)
 
     MAPREDUCE-279
 
+    (1) Faster retries from AM to HistoryServer (2) Correct diagnostics for
+    containers. (vinodkv)
+
     MAPREDUCE-2625. Add version info to nodemanager info page.
     (Jonathan Eagles via llu)
 

+ 1 - 2
mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java

@@ -29,13 +29,12 @@ import java.util.Map;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.ipc.ProtocolSignature;
+import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.ipc.RPC.Server;
 import org.apache.hadoop.mapred.SortedRanges.Range;
 import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
-import org.apache.hadoop.mapreduce.v2.MRConstants;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
 import org.apache.hadoop.mapreduce.v2.app.AMConstants;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;

+ 4 - 4
mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java

@@ -215,9 +215,9 @@ public class ContainerLauncherImpl extends AbstractService implements
           context.getEventHandler().handle(
               new TaskAttemptEvent(taskAttemptID,
                   TaskAttemptEventType.TA_CONTAINER_LAUNCHED));
-        } catch (Exception e) {
+        } catch (Throwable t) {
           String message = "Container launch failed for " + containerID
-              + " : " + StringUtils.stringifyException(e);
+              + " : " + StringUtils.stringifyException(t);
           LOG.error(message);
           context.getEventHandler().handle(
               new TaskAttemptDiagnosticsUpdateEvent(taskAttemptID, message));
@@ -249,10 +249,10 @@ public class ContainerLauncherImpl extends AbstractService implements
             stopRequest.setContainerId(event.getContainerID());
             proxy.stopContainer(stopRequest);
 
-          } catch (Exception e) {
+          } catch (Throwable t) {
             //ignore the cleanup failure
             LOG.warn("cleanup failed for container " + event.getContainerID() ,
-                e);
+                t);
           }
 
           // after killing, send killed event to taskattempt

+ 2 - 2
mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java

@@ -143,9 +143,9 @@ public class RMContainerAllocator extends RMContainerRequestor
 
   @Override
   protected synchronized void heartbeat() throws Exception {
-    LOG.info("Before Allocation: " + getStat());
+    LOG.info("Before Scheduling: " + getStat());
     List<Container> allocatedContainers = getResources();
-    LOG.info("After Allocation: " + getStat());
+    LOG.info("After Scheduling: " + getStat());
     if (allocatedContainers.size() > 0) {
       LOG.info("Before Assign: " + getStat());
       scheduledRequests.assign(allocatedContainers);

+ 4 - 1
mapreduce/mr-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java

@@ -78,7 +78,10 @@ public class ClientServiceDelegate {
   private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
 
   ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm) {
-    this.conf = conf;
+    this.conf = new Configuration(conf); // Cloning for modifying.
+    // For faster redirects from AM to HS.
+    this.conf.setInt(
+        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 3);
     this.rm = rm;
   }
 

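Note: the ClientServiceDelegate hunk above is the whole of item (1). The delegate clones the job configuration and caps the IPC connect retries, so that once the AM exits the client gives up on the dead AM address quickly and redirects to the HistoryServer. A minimal standalone sketch of that tweak follows; the class and method names here are illustrative and not part of the commit, while the configuration key is the same one used in the hunk.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.CommonConfigurationKeysPublic;

    // Illustrative helper (hypothetical name): clone the configuration so the
    // retry override stays local to the client-side delegate, then cap the
    // number of IPC connect retries so a dead AM is abandoned quickly and the
    // client can fall back to the HistoryServer.
    public class FastRedirectConf {
      public static Configuration withFastRedirects(Configuration original) {
        Configuration conf = new Configuration(original); // clone before modifying
        conf.setInt(
            CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
            3); // the stock default is 10 connect attempts
        return conf;
      }
    }

With only three connect attempts, the first getCounters call after the AM shuts down fails over to the HistoryServer much sooner, which is exactly what TestClientRedirect below exercises by reusing the same Cluster client before and after amService.stop().
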
+ 117 - 59
mapreduce/mr-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.mapred;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
+import java.util.Iterator;
 
 import junit.framework.Assert;
 
@@ -28,6 +29,9 @@ import org.apache.avro.ipc.Server;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.mapreduce.ClientFactory;
+import org.apache.hadoop.mapreduce.Cluster;
 import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol;
 import org.apache.hadoop.mapreduce.v2.api.protocolrecords.FailTaskAttemptRequest;
 import org.apache.hadoop.mapreduce.v2.api.protocolrecords.FailTaskAttemptResponse;
@@ -51,11 +55,12 @@ import org.apache.hadoop.mapreduce.v2.api.protocolrecords.KillTaskAttemptRequest
 import org.apache.hadoop.mapreduce.v2.api.protocolrecords.KillTaskAttemptResponse;
 import org.apache.hadoop.mapreduce.v2.api.protocolrecords.KillTaskRequest;
 import org.apache.hadoop.mapreduce.v2.api.protocolrecords.KillTaskResponse;
+import org.apache.hadoop.mapreduce.v2.api.records.Counter;
+import org.apache.hadoop.mapreduce.v2.api.records.CounterGroup;
 import org.apache.hadoop.mapreduce.v2.api.records.Counters;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
-import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
-import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
-import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
+import org.apache.hadoop.mapreduce.v2.api.records.JobReport;
+import org.apache.hadoop.mapreduce.v2.api.records.JobState;
 import org.apache.hadoop.mapreduce.v2.jobhistory.JHConfig;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.net.NetUtils;
@@ -89,7 +94,6 @@ import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.factory.providers.YarnRemoteExceptionFactoryProvider;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
-import org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager.ApplicationsManager;
 import org.apache.hadoop.yarn.service.AbstractService;
 import org.junit.Test;
 
@@ -105,7 +109,6 @@ public class TestClientRedirect {
   
   private static final String AMHOSTADDRESS = "0.0.0.0:10020";
   private static final String HSHOSTADDRESS = "0.0.0.0:10021";
-  private static final int HSPORT = 10020;
   private volatile boolean amContact = false; 
   private volatile boolean hsContact = false;
   private volatile boolean amRunning = false;
@@ -114,6 +117,8 @@ public class TestClientRedirect {
   public void testRedirect() throws Exception {
     
     Configuration conf = new YarnConfiguration();
+    conf.setClass("mapreduce.clientfactory.class.name",
+        YarnClientFactory.class, ClientFactory.class);
     conf.set(YarnConfiguration.APPSMANAGER_ADDRESS, RMADDRESS);
     conf.set(JHConfig.HS_BIND_ADDRESS, HSHOSTADDRESS);
     RMService rmService = new RMService("test");
@@ -130,18 +135,46 @@ public class TestClientRedirect {
     historyService.start(conf);
   
     LOG.info("services started");
-    YARNRunner yarnRunner = new YARNRunner(conf);
-    Throwable t = null;
+    Cluster cluster = new Cluster(conf);
     org.apache.hadoop.mapreduce.JobID jobID =
       new org.apache.hadoop.mapred.JobID("201103121733", 1);
-    yarnRunner.getJobCounters(jobID);
+    org.apache.hadoop.mapreduce.Counters counters = cluster.getJob(jobID)
+        .getCounters();
+    Iterator<org.apache.hadoop.mapreduce.CounterGroup> it = counters.iterator();
+    while (it.hasNext()) {
+      org.apache.hadoop.mapreduce.CounterGroup group = it.next();
+      LOG.info("Group " + group.getDisplayName());
+      Iterator<org.apache.hadoop.mapreduce.Counter> itc = group.iterator();
+      while (itc.hasNext()) {
+        LOG.info("Counter is " + itc.next().getDisplayName());
+      }
+    }
     Assert.assertTrue(amContact);
-    
+
+    LOG.info("Sleeping for 5 seconds before stop for" +
+    " the client socket to not get EOF immediately..");
+    Thread.sleep(5000);
+
     //bring down the AM service
     amService.stop();
     amRunning = false;
-    
-    yarnRunner.getJobCounters(jobID);
+
+    LOG.info("Sleeping for 5 seconds after stop for" +
+    		" the server to exit cleanly..");
+    Thread.sleep(5000);
+
+    // Same client
+    counters = cluster.getJob(jobID).getCounters();
+    it = counters.iterator();
+    while (it.hasNext()) {
+      org.apache.hadoop.mapreduce.CounterGroup group = it.next();
+      LOG.info("Group " + group.getDisplayName());
+      Iterator<org.apache.hadoop.mapreduce.Counter> itc = group.iterator();
+      while (itc.hasNext()) {
+        LOG.info("Counter is " + itc.next().getDisplayName());
+      }
+    }
+
     Assert.assertTrue(hsContact);
     
     rmService.stop();
@@ -149,7 +182,6 @@ public class TestClientRedirect {
   }
 
   class RMService extends AbstractService implements ClientRMProtocol {
-    private ApplicationsManager applicationsManager;
     private String clientServiceBindAddress;
     InetSocketAddress clientBindAddress;
     private Server server;
@@ -208,45 +240,45 @@ public class TestClientRedirect {
     }
 
     @Override
-    public SubmitApplicationResponse submitApplication(SubmitApplicationRequest request) throws YarnRemoteException {
-      throw YarnRemoteExceptionFactoryProvider.getYarnRemoteExceptionFactory(null).createYarnRemoteException("Test");
+    public SubmitApplicationResponse submitApplication(
+        SubmitApplicationRequest request) throws YarnRemoteException {
+      throw YarnRemoteExceptionFactoryProvider.getYarnRemoteExceptionFactory(
+          null).createYarnRemoteException("Test");
     }
-    
+
     @Override
-    public FinishApplicationResponse finishApplication(FinishApplicationRequest request) throws YarnRemoteException {
+    public FinishApplicationResponse finishApplication(
+        FinishApplicationRequest request) throws YarnRemoteException {
       return null;
     }
-    
+
     @Override
-    public GetClusterMetricsResponse getClusterMetrics(GetClusterMetricsRequest request) throws YarnRemoteException {
+    public GetClusterMetricsResponse getClusterMetrics(
+        GetClusterMetricsRequest request) throws YarnRemoteException {
       return null;
     }
 
     @Override
     public GetAllApplicationsResponse getAllApplications(
         GetAllApplicationsRequest request) throws YarnRemoteException {
-      // TODO Auto-generated method stub
       return null;
     }
 
     @Override
     public GetClusterNodesResponse getClusterNodes(
         GetClusterNodesRequest request) throws YarnRemoteException {
-      // TODO Auto-generated method stub
       return null;
     }
 
     @Override
     public GetQueueInfoResponse getQueueInfo(GetQueueInfoRequest request)
         throws YarnRemoteException {
-      // TODO Auto-generated method stub
       return null;
     }
 
     @Override
     public GetQueueUserAclsInfoResponse getQueueUserAcls(
         GetQueueUserAclsInfoRequest request) throws YarnRemoteException {
-      // TODO Auto-generated method stub
       return null;
     }
   }
@@ -258,10 +290,8 @@ public class TestClientRedirect {
 
     @Override
     public GetCountersResponse getCounters(GetCountersRequest request) throws YarnRemoteException {
-      JobId jobId = request.getJobId();
       hsContact = true;
-      Counters counters = recordFactory.newRecordInstance(Counters.class);
-//      counters.groups = new HashMap<CharSequence, CounterGroup>();
+      Counters counters = getMyCounters();
       GetCountersResponse response = recordFactory.newRecordInstance(GetCountersResponse.class);
       response.setCounters(counters);
       return response;
@@ -278,7 +308,7 @@ public class TestClientRedirect {
     }
     
     public AMService(String hostAddress) {
-      super("TestClientService");
+      super("AMService");
       this.hostAddress = hostAddress;
     }
 
@@ -310,79 +340,107 @@ public class TestClientRedirect {
     }
 
     @Override
-    public GetCountersResponse getCounters(GetCountersRequest request) throws YarnRemoteException {
+    public GetCountersResponse getCounters(GetCountersRequest request)
+        throws YarnRemoteException {
       JobId jobID = request.getJobId();
-    
+
       amContact = true;
-      Counters counters = recordFactory.newRecordInstance(Counters.class);
-//      counters.groups = new HashMap<CharSequence, CounterGroup>();
-        GetCountersResponse response = recordFactory.newRecordInstance(GetCountersResponse.class);
-        response.setCounters(counters);
-        return response;
-      }
+
+      Counters counters = getMyCounters();
+      GetCountersResponse response = recordFactory
+          .newRecordInstance(GetCountersResponse.class);
+      response.setCounters(counters);
+      return response;
+    }
 
     @Override
-    public GetJobReportResponse getJobReport(GetJobReportRequest request) throws YarnRemoteException {
-      JobId jobId = request.getJobId();
-      return null;
+    public GetJobReportResponse getJobReport(GetJobReportRequest request)
+        throws YarnRemoteException {
+
+      amContact = true;
+
+      JobReport jobReport = recordFactory.newRecordInstance(JobReport.class);
+      jobReport.setJobId(request.getJobId());
+      jobReport.setJobState(JobState.RUNNING);
+      GetJobReportResponse response = recordFactory
+          .newRecordInstance(GetJobReportResponse.class);
+      response.setJobReport(jobReport);
+      return response;
     }
 
     @Override
-    public GetTaskReportResponse getTaskReport(GetTaskReportRequest request) throws YarnRemoteException {
-      TaskId taskID = request.getTaskId();
+    public GetTaskReportResponse getTaskReport(GetTaskReportRequest request)
+        throws YarnRemoteException {
       return null;
     }
 
-
     @Override
-    public GetTaskAttemptReportResponse getTaskAttemptReport(GetTaskAttemptReportRequest request) throws YarnRemoteException {
-      TaskAttemptId taskAttemptID = request.getTaskAttemptId();
+    public GetTaskAttemptReportResponse getTaskAttemptReport(
+        GetTaskAttemptReportRequest request) throws YarnRemoteException {
       return null;
     }
 
     @Override
-    public GetTaskAttemptCompletionEventsResponse getTaskAttemptCompletionEvents(GetTaskAttemptCompletionEventsRequest request) throws YarnRemoteException {
-      JobId jobId = request.getJobId();
-      int fromEventId = request.getFromEventId();
-      int maxEvents = request.getMaxEvents();
+    public GetTaskAttemptCompletionEventsResponse
+        getTaskAttemptCompletionEvents(
+            GetTaskAttemptCompletionEventsRequest request)
+            throws YarnRemoteException {
       return null;
     }
 
     @Override
-    public GetTaskReportsResponse getTaskReports(GetTaskReportsRequest request) throws YarnRemoteException {
-      JobId jobID = request.getJobId();
-      TaskType taskType = request.getTaskType();
+    public GetTaskReportsResponse
+        getTaskReports(GetTaskReportsRequest request)
+            throws YarnRemoteException {
       return null;
     }
 
     @Override
-    public GetDiagnosticsResponse getDiagnostics(GetDiagnosticsRequest request) throws YarnRemoteException {
-      TaskAttemptId taskAttemptID = request.getTaskAttemptId();
+    public GetDiagnosticsResponse
+        getDiagnostics(GetDiagnosticsRequest request)
+            throws YarnRemoteException {
       return null;
     }
 
     @Override
-    public KillJobResponse killJob(KillJobRequest request) throws YarnRemoteException {
-      JobId jobID = request.getJobId();
+    public KillJobResponse killJob(KillJobRequest request)
+        throws YarnRemoteException {
       return null;
     }
 
     @Override
-    public KillTaskResponse killTask(KillTaskRequest request) throws YarnRemoteException {
-      TaskId taskID = request.getTaskId();
+    public KillTaskResponse killTask(KillTaskRequest request)
+        throws YarnRemoteException {
       return null;
     }
 
     @Override
-    public KillTaskAttemptResponse killTaskAttempt(KillTaskAttemptRequest request) throws YarnRemoteException {
-      TaskAttemptId taskAttemptID = request.getTaskAttemptId();
+    public KillTaskAttemptResponse killTaskAttempt(
+        KillTaskAttemptRequest request) throws YarnRemoteException {
       return null;
     }
 
     @Override
-    public FailTaskAttemptResponse failTaskAttempt(FailTaskAttemptRequest request) throws YarnRemoteException {
-      TaskAttemptId taskAttemptID = request.getTaskAttemptId();
+    public FailTaskAttemptResponse failTaskAttempt(
+        FailTaskAttemptRequest request) throws YarnRemoteException {
       return null;
     }
   }
+
+  static Counters getMyCounters() {
+    Counter counter = recordFactory.newRecordInstance(Counter.class);
+    counter.setName("Mycounter");
+    counter.setDisplayName("My counter display name");
+    counter.setValue(12345);
+
+    CounterGroup group = recordFactory
+        .newRecordInstance(CounterGroup.class);
+    group.setName("MyGroup");
+    group.setDisplayName("My groupd display name");
+    group.setCounter("myCounter", counter);
+
+    Counters counters = recordFactory.newRecordInstance(Counters.class);
+    counters.setCounterGroup("myGroupd", group);
+    return counters;
+  }
 }

+ 2 - 6
mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java

@@ -247,8 +247,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
     }
   }
 
-  protected void startStatusUpdater() throws InterruptedException,
-    YarnRemoteException {
+  protected void startStatusUpdater() {
 
     new Thread() {
       @Override
@@ -280,12 +279,9 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
               dispatcher.getEventHandler().handle(
                   new CMgrCompletedAppsEvent(appsToCleanup));
             }
-          } catch (YarnRemoteException e) {
+          } catch (Throwable e) {
             LOG.error("Caught exception in status-updater", e);
             break;
-          } catch (InterruptedException e) {
-            LOG.error("Status-updater interrupted", e);
-            break;
           }
         }
       }

+ 3 - 8
mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java

@@ -72,10 +72,10 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationInitEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
@@ -301,10 +301,8 @@ public class ContainerManagerImpl extends CompositeService implements
       return response; // Return immediately.
     }
     dispatcher.getEventHandler().handle(
-        new ContainerDiagnosticsUpdateEvent(containerID,
+        new ContainerKillEvent(containerID,
             "Container killed by the ApplicationMaster."));
-    dispatcher.getEventHandler().handle(
-        new ContainerEvent(containerID, ContainerEventType.KILL_CONTAINER));
 
     // TODO: Move this code to appropriate place once kill_container is
     // implemented.
@@ -380,11 +378,8 @@ public class ContainerManagerImpl extends CompositeService implements
       for (org.apache.hadoop.yarn.api.records.Container container :
             containersFinishedEvent.getContainersToCleanup()) {
         this.dispatcher.getEventHandler().handle(
-            new ContainerDiagnosticsUpdateEvent(container.getId(),
+            new ContainerKillEvent(container.getId(),
                 "Container Killed by ResourceManager"));
-        this.dispatcher.getEventHandler().handle(
-            new ContainerEvent(container.getId(),
-                ContainerEventType.KILL_CONTAINER));
       }
       break;
     default:

+ 2 - 6
mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/application/ApplicationImpl.java

@@ -30,9 +30,8 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerInitEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ApplicationLocalizationEvent;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizationEventType;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.ContainerLogsRetentionPolicy;
@@ -258,11 +257,8 @@ public class ApplicationImpl implements Application {
       // application.
       for (ContainerId containerID : app.containers.keySet()) {
         app.dispatcher.getEventHandler().handle(
-            new ContainerDiagnosticsUpdateEvent(containerID,
+            new ContainerKillEvent(containerID,
                 "Container killed on application-finish event from RM."));
-        app.dispatcher.getEventHandler().handle(
-            new ContainerEvent(containerID,
-                ContainerEventType.KILL_CONTAINER));
       }
       return ApplicationState.FINISHING_CONTAINERS_WAIT;
     }

+ 5 - 1
mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java

@@ -234,7 +234,7 @@ public class ContainerImpl implements Container {
 
     // From DONE
     .addTransition(ContainerState.DONE, ContainerState.DONE,
-        ContainerEventType.KILL_CONTAINER, CONTAINER_DONE_TRANSITION)
+        ContainerEventType.KILL_CONTAINER)
     .addTransition(ContainerState.DONE, ContainerState.DONE,
        ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
        UPDATE_DIAGNOSTICS_TRANSITION)
@@ -599,6 +599,8 @@ public class ContainerImpl implements Container {
           new ContainerLocalizationEvent(
             LocalizationEventType.CLEANUP_CONTAINER_RESOURCES, container));
       container.metrics.endInitingContainer();
+      ContainerKillEvent killEvent = (ContainerKillEvent) event;
+      container.diagnostics.append(killEvent.getDiagnostic()).append("\n");
     }
   }
 
@@ -629,6 +631,8 @@ public class ContainerImpl implements Container {
       container.dispatcher.getEventHandler().handle(
           new ContainersLauncherEvent(container,
               ContainersLauncherEventType.CLEANUP_CONTAINER));
+      ContainerKillEvent killEvent = (ContainerKillEvent) event;
+      container.diagnostics.append(killEvent.getDiagnostic()).append("\n");
     }
   }
 

+ 35 - 0
mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerKillEvent.java

@@ -0,0 +1,35 @@
+/**
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.container;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+
+public class ContainerKillEvent extends ContainerEvent {
+
+  private final String diagnostic;
+
+  public ContainerKillEvent(ContainerId cID, String diagnostic) {
+    super(cID, ContainerEventType.KILL_CONTAINER);
+    this.diagnostic = diagnostic;
+  }
+
+  public String getDiagnostic() {
+    return this.diagnostic;
+  }
+}
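
The new event class above is the heart of item (2): the kill reason now travels with the event itself, so ContainerImpl appends it to the container's diagnostics in the same transition that triggers cleanup, and callers no longer pair a ContainerDiagnosticsUpdateEvent with a separate bare KILL_CONTAINER event. A hedged sketch of how a call site dispatches it is below; the wrapper class and method names are illustrative, while the dispatch pattern matches the ContainerManagerImpl and ApplicationImpl hunks above.

    import org.apache.hadoop.yarn.api.records.ContainerId;
    import org.apache.hadoop.yarn.event.Dispatcher;
    import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent;

    // Illustrative wrapper (hypothetical class/method names); the dispatch
    // itself is the pattern this commit uses in ContainerManagerImpl,
    // ApplicationImpl and ContainersMonitorImpl.
    public class ContainerKillExample {
      static void killWithDiagnostic(Dispatcher dispatcher,
          ContainerId containerId, String reason) {
        // The ContainerKillEvent constructor fixes the event type to
        // KILL_CONTAINER, and the reason string is what ContainerImpl
        // appends to the container's diagnostics before cleanup.
        dispatcher.getEventHandler().handle(
            new ContainerKillEvent(containerId, reason));
      }
    }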

+ 2 - 9
mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java

@@ -16,11 +16,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
 import org.apache.hadoop.yarn.server.nodemanager.Context;
 import org.apache.hadoop.yarn.server.nodemanager.NMConfig;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent;
 import org.apache.hadoop.yarn.service.AbstractService;
 import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree;
 import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
@@ -450,12 +446,9 @@ public class ContainersMonitorImpl extends AbstractService implements
                 LOG.error("Killed container process with PID " + pId
                     + " but it is not a process group leader.");
               }
-              eventDispatcher.getEventHandler().handle(
-                  new ContainerDiagnosticsUpdateEvent(containerId, msg));
               // kill the container
               eventDispatcher.getEventHandler().handle(
-                  new ContainerEvent(containerId,
-                      ContainerEventType.KILL_CONTAINER));
+                  new ContainerKillEvent(containerId, msg));
               it.remove();
               LOG.info("Removed ProcessTree with root " + pId);
             } else {

+ 1 - 2
mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java

@@ -91,8 +91,7 @@ public class TestEventFlow {
       };
 
       @Override
-      protected void startStatusUpdater() throws InterruptedException,
-          YarnRemoteException {
+      protected void startStatusUpdater() {
         return; // Don't start any updating thread.
       }
     };

+ 1 - 2
mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java

@@ -102,8 +102,7 @@ public abstract class BaseContainerManagerTest {
     };
 
     @Override
-    protected void startStatusUpdater() throws InterruptedException,
-        YarnRemoteException {
+    protected void startStatusUpdater() {
       return; // Don't start any updating thread.
     }
   };

+ 4 - 2
mapreduce/yarn/yarn-server/yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

@@ -507,7 +507,8 @@ public class LeafQueue implements Queue {
         continue;
       }
       
-      LOG.info("DEBUG --- pre-assignContainers");
+      LOG.info("DEBUG --- pre-assignContainers for application "
+          + application.getApplicationId());
       application.showRequests();
 
       synchronized (application) {
@@ -569,7 +570,8 @@ public class LeafQueue implements Queue {
         }
       }
 
-      LOG.info("DEBUG --- post-assignContainers");
+      LOG.info("DEBUG --- post-assignContainers for application "
+          + application.getApplicationId());
       application.showRequests();
     }