Browse code

Fix for NPE in YarnChild that was causing lots of tasks to fail. Contributed by Vinod Kumar Vavilapalli.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/MR-279@1138456 13f79535-47bb-0310-9956-ffa450edef68
Vinod Kumar Vavilapalli 14 years ago
parent
commit
7bb6812a2e

+ 2 - 0
mapreduce/CHANGES.txt

@@ -5,6 +5,8 @@ Trunk (unreleased changes)
 
     MAPREDUCE-279
 
+    Fix for NPE in YarnChild that was causing lots of tasks to fail. (vinodkv)
+
     Fix for ConcurrentModification exception while iterating through tokens in
     a UGI in ContainerLauncherImpl. (ddas)
 

+ 1 - 1
mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java

@@ -402,7 +402,7 @@ public class TaskAttemptListenerImpl extends CompositeService
       
       return jvmTask;
     }
-    return new JvmTask(null, false);
+    return null;
   }
 
   @Override

+ 5 - 2
mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java

@@ -18,7 +18,7 @@
 
 package org.apache.hadoop.mapred;
 
-import static java.util.concurrent.TimeUnit.SECONDS;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@@ -112,7 +112,10 @@ class YarnChild {
       JvmTask myTask = null;;
       // poll for new task
       for (int idle = 0; null == myTask; ++idle) {
-        SECONDS.sleep(Math.min(idle * 500, 1500));
+        long sleepTimeMilliSecs = Math.min(idle * 500, 1500);
+        LOG.info("Sleeping for " + sleepTimeMilliSecs
+            + "ms before retrying again. Got null now.");
+        MILLISECONDS.sleep(sleepTimeMilliSecs);
         myTask = umbilical.getTask(context);
       }
       if (myTask.shouldDie()) {

+ 2 - 2
mapreduce/mr-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java

@@ -202,14 +202,14 @@ public class ContainerLauncherImpl extends AbstractService implements
           ContainerLaunchContext containerLaunchContext =
               launchEv.getContainer();
 
-          // TODO: Make sure that child's mapred-local-dir is set correctly.
-
           // Now launch the actual container
           StartContainerRequest startRequest = recordFactory
               .newRecordInstance(StartContainerRequest.class);
           startRequest.setContainerLaunchContext(containerLaunchContext);
           proxy.startContainer(startRequest);
 
+          LOG.info("Returning from container-launch for " + taskAttemptID);
+
           // after launching, send launched event to task attempt to move
           // it from ASSIGNED to RUNNING state
           context.getEventHandler().handle(

+ 1 - 5
mapreduce/yarn/yarn-server/yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java

@@ -29,13 +29,12 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Shell.ExitCodeException;
 import org.apache.hadoop.util.Shell.ShellCommandExecutor;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerDiagnosticsUpdateEvent;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch;
 import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer;
 import org.apache.hadoop.yarn.util.ConverterUtils;
 
@@ -167,9 +166,6 @@ public class LinuxContainerExecutor extends ContainerExecutor {
     launchCommandObjs.put(containerId, shExec);
     // DEBUG
     LOG.info("launchContainer: " + Arrays.toString(commandArray));
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("launchContainer: " + Arrays.toString(commandArray));
-    }
     String output = shExec.getOutput();
     try {
       shExec.execute();