瀏覽代碼

YARN-820. Fixed an invalid state transition in NodeManager caused by failing resource localization. Contributed by Mayank Bansal.
svn merge --ignore-ancestry -c 1503947 ../../trunk/


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1503948 13f79535-47bb-0310-9956-ffa450edef68

Vinod Kumar Vavilapalli 11 年之前
父節點
當前提交
0623ee954a

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -48,6 +48,9 @@ Release 2.1.1-beta - UNRELEASED
     YARN-661. Fixed NM to cleanup users' local directories correctly when
     starting up. (Omkar Vinit Joshi via vinodkv)
 
+    YARN-820. Fixed an invalid state transition in NodeManager caused by failing
+    resource localization. (Mayank Bansal via vinodkv)
+
 Release 2.1.0-beta - 2013-07-02
 
   INCOMPATIBLE CHANGES

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java

@@ -290,6 +290,11 @@ public class ContainerImpl implements Container {
     .addTransition(ContainerState.DONE, ContainerState.DONE,
        ContainerEventType.UPDATE_DIAGNOSTICS_MSG,
        UPDATE_DIAGNOSTICS_TRANSITION)
+    // This transition may result when
+    // we notify container of failed localization if localizer thread (for
+    // that container) fails for some reason
+    .addTransition(ContainerState.DONE, ContainerState.DONE,
+        ContainerEventType.RESOURCE_FAILED)
 
     // create the topology tables
     .installTopology();

+ 10 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java

@@ -112,12 +112,17 @@ public class LocalizedResource implements EventHandler<ResourceEvent> {
       .append(getState() == ResourceState.LOCALIZED
           ? getLocalPath() + "," + getSize()
           : "pending").append(",[");
-    for (ContainerId c : ref) {
-      sb.append("(").append(c.toString()).append(")");
+    try {
+      this.readLock.lock();
+      for (ContainerId c : ref) {
+        sb.append("(").append(c.toString()).append(")");
+      }
+      sb.append("],").append(getTimestamp()).append(",").append(getState())
+        .append("}");
+      return sb.toString();
+    } finally {
+      this.readLock.unlock();
     }
-    sb.append("],").append(getTimestamp()).append(",")
-      .append(getState()).append("}");
-    return sb.toString();
   }
 
   private void release(ContainerId container) {

+ 31 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java

@@ -240,6 +240,32 @@ public class TestContainer {
       }
     }
   }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  // mocked generic
+  public void testLocalizationFailureAtDone() throws Exception {
+    WrappedContainer wc = null;
+    try {
+      wc = new WrappedContainer(6, 314159265358979L, 4344, "yak");
+      wc.initContainer();
+      wc.localizeResources();
+      wc.launchContainer();
+      reset(wc.localizerBus);
+      wc.containerSuccessful();
+      wc.containerResourcesCleanup();
+      assertEquals(ContainerState.DONE, wc.c.getContainerState());
+      // Now in DONE, issue RESOURCE_FAILED as done by LocalizeRunner
+      wc.resourceFailedContainer();
+      // Verify still in DONE
+      assertEquals(ContainerState.DONE, wc.c.getContainerState());
+      verifyCleanupCall(wc);
+    } finally {
+      if (wc != null) {
+        wc.finished();
+      }
+    }
+  }
   
   @Test
   @SuppressWarnings("unchecked") // mocked generic
@@ -624,6 +650,11 @@ public class TestContainer {
       drainDispatcherEvents();
     }
 
+    public void resourceFailedContainer() {
+      c.handle(new ContainerEvent(cId, ContainerEventType.RESOURCE_FAILED));
+      drainDispatcherEvents();
+    }
+
     // Localize resources 
     // Skip some resources so as to consider them failed
     public Map<Path, List<String>> doLocalizeResources(