Explorar el Código

YARN-93. Fixed RM to propagate diagnostics from applications that have finished but failed Contributed by Jason Lowe.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384169 13f79535-47bb-0310-9956-ffa450edef68
Vinod Kumar Vavilapalli hace 12 años
padre
commit
7c491887e5

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -28,6 +28,9 @@ Release 2.0.3-alpha - Unreleased
     YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via
     vinodkv)
 
+    YARN-93. Fixed RM to propagate diagnostics from applications that have
+    finished but failed (Jason Lowe via vinodkv). 
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 35 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFinishedAttemptEvent.java

@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
+
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+
+public class RMAppFinishedAttemptEvent extends RMAppEvent {
+
+  private final String diagnostics;
+
+  public RMAppFinishedAttemptEvent(ApplicationId appId, String diagnostics) {
+    super(appId, RMAppEventType.ATTEMPT_FINISHED);
+    this.diagnostics = diagnostics;
+  }
+
+  public String getDiagnostics() {
+    return this.diagnostics;
+  }
+}

+ 13 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java

@@ -101,6 +101,8 @@ public class RMAppImpl implements RMApp {
   @SuppressWarnings("rawtypes")
   private EventHandler handler;
   private static final FinalTransition FINAL_TRANSITION = new FinalTransition();
+  private static final AppFinishedTransition FINISHED_TRANSITION =
+      new AppFinishedTransition();
 
   private static final StateMachineFactory<RMAppImpl,
                                            RMAppState,
@@ -150,7 +152,7 @@ public class RMAppImpl implements RMApp {
     .addTransition(RMAppState.RUNNING, RMAppState.FINISHING,
         RMAppEventType.ATTEMPT_FINISHING, new RMAppFinishingTransition())
     .addTransition(RMAppState.RUNNING, RMAppState.FINISHED,
-        RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION)
+        RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
     .addTransition(RMAppState.RUNNING,
         EnumSet.of(RMAppState.SUBMITTED, RMAppState.FAILED),
         RMAppEventType.ATTEMPT_FAILED,
@@ -160,7 +162,7 @@ public class RMAppImpl implements RMApp {
 
      // Transitions from FINISHING state
     .addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
-        RMAppEventType.ATTEMPT_FINISHED, FINAL_TRANSITION)
+        RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
     .addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
         RMAppEventType.KILL, new KillAppAndAttemptTransition())
     // ignorable transitions
@@ -572,6 +574,15 @@ public class RMAppImpl implements RMApp {
     }
   }
 
+  private static class AppFinishedTransition extends FinalTransition {
+    public void transition(RMAppImpl app, RMAppEvent event) {
+      RMAppFinishedAttemptEvent finishedEvent =
+          (RMAppFinishedAttemptEvent)event;
+      app.diagnostics.append(finishedEvent.getDiagnostics());
+      super.transition(app, event);
+    };
+  }
+
   private static class AppKilledTransition extends FinalTransition {
     @Override
     public void transition(RMAppImpl app, RMAppEvent event) {

+ 3 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java

@@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFinishedAttemptEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent;
@@ -688,8 +689,8 @@ public class RMAppAttemptImpl implements RMAppAttempt {
       switch (finalAttemptState) {
         case FINISHED:
         {
-          appEvent =
-              new RMAppEvent(applicationId, RMAppEventType.ATTEMPT_FINISHED);
+          appEvent = new RMAppFinishedAttemptEvent(applicationId,
+              appAttempt.getDiagnostics());
         }
         break;
         case KILLED:

+ 18 - 11
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java

@@ -321,7 +321,8 @@ public class TestRMAppTransitions {
   }
 
   protected RMApp testCreateAppFinished(
-      ApplicationSubmissionContext submissionContext) throws IOException {
+      ApplicationSubmissionContext submissionContext,
+      String diagnostics) throws IOException {
     // unmanaged AMs don't use the FINISHING state
     RMApp application = null;
     if (submissionContext != null && submissionContext.getUnmanagedAM()) {
@@ -330,14 +331,15 @@ public class TestRMAppTransitions {
       application = testCreateAppFinishing(submissionContext);
     }
     // RUNNING/FINISHING => FINISHED event RMAppEventType.ATTEMPT_FINISHED
-    RMAppEvent finishedEvent = 
-        new RMAppEvent(application.getApplicationId(), 
-            RMAppEventType.ATTEMPT_FINISHED);
+    RMAppEvent finishedEvent = new RMAppFinishedAttemptEvent(
+        application.getApplicationId(), diagnostics);
     application.handle(finishedEvent);
     assertAppState(RMAppState.FINISHED, application);
     assertTimesAtFinish(application);
     // finished without a proper unregister implies failed
     assertFinalAppStatus(FinalApplicationStatus.FAILED, application);
+    Assert.assertTrue("Finished app missing diagnostics",
+        application.getDiagnostics().indexOf(diagnostics) != -1);
     return application;
   }
 
@@ -348,11 +350,14 @@ public class TestRMAppTransitions {
 
     // test success path
     LOG.info("--- START: testUnmanagedAppSuccessPath ---");
-    testCreateAppFinished(subContext);
+    final String diagMsg = "some diagnostics";
+    RMApp application = testCreateAppFinished(subContext, diagMsg);
+    Assert.assertTrue("Finished app missing diagnostics",
+        application.getDiagnostics().indexOf(diagMsg) != -1);
 
     // test app fails after 1 app attempt failure
     LOG.info("--- START: testUnmanagedAppFailPath ---");
-    RMApp application = testCreateAppRunning(subContext);
+    application = testCreateAppRunning(subContext);
     RMAppEvent event = new RMAppFailedAttemptEvent(
         application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, "");
     application.handle(event);
@@ -366,7 +371,10 @@ public class TestRMAppTransitions {
   @Test
   public void testAppSuccessPath() throws IOException {
     LOG.info("--- START: testAppSuccessPath ---");
-    testCreateAppFinished(null);
+    final String diagMsg = "some diagnostics";
+    RMApp application = testCreateAppFinished(null, diagMsg);
+    Assert.assertTrue("Finished application missing diagnostics",
+        application.getDiagnostics().indexOf(diagMsg) != -1);
   }
 
   @Test
@@ -551,7 +559,7 @@ public class TestRMAppTransitions {
   public void testAppFinishedFinished() throws IOException {
     LOG.info("--- START: testAppFinishedFinished ---");
 
-    RMApp application = testCreateAppFinished(null);
+    RMApp application = testCreateAppFinished(null, "");
     // FINISHED => FINISHED event RMAppEventType.KILL
     RMAppEvent event = 
         new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
@@ -579,9 +587,8 @@ public class TestRMAppTransitions {
     assertAppState(RMAppState.KILLED, application);
 
     // KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED
-    event =
-        new RMAppEvent(application.getApplicationId(), 
-            RMAppEventType.ATTEMPT_FINISHED);
+    event = new RMAppFinishedAttemptEvent(
+        application.getApplicationId(), "");
     application.handle(event);
     rmDispatcher.await();
     assertTimesAtFinish(application);