Bladeren bron

YARN-8567. Fetching yarn logs fails for long running application if it is not present in timeline store. Contributed by Tarun Parimi.

Rohith Sharma K S 6 jaren geleden
bovenliggende
commit
573b158791

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java

@@ -879,7 +879,7 @@ public class YarnClientImpl extends YarnClient {
     try {
       containersListFromAHS =
           getContainerReportFromHistory(applicationAttemptId);
-    } catch (IOException e) {
+    } catch (IOException | YarnException e) {
       if (appNotFoundInRM) {
         throw e;
       }

+ 50 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java

@@ -530,6 +530,44 @@ public class TestYarnClient extends ParameterizedSchedulerTestBase {
     client.stop();
   }
 
+  /** getContainers falls back to the RM containers when the AHS throws. */
+  @Test(timeout = 10000)
+  public void testGetContainersOnAHSFail() throws YarnException, IOException {
+    Configuration conf = getConf();
+    conf.setBoolean(YarnConfiguration.APPLICATION_HISTORY_ENABLED, true);
+
+    final YarnClient client = new MockYarnClient() {
+      @Override
+      public List<ContainerReport> getContainers(
+          ApplicationAttemptId appAttemptId) throws YarnException, IOException {
+        return getContainersOnAHSFail(appAttemptId);
+      }
+    };
+    client.init(conf);
+    client.start();
+    try {
+      ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
+      ApplicationAttemptId appAttemptId =
+          ApplicationAttemptId.newInstance(applicationId, 1);
+      List<ContainerReport> reports = client.getContainers(appAttemptId);
+      Assert.assertNotNull(reports);
+      Assert.assertEquals(2, reports.size());
+      Assert.assertEquals(ContainerId.newContainerId(appAttemptId, 1),
+          reports.get(0).getContainerId());
+      Assert.assertEquals(ContainerId.newContainerId(appAttemptId, 2),
+          reports.get(1).getContainerId());
+      // Only the 2 running containers from the RM are present when the AHS
+      // lookup throws.
+      Assert.assertEquals(ContainerState.RUNNING,
+          reports.get(0).getContainerState());
+      Assert.assertEquals(ContainerState.RUNNING,
+          reports.get(1).getContainerState());
+    } finally {
+      client.stop();
+    }
+  }
+
+
   @Test(timeout = 10000)
   public void testGetContainerReport() throws YarnException, IOException {
     Configuration conf = getConf();
@@ -914,6 +952,18 @@ public class TestYarnClient extends ParameterizedSchedulerTestBase {
       return super.getContainers(appAttemptId);
     }
 
+    /** Stubs container list and a failing history client, then delegates. */
+    protected List<ContainerReport> getContainersOnAHSFail(
+        ApplicationAttemptId appAttemptId) throws YarnException, IOException {
+      when(mockContainersResponse.getContainerList())
+          .thenReturn(getContainersReport(appAttemptId));
+      String message = appAttemptId.getApplicationId()
+          + " does not exist in the timeline store";
+      when(historyClient.getContainers(any(ApplicationAttemptId.class)))
+          .thenThrow(new ApplicationNotFoundException(message));
+      return super.getContainers(appAttemptId);
+    }
+
     private List<ContainerReport> getContainersFromAHS(
         ApplicationAttemptId appAttemptId) {
       return containersFromAHS.get(appAttemptId);