瀏覽代碼

YARN-4747. AHS error 500 due to NPE when container start event is missing. Contributed by Varun Saxena

Jason Lowe 9 年之前
父節點
當前提交
b2ed6ae731

+ 12 - 9
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java

@@ -587,19 +587,22 @@ public class ApplicationHistoryManagerOnTimelineStore extends AbstractService
         }
       }
     }
-    NodeId allocatedNode = NodeId.newInstance(allocatedHost, allocatedPort);
     ContainerId containerId =
         ConverterUtils.toContainerId(entity.getEntityId());
-    String logUrl = WebAppUtils.getAggregatedLogURL(
-        serverHttpAddress,
-        allocatedNode.toString(),
-        containerId.toString(),
-        containerId.toString(),
-        user);
+    String logUrl = null;
+    NodeId allocatedNode = null;
+    if (allocatedHost != null) {
+      allocatedNode = NodeId.newInstance(allocatedHost, allocatedPort);
+      logUrl = WebAppUtils.getAggregatedLogURL(
+          serverHttpAddress,
+          allocatedNode.toString(),
+          containerId.toString(),
+          containerId.toString(),
+          user);
+    }
     return ContainerReport.newInstance(
         ConverterUtils.toContainerId(entity.getEntityId()),
-        Resource.newInstance(allocatedMem, allocatedVcore),
-        NodeId.newInstance(allocatedHost, allocatedPort),
+        Resource.newInstance(allocatedMem, allocatedVcore), allocatedNode,
         Priority.newInstance(allocatedPriority),
         createdTime, finishedTime, diagnosticsInfo, logUrl, exitStatus, state,
         nodeHttpAddress);

+ 8 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/ContainerFinishedEvent.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.metrics;
 
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerState;
+import org.apache.hadoop.yarn.api.records.NodeId;
 
 public class ContainerFinishedEvent extends SystemMetricsEvent {
 
@@ -27,17 +28,20 @@ public class ContainerFinishedEvent extends SystemMetricsEvent {
   private String diagnosticsInfo;
   private int containerExitStatus;
   private ContainerState state;
+  private NodeId allocatedNode;
 
   public ContainerFinishedEvent(
       ContainerId containerId,
       String diagnosticsInfo,
       int containerExitStatus,
       ContainerState state,
-      long finishedTime) {
+      long finishedTime,
+      NodeId allocatedNode) { // trailing parameter introduced by this change
     super(SystemMetricsEventType.CONTAINER_FINISHED, finishedTime);
     this.containerId = containerId;
     this.diagnosticsInfo = diagnosticsInfo;
     this.containerExitStatus = containerExitStatus;
+    this.allocatedNode = allocatedNode; // kept so getAllocatedNode() can return it
     this.state = state;
   }
 
@@ -62,4 +66,7 @@ public class ContainerFinishedEvent extends SystemMetricsEvent {
     return state;
   }
 
+  public NodeId getAllocatedNode() { // accessor for the allocatedNode field
+    return allocatedNode;
+  }
 }

+ 7 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java

@@ -218,7 +218,7 @@ public class SystemMetricsPublisher extends CompositeService {
               container.getDiagnosticsInfo(),
               container.getContainerExitStatus(),
               container.getContainerState(),
-              finishedTime));
+              finishedTime, container.getAllocatedNode()));
     }
   }
 
@@ -479,6 +479,12 @@ public class SystemMetricsPublisher extends CompositeService {
         event.getContainerExitStatus());
     eventInfo.put(ContainerMetricsConstants.STATE_EVENT_INFO,
         event.getContainerState().toString());
+    Map<String, Object> entityInfo = new HashMap<String, Object>();
+    entityInfo.put(ContainerMetricsConstants.ALLOCATED_HOST_ENTITY_INFO,
+        event.getAllocatedNode().getHost());
+    entityInfo.put(ContainerMetricsConstants.ALLOCATED_PORT_ENTITY_INFO,
+        event.getAllocatedNode().getPort());
+    entity.setOtherInfo(entityInfo);
     tEvent.setEventInfo(eventInfo);
     entity.addEvent(tEvent);
     putEntity(entity);

+ 30 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java

@@ -344,6 +344,36 @@ public class TestSystemMetricsPublisher {
     Assert.assertTrue(hasRegisteredEvent && hasFinishedEvent);
   }
 
+  @Test(timeout = 10000) // JUnit fails the test if it runs longer than 10000 ms
+  public void testPublishHostPortInfoOnContainerFinished() throws Exception {
+    ContainerId containerId =
+        ContainerId.newContainerId(ApplicationAttemptId.newInstance(
+            ApplicationId.newInstance(0, 1), 1), 1);
+    RMContainer container = createRMContainer(containerId); // helper not shown in this hunk
+    metricsPublisher.containerFinished(container, container.getFinishTime()); // only the finished event is published in this test
+    TimelineEntity entity = null; // assigned by the polling loop below
+    do { // re-query the store until the entity exists and carries at least one event
+      entity =
+          store.getEntity(containerId.toString(),
+              ContainerMetricsConstants.ENTITY_TYPE,
+              EnumSet.allOf(Field.class));
+    } while (entity == null || entity.getEvents().size() < 1); // no other exit; relies on the @Test timeout
+    Assert.assertNotNull(entity.getOtherInfo());
+    Assert.assertEquals(2, entity.getOtherInfo().size()); // exactly two entries expected
+    Assert.assertNotNull(entity.getOtherInfo().get(
+        ContainerMetricsConstants.ALLOCATED_HOST_ENTITY_INFO));
+    Assert.assertNotNull(entity.getOtherInfo().get(
+        ContainerMetricsConstants.ALLOCATED_PORT_ENTITY_INFO));
+    Assert.assertEquals( // stored host must equal the container's allocated host
+        container.getAllocatedNode().getHost(),
+        entity.getOtherInfo().get(
+            ContainerMetricsConstants.ALLOCATED_HOST_ENTITY_INFO));
+    Assert.assertEquals( // stored port must equal the container's allocated port
+        container.getAllocatedNode().getPort(),
+        entity.getOtherInfo().get(
+            ContainerMetricsConstants.ALLOCATED_PORT_ENTITY_INFO));
+  }
+
   @Test(timeout = 10000)
   public void testPublishContainerMetrics() throws Exception {
     ContainerId containerId =