Переглянути джерело

YARN-4596. SystemMetricPublisher should not swallow error messages from TimelineClient#putEntities. Contributed by Li Lu

Jian He 9 роки тому
батько
коміт
f385851141

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -1277,6 +1277,9 @@ Release 2.8.0 - UNRELEASED
     YARN-4502. Fix two AM containers get allocated when AM restart.
     (Vinod Kumar Vavilapalli via wangda)
 
+    YARN-4596. SystemMetricPublisher should not swallow error messages from
+    TimelineClient#putEntities. (Li Lu via jianhe)
+
 Release 2.7.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 22 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java

@@ -1140,7 +1140,8 @@ public class ApplicationMaster {
       ugi.doAs(new PrivilegedExceptionAction<TimelinePutResponse>() {
         @Override
         public TimelinePutResponse run() throws Exception {
-          return timelineClient.putEntities(entity);
+          return processTimelineResponseErrors(
+              timelineClient.putEntities(entity));
         }
       });
     } catch (Exception e) {
@@ -1165,7 +1166,8 @@ public class ApplicationMaster {
     event.addEventInfo("Exit Status", container.getExitStatus());
     entity.addEvent(event);
     try {
-      timelineClient.putEntities(entity);
+      TimelinePutResponse response = timelineClient.putEntities(entity);
+      processTimelineResponseErrors(response);
     } catch (YarnException | IOException e) {
       LOG.error("Container end event could not be published for "
           + container.getContainerId().toString(), e);
@@ -1185,7 +1187,8 @@ public class ApplicationMaster {
     event.setTimestamp(System.currentTimeMillis());
     entity.addEvent(event);
     try {
-      timelineClient.putEntities(entity);
+      TimelinePutResponse response = timelineClient.putEntities(entity);
+      processTimelineResponseErrors(response);
     } catch (YarnException | IOException e) {
       LOG.error("App Attempt "
           + (appEvent.equals(DSEvent.DS_APP_ATTEMPT_START) ? "start" : "end")
@@ -1194,6 +1197,22 @@ public class ApplicationMaster {
     }
   }
 
+  private static TimelinePutResponse processTimelineResponseErrors(
+      TimelinePutResponse response) {
+    List<TimelinePutResponse.TimelinePutError> errors = response.getErrors();
+    if (errors.size() == 0) {
+      LOG.debug("Timeline entities are successfully put");
+    } else {
+      for (TimelinePutResponse.TimelinePutError error : errors) {
+        LOG.error(
+            "Error when publishing entity [" + error.getEntityType() + ","
+                + error.getEntityId() + "], server side error code: "
+                + error.getErrorCode());
+      }
+    }
+    return response;
+  }
+
   RMCallbackHandler getRMCallbackHandler() {
     return new RMCallbackHandler();
   }

+ 13 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java

@@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent;
+import org.apache.hadoop.yarn.api.records.timeline.TimelinePutResponse;
 import org.apache.hadoop.yarn.client.api.TimelineClient;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
@@ -500,7 +501,18 @@ public class SystemMetricsPublisher extends CompositeService {
         LOG.debug("Publishing the entity " + entity.getEntityId() +
             ", JSON-style content: " + TimelineUtils.dumpTimelineRecordtoJSON(entity));
       }
-      client.putEntities(entity);
+      TimelinePutResponse response = client.putEntities(entity);
+      List<TimelinePutResponse.TimelinePutError> errors = response.getErrors();
+      if (errors.size() == 0) {
+        LOG.debug("Timeline entities are successfully put");
+      } else {
+        for (TimelinePutResponse.TimelinePutError error : errors) {
+          LOG.error(
+              "Error when publishing entity [" + error.getEntityType() + ","
+                  + error.getEntityId() + "], server side error code: "
+                  + error.getErrorCode());
+        }
+      }
     } catch (Exception e) {
       LOG.error("Error when publishing entity [" + entity.getEntityType() + ","
           + entity.getEntityId() + "]", e);