فهرست منبع

YARN-7580. ContainersMonitorImpl logged message lacks detail when exceeding memory limits. Contributed by Wilfred Spiegelenburg.

Miklos Szegedi 7 سال پیش
والد
کامیت
b82049b4f0

+ 13 - 5
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java

@@ -650,26 +650,34 @@ public class ContainersMonitorImpl extends AbstractService implements
       if (isVmemCheckEnabled()
       if (isVmemCheckEnabled()
               && isProcessTreeOverLimit(containerId.toString(),
               && isProcessTreeOverLimit(containerId.toString(),
               currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
               currentVmemUsage, curMemUsageOfAgedProcesses, vmemLimit)) {
+        // The current usage (age=0) is always higher than the aged usage. We
+        // do not show the aged size in the message, so base the delta on the
+        // current usage.
+        long delta = currentVmemUsage - vmemLimit;
         // Container (the root process) is still alive and overflowing
         // Container (the root process) is still alive and overflowing
         // memory.
         // memory.
         // Dump the process-tree and then clean it up.
         // Dump the process-tree and then clean it up.
         msg = formatErrorMessage("virtual",
         msg = formatErrorMessage("virtual",
                 formatUsageString(currentVmemUsage, vmemLimit,
                 formatUsageString(currentVmemUsage, vmemLimit,
                   currentPmemUsage, pmemLimit),
                   currentPmemUsage, pmemLimit),
-                pId, containerId, pTree);
+                pId, containerId, pTree, delta);
         isMemoryOverLimit = true;
         isMemoryOverLimit = true;
         containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
         containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_VMEM;
       } else if (isPmemCheckEnabled()
       } else if (isPmemCheckEnabled()
               && isProcessTreeOverLimit(containerId.toString(),
               && isProcessTreeOverLimit(containerId.toString(),
               currentPmemUsage, curRssMemUsageOfAgedProcesses,
               currentPmemUsage, curRssMemUsageOfAgedProcesses,
               pmemLimit)) {
               pmemLimit)) {
+        // The current usage (age=0) is always higher than the aged usage. We
+        // do not show the aged size in the message, so base the delta on the
+        // current usage.
+        long delta = currentPmemUsage - pmemLimit;
         // Container (the root process) is still alive and overflowing
         // Container (the root process) is still alive and overflowing
         // memory.
         // memory.
         // Dump the process-tree and then clean it up.
         // Dump the process-tree and then clean it up.
         msg = formatErrorMessage("physical",
         msg = formatErrorMessage("physical",
                 formatUsageString(currentVmemUsage, vmemLimit,
                 formatUsageString(currentVmemUsage, vmemLimit,
                   currentPmemUsage, pmemLimit),
                   currentPmemUsage, pmemLimit),
-                pId, containerId, pTree);
+                pId, containerId, pTree, delta);
         isMemoryOverLimit = true;
         isMemoryOverLimit = true;
         containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
         containerExitStatus = ContainerExitStatus.KILLED_EXCEEDED_PMEM;
       }
       }
@@ -726,11 +734,11 @@ public class ContainersMonitorImpl extends AbstractService implements
      */
      */
     private String formatErrorMessage(String memTypeExceeded,
     private String formatErrorMessage(String memTypeExceeded,
         String usageString, String pId, ContainerId containerId,
         String usageString, String pId, ContainerId containerId,
-        ResourceCalculatorProcessTree pTree) {
+        ResourceCalculatorProcessTree pTree, long delta) {
       return
       return
         String.format("Container [pid=%s,containerID=%s] is " +
         String.format("Container [pid=%s,containerID=%s] is " +
-            "running beyond %s memory limits. ",
-            pId, containerId, memTypeExceeded) +
+            "running %dB beyond the '%S' memory limit. ",
+            pId, containerId, delta, memTypeExceeded) +
         "Current usage: " + usageString +
         "Current usage: " + usageString +
         ". Killing container.\n" +
         ". Killing container.\n" +
         "Dump of the process-tree for " + containerId + " :\n" +
         "Dump of the process-tree for " + containerId + " :\n" +

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java

@@ -332,8 +332,8 @@ public class TestContainersMonitor extends BaseContainerManagerTest {
     Assert.assertEquals(ContainerExitStatus.KILLED_EXCEEDED_VMEM,
     Assert.assertEquals(ContainerExitStatus.KILLED_EXCEEDED_VMEM,
         containerStatus.getExitStatus());
         containerStatus.getExitStatus());
     String expectedMsgPattern =
     String expectedMsgPattern =
-        "Container \\[pid=" + pid + ",containerID=" + cId
-            + "\\] is running beyond virtual memory limits. Current usage: "
+        "Container \\[pid=" + pid + ",containerID=" + cId + "\\] is running "
+            + "[0-9]+B beyond the 'VIRTUAL' memory limit. Current usage: "
             + "[0-9.]+ ?[KMGTPE]?B of [0-9.]+ ?[KMGTPE]?B physical memory used; "
             + "[0-9.]+ ?[KMGTPE]?B of [0-9.]+ ?[KMGTPE]?B physical memory used; "
             + "[0-9.]+ ?[KMGTPE]?B of [0-9.]+ ?[KMGTPE]?B virtual memory used. "
             + "[0-9.]+ ?[KMGTPE]?B of [0-9.]+ ?[KMGTPE]?B virtual memory used. "
             + "Killing container.\nDump of the process-tree for "
             + "Killing container.\nDump of the process-tree for "