Browse Source

HADOOP-12356. Fix computing CPU usage statistics on Windows. (Inigo Goiri via wangda)

(cherry picked from commit 89d1fd5dac4bccf42d82686e146b02eb60d14736)
Wangda Tan 9 years ago
parent
commit
23deeb4230

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -992,6 +992,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12700. Remove unused import in TestCompressorDecompressor.java.
     (John Zhuge via aajisaka)
 
+    HADOOP-12356. Fix computing CPU usage statistics on Windows.
+    (Inigo Goiri via wangda)
+
 Release 2.7.3 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 9 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfo.java

@@ -104,9 +104,16 @@ public abstract class SysInfo {
   /**
    * Obtain the CPU usage % of the machine. Return -1 if it is unavailable
    *
-   * @return CPU usage as a percentage of available cycles.
+   * @return CPU usage as a percentage (from 0 to 100) of available cycles.
    */
-  public abstract float getCpuUsage();
+  public abstract float getCpuUsagePercentage();
+
+  /**
+   * Obtain the number of VCores used. Return -1 if it is unavailable.
+   *
+   * @return Number of VCores used (from 0 to #VCores).
+   */
+  public abstract float getNumVCoresUsed();
 
   /**
    * Obtain the aggregated number of bytes read over the network.

+ 13 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoLinux.java

@@ -608,7 +608,7 @@ public class SysInfoLinux extends SysInfo {
 
   /** {@inheritDoc} */
   @Override
-  public float getCpuUsage() {
+  public float getCpuUsagePercentage() {
     readProcStatFile();
     float overallCpuUsage = cpuTimeTracker.getCpuTrackerUsagePercent();
     if (overallCpuUsage != CpuTimeTracker.UNAVAILABLE) {
@@ -617,6 +617,17 @@ public class SysInfoLinux extends SysInfo {
     return overallCpuUsage;
   }
 
+  /** {@inheritDoc} */
+  @Override
+  public float getNumVCoresUsed() {
+    readProcStatFile();
+    float overallVCoresUsage = cpuTimeTracker.getCpuTrackerUsagePercent();
+    if (overallVCoresUsage != CpuTimeTracker.UNAVAILABLE) {
+      overallVCoresUsage = overallVCoresUsage / 100F;
+    }
+    return overallVCoresUsage;
+  }
+
   /** {@inheritDoc} */
   @Override
   public long getNetworkBytesRead() {
@@ -676,7 +687,7 @@ public class SysInfoLinux extends SysInfo {
     } catch (InterruptedException e) {
       // do nothing
     }
-    System.out.println("CPU usage % : " + plugin.getCpuUsage());
+    System.out.println("CPU usage % : " + plugin.getCpuUsagePercentage());
   }
 
   @VisibleForTesting

+ 23 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/SysInfoWindows.java

@@ -104,8 +104,13 @@ public class SysInfoWindows extends SysInfo {
             cpuFrequencyKhz = Long.parseLong(sysInfo[5]);
             cumulativeCpuTimeMs = Long.parseLong(sysInfo[6]);
             if (lastCumCpuTimeMs != -1) {
+              /**
+               * This number will be the aggregated usage across all cores in
+               * [0.0, 100.0 * numProcessors]. For example, it will be 400.0 if
+               * there are 8 cores and each of them is running at 50%
+               * utilization.
+               */
               cpuUsage = (cumulativeCpuTimeMs - lastCumCpuTimeMs)
-                  / (refreshInterval * 1.0f);
+                  * 100F / refreshInterval;
             }
           } catch (NumberFormatException nfe) {
             LOG.warn("Error parsing sysInfo", nfe);
@@ -175,9 +180,24 @@ public class SysInfoWindows extends SysInfo {
 
   /** {@inheritDoc} */
   @Override
-  public float getCpuUsage() {
+  public float getCpuUsagePercentage() {
     refreshIfNeeded();
-    return cpuUsage;
+    float ret = cpuUsage;
+    if (ret != -1) {
+      ret = ret / numProcessors;
+    }
+    return ret;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public float getNumVCoresUsed() {
+    refreshIfNeeded();
+    float ret = cpuUsage;
+    if (ret != -1) {
+      ret = ret / 100F;
+    }
+    return ret;
   }
 
   /** {@inheritDoc} */

+ 13 - 5
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoLinux.java

@@ -229,7 +229,10 @@ public class TestSysInfoLinux {
     updateStatFile(uTime, nTime, sTime);
     assertEquals(plugin.getCumulativeCpuTime(),
                  FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
-    assertEquals(plugin.getCpuUsage(), (float)(CpuTimeTracker.UNAVAILABLE),0.0);
+    assertEquals(plugin.getCpuUsagePercentage(),
+        (float)(CpuTimeTracker.UNAVAILABLE),0.0);
+    assertEquals(plugin.getNumVCoresUsed(),
+        (float)(CpuTimeTracker.UNAVAILABLE),0.0);
 
     // Advance the time and sample again to test the CPU usage calculation
     uTime += 100L;
@@ -237,13 +240,15 @@ public class TestSysInfoLinux {
     updateStatFile(uTime, nTime, sTime);
     assertEquals(plugin.getCumulativeCpuTime(),
                  FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
-    assertEquals(plugin.getCpuUsage(), 6.25F, 0.0);
+    assertEquals(plugin.getCpuUsagePercentage(), 6.25F, 0.0);
+    assertEquals(plugin.getNumVCoresUsed(), 0.5F, 0.0);
 
-    // Advance the time and sample again. This time, we call getCpuUsage() only.
+    // Advance the time and sample again. This time, query only the CPU usage.
     uTime += 600L;
     plugin.advanceTime(300L);
     updateStatFile(uTime, nTime, sTime);
-    assertEquals(plugin.getCpuUsage(), 25F, 0.0);
+    assertEquals(plugin.getCpuUsagePercentage(), 25F, 0.0);
+    assertEquals(plugin.getNumVCoresUsed(), 2F, 0.0);
 
     // Advance very short period of time (one jiffy length).
     // In this case, CPU usage should not be updated.
@@ -252,7 +257,10 @@ public class TestSysInfoLinux {
     updateStatFile(uTime, nTime, sTime);
     assertEquals(plugin.getCumulativeCpuTime(),
                  FAKE_JIFFY_LENGTH * (uTime + nTime + sTime));
-    assertEquals(plugin.getCpuUsage(), 25F, 0.0); // CPU usage is not updated.
+    assertEquals(
+        plugin.getCpuUsagePercentage(), 25F, 0.0); // CPU usage is not updated.
+    assertEquals(
+        plugin.getNumVCoresUsed(), 2F, 0.0); // CPU usage is not updated.
   }
 
   /**

+ 46 - 5
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestSysInfoWindows.java

@@ -58,7 +58,10 @@ public class TestSysInfoWindows {
     assertEquals(2805000L, tester.getCpuFrequency());
     assertEquals(6261812L, tester.getCumulativeCpuTime());
     // undef on first call
-    assertEquals(-1.0, tester.getCpuUsage(), 0.0);
+    assertEquals((float)CpuTimeTracker.UNAVAILABLE,
+        tester.getCpuUsagePercentage(), 0.0);
+    assertEquals((float)CpuTimeTracker.UNAVAILABLE,
+        tester.getNumVCoresUsed(), 0.0);
   }
 
   @Test(timeout = 10000)
@@ -70,22 +73,60 @@ public class TestSysInfoWindows {
     tester.getAvailablePhysicalMemorySize();
     // verify information has been refreshed
     assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
-    assertEquals(-1.0, tester.getCpuUsage(), 0.0);
+    assertEquals((float)CpuTimeTracker.UNAVAILABLE,
+        tester.getCpuUsagePercentage(), 0.0);
+    assertEquals((float)CpuTimeTracker.UNAVAILABLE,
+        tester.getNumVCoresUsed(), 0.0);
 
     tester.setSysinfoString(
         "17177038848,8589467648,15232745472,5400417792,1,2805000,6263012\r\n");
     tester.getAvailablePhysicalMemorySize();
     // verify information has not been refreshed
     assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
-    assertEquals(-1.0, tester.getCpuUsage(), 0.0);
+    assertEquals((float)CpuTimeTracker.UNAVAILABLE,
+        tester.getCpuUsagePercentage(), 0.0);
+    assertEquals((float)CpuTimeTracker.UNAVAILABLE,
+        tester.getNumVCoresUsed(), 0.0);
 
     // advance clock
     tester.advance(SysInfoWindows.REFRESH_INTERVAL_MS + 1);
 
     // verify information has been refreshed
     assertEquals(5400417792L, tester.getAvailablePhysicalMemorySize());
-    assertEquals((6263012 - 6261812) / (SysInfoWindows.REFRESH_INTERVAL_MS + 1f),
-        tester.getCpuUsage(), 0.0);
+    assertEquals((6263012 - 6261812) * 100F /
+                 (SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 1,
+                 tester.getCpuUsagePercentage(), 0.0);
+    assertEquals((6263012 - 6261812) /
+                 (SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 1,
+                 tester.getNumVCoresUsed(), 0.0);
+  }
+
+  @Test(timeout = 10000)
+  public void refreshAndCpuUsageMulticore() throws InterruptedException {
+    // test with 12 cores
+    SysInfoWindowsMock tester = new SysInfoWindowsMock();
+    tester.setSysinfoString(
+        "17177038848,8589467648,15232745472,6400417792,12,2805000,6261812\r\n");
+    // verify information has been refreshed
+    assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
+
+    tester.setSysinfoString(
+        "17177038848,8589467648,15232745472,5400417792,12,2805000,6263012\r\n");
+    // verify information has not been refreshed
+    assertEquals(6400417792L, tester.getAvailablePhysicalMemorySize());
+
+    // advance clock
+    tester.advance(SysInfoWindows.REFRESH_INTERVAL_MS + 1);
+
+    // verify information has been refreshed
+    assertEquals(5400417792L, tester.getAvailablePhysicalMemorySize());
+    // verify CPU usage is computed across the 12 cores
+    assertEquals((6263012 - 6261812) * 100F /
+                 (SysInfoWindows.REFRESH_INTERVAL_MS + 1f) / 12,
+                 tester.getCpuUsagePercentage(), 0.0);
+    assertEquals((6263012 - 6261812) /
+                 (SysInfoWindows.REFRESH_INTERVAL_MS + 1f),
+                 tester.getNumVCoresUsed(), 0.0);
   }
 
   @Test(timeout = 10000)

+ 1 - 1
hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DummyResourceCalculatorPlugin.java

@@ -120,7 +120,7 @@ public class DummyResourceCalculatorPlugin extends ResourceCalculatorPlugin {
 
   /** {@inheritDoc} */
   @Override
-  public float getCpuUsage() {
+  public float getCpuUsagePercentage() {
     return getConf().getFloat(CPU_USAGE, -1);
   }
 

+ 12 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorPlugin.java

@@ -120,12 +120,21 @@ public class ResourceCalculatorPlugin extends Configured {
   }
 
   /**
-   * Obtain the CPU usage % of the machine. Return -1 if it is unavailable
+   * Obtain the CPU usage % of the machine. Return -1 if it is unavailable.
    *
    * @return CPU usage in %
    */
-  public float getCpuUsage() {
-    return sys.getCpuUsage();
+  public float getCpuUsagePercentage() {
+    return sys.getCpuUsagePercentage();
+  }
+
+  /**
+   * Obtain the number of VCores used. Return -1 if it is unavailable.
+   *
+   * @return Number of VCores used (from 0 to #VCores)
+   */
+  public float getNumVCoresUsed() {
+    return sys.getNumVCoresUsed();
   }
 
    /**

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeResourceMonitorImpl.java

@@ -141,12 +141,12 @@ public class NodeResourceMonitorImpl extends AbstractService implements
         long vmem =
             resourceCalculatorPlugin.getVirtualMemorySize()
                 - resourceCalculatorPlugin.getAvailableVirtualMemorySize();
-        float cpu = resourceCalculatorPlugin.getCpuUsage();
+        float vcores = resourceCalculatorPlugin.getNumVCoresUsed();
         nodeUtilization =
             ResourceUtilization.newInstance(
                 (int) (pmem >> 20), // B -> MB
                 (int) (vmem >> 20), // B -> MB
-                cpu); // 1 CPU at 100% is 1
+                vcores); // Used Virtual Cores
 
         try {
           Thread.sleep(monitoringInterval);

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java

@@ -63,7 +63,7 @@ public class MockResourceCalculatorPlugin extends ResourceCalculatorPlugin {
   }
 
   @Override
-  public float getCpuUsage() {
+  public float getCpuUsagePercentage() {
     return 0;
   }
 }

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/util/TestNodeManagerHardwareUtils.java

@@ -73,7 +73,7 @@ public class TestNodeManagerHardwareUtils {
     }
 
     @Override
-    public float getCpuUsage() {
+    public float getCpuUsagePercentage() {
       return 0;
     }