Quellcode durchsuchen

YARN-3943. Use separate threshold configurations for disk-full detection and disk-not-full detection. Contributed by Zhihai Xu

Jason Lowe vor 9 Jahren
Ursprung
Commit
8d226225d0

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -492,6 +492,9 @@ Release 2.8.0 - UNRELEASED
     YARN-4215. RMNodeLabels Manager Need to verify and replace node labels for the 
     only modified Node Label Mappings in the request. (Naganarasimha G R via wangda)
 
+    YARN-3943. Use separate threshold configurations for disk-full detection
+    and disk-not-full detection. (Zhihai Xu via jlowe)
+
   OPTIMIZATIONS
 
     YARN-3339. TestDockerContainerExecutor should pull a single image and not

+ 12 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -1059,6 +1059,18 @@ public class YarnConfiguration extends Configuration {
   public static final float DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE =
       90.0F;
 
+  /**
+   * The low watermark of per-disk utilization: the percentage of used disk
+   * space that an offline disk must not exceed to be marked as online again.
+   * Values can range from 0.0 to 100.0 and should not be more than
+   * NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE. If the value is not set or is
+   * more than NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, that value is used
+   * instead. This applies to nm-local-dirs and nm-log-dirs.
+   */
+  public static final String NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE =
+      NM_DISK_HEALTH_CHECK_PREFIX +
+      "disk-utilization-watermark-low-per-disk-percentage";
+
   /**
    * The minimum space that must be available on a local dir for it to be used.
    * This applies to nm-local-dirs and nm-log-dirs.

+ 11 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml

@@ -1317,6 +1317,17 @@
     <value>90.0</value>
   </property>
 
+  <property>
+    <description>The low threshold percentage of disk space used when a bad disk is
+    marked as good. Values can range from 0.0 to 100.0. This applies to
+    yarn.nodemanager.local-dirs and yarn.nodemanager.log-dirs.
+    Note that if its value is not set, or is more than
+    yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage,
+    it will be set to the same value as that property.</description>
+    <name>yarn.nodemanager.disk-health-checker.disk-utilization-watermark-low-per-disk-percentage</name>
+    <value></value>
+  </property>
+
   <property>
     <description>The minimum space that must be available on a disk for
     it to be used. This applies to yarn.nodemanager.local-dirs and 

+ 44 - 24
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java

@@ -39,6 +39,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.util.DiskChecker;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * Manages a list of local storage directories.
  */
@@ -88,8 +90,9 @@ public class DirectoryCollection {
   private List<String> fullDirs;
 
   private int numFailures;
-  
-  private float diskUtilizationPercentageCutoff;
+
+  private float diskUtilizationPercentageCutoffHigh;
+  private float diskUtilizationPercentageCutoffLow;
   private long diskUtilizationSpaceCutoff;
 
   private int goodDirsDiskUtilizationPercentage;
@@ -103,7 +106,7 @@ public class DirectoryCollection {
    *          directories to be monitored
    */
   public DirectoryCollection(String[] dirs) {
-    this(dirs, 100.0F, 0);
+    this(dirs, 100.0F, 100.0F, 0);
   }
 
   /**
@@ -119,7 +122,7 @@ public class DirectoryCollection {
    * 
    */
   public DirectoryCollection(String[] dirs, float utilizationPercentageCutOff) {
-    this(dirs, utilizationPercentageCutOff, 0);
+    this(dirs, utilizationPercentageCutOff, utilizationPercentageCutOff, 0);
   }
 
   /**
@@ -134,7 +137,7 @@ public class DirectoryCollection {
    * 
    */
   public DirectoryCollection(String[] dirs, long utilizationSpaceCutOff) {
-    this(dirs, 100.0F, utilizationSpaceCutOff);
+    this(dirs, 100.0F, 100.0F, utilizationSpaceCutOff);
   }
 
   /**
@@ -145,25 +148,29 @@ public class DirectoryCollection {
    * 
    * @param dirs
    *          directories to be monitored
-   * @param utilizationPercentageCutOff
+   * @param utilizationPercentageCutOffHigh
    *          percentage of disk that can be used before the dir is taken out of
    *          the good dirs list
+   * @param utilizationPercentageCutOffLow
+   *          percentage of disk that may be used for a dir to be moved from
+   *          the bad dirs list back to the good dirs list
    * @param utilizationSpaceCutOff
    *          minimum space, in MB, that must be available on the disk for the
    *          dir to be marked as good
    * 
    */
-  public DirectoryCollection(String[] dirs, 
-      float utilizationPercentageCutOff,
+  public DirectoryCollection(String[] dirs,
+      float utilizationPercentageCutOffHigh,
+      float utilizationPercentageCutOffLow,
       long utilizationSpaceCutOff) {
     localDirs = new CopyOnWriteArrayList<String>(dirs);
     errorDirs = new CopyOnWriteArrayList<String>();
     fullDirs = new CopyOnWriteArrayList<String>();
 
-    diskUtilizationPercentageCutoff =
-        utilizationPercentageCutOff < 0.0F ? 0.0F
-            : (utilizationPercentageCutOff > 100.0F ? 100.0F
-                : utilizationPercentageCutOff);
+    diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
+        utilizationPercentageCutOffHigh));
+    diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
+        diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
     diskUtilizationSpaceCutoff =
         utilizationSpaceCutOff < 0 ? 0 : utilizationSpaceCutOff;
 
@@ -254,7 +261,8 @@ public class DirectoryCollection {
     List<String> allLocalDirs =
         DirectoryCollection.concat(localDirs, failedDirs);
 
-    Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs);
+    Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs,
+        preCheckGoodDirs);
 
     localDirs.clear();
     errorDirs.clear();
@@ -314,7 +322,8 @@ public class DirectoryCollection {
     return setChanged;
   }
 
-  Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
+  Map<String, DiskErrorInformation> testDirs(List<String> dirs,
+      Set<String> goodDirs) {
     HashMap<String, DiskErrorInformation> ret =
         new HashMap<String, DiskErrorInformation>();
     for (final String dir : dirs) {
@@ -322,7 +331,10 @@ public class DirectoryCollection {
       try {
         File testDir = new File(dir);
         DiskChecker.checkDir(testDir);
-        if (isDiskUsageOverPercentageLimit(testDir)) {
+        float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ?
+            diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow;
+        if (isDiskUsageOverPercentageLimit(testDir,
+            diskUtilizationPercentageCutoff)) {
           msg =
               "used space above threshold of "
                   + diskUtilizationPercentageCutoff
@@ -374,7 +386,8 @@ public class DirectoryCollection {
     }
   }
 
-  private boolean isDiskUsageOverPercentageLimit(File dir) {
+  private boolean isDiskUsageOverPercentageLimit(File dir,
+      float diskUtilizationPercentageCutoff) {
     float freePercentage =
         100 * (dir.getUsableSpace() / (float) dir.getTotalSpace());
     float usedPercentage = 100.0F - freePercentage;
@@ -402,17 +415,24 @@ public class DirectoryCollection {
       }
     }
   }
-  
-  public float getDiskUtilizationPercentageCutoff() {
-    return diskUtilizationPercentageCutoff;
+
+  @VisibleForTesting
+  float getDiskUtilizationPercentageCutoffHigh() {
+    return diskUtilizationPercentageCutoffHigh;
+  }
+
+  @VisibleForTesting
+  float getDiskUtilizationPercentageCutoffLow() {
+    return diskUtilizationPercentageCutoffLow;
   }
 
   public void setDiskUtilizationPercentageCutoff(
-      float diskUtilizationPercentageCutoff) {
-    this.diskUtilizationPercentageCutoff =
-        diskUtilizationPercentageCutoff < 0.0F ? 0.0F
-            : (diskUtilizationPercentageCutoff > 100.0F ? 100.0F
-                : diskUtilizationPercentageCutoff);
+      float utilizationPercentageCutOffHigh,
+      float utilizationPercentageCutOffLow) {
+    diskUtilizationPercentageCutoffHigh = Math.max(0.0F, Math.min(100.0F,
+        utilizationPercentageCutOffHigh));
+    diskUtilizationPercentageCutoffLow = Math.max(0.0F, Math.min(
+        diskUtilizationPercentageCutoffHigh, utilizationPercentageCutOffLow));
   }
 
   public long getDiskUtilizationSpaceCutoff() {

+ 24 - 6
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java

@@ -114,22 +114,40 @@ public class LocalDirsHandlerService extends AbstractService {
   private final class MonitoringTimerTask extends TimerTask {
 
     public MonitoringTimerTask(Configuration conf) throws YarnRuntimeException {
-      float maxUsableSpacePercentagePerDisk =
+      float highUsableSpacePercentagePerDisk =
           conf.getFloat(
             YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
             YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE);
+      float lowUsableSpacePercentagePerDisk =
+          conf.getFloat(
+              YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE,
+              highUsableSpacePercentagePerDisk);
+      if (lowUsableSpacePercentagePerDisk > highUsableSpacePercentagePerDisk) {
+        LOG.warn("Using " + YarnConfiguration.
+            NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE + " as " +
+            YarnConfiguration.NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
+            ", because " + YarnConfiguration.
+            NM_WM_LOW_PER_DISK_UTILIZATION_PERCENTAGE +
+            " is not configured properly.");
+        lowUsableSpacePercentagePerDisk = highUsableSpacePercentagePerDisk;
+      }
       long minFreeSpacePerDiskMB =
           conf.getLong(YarnConfiguration.NM_MIN_PER_DISK_FREE_SPACE_MB,
             YarnConfiguration.DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB);
       localDirs =
           new DirectoryCollection(
-            validatePaths(conf
-              .getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
-            maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
+              validatePaths(conf
+                  .getTrimmedStrings(YarnConfiguration.NM_LOCAL_DIRS)),
+              highUsableSpacePercentagePerDisk,
+              lowUsableSpacePercentagePerDisk,
+              minFreeSpacePerDiskMB);
       logDirs =
           new DirectoryCollection(
-            validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
-            maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB);
+              validatePaths(conf
+                  .getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)),
+              highUsableSpacePercentagePerDisk,
+              lowUsableSpacePercentagePerDisk,
+              minFreeSpacePerDiskMB);
 
       String local = conf.get(YarnConfiguration.NM_LOCAL_DIRS);
       conf.set(NM_GOOD_LOCAL_DIRS,

+ 48 - 20
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java

@@ -152,7 +152,7 @@ public class TestDirectoryCollection {
     // no good dirs
     Assert.assertEquals(0, dc.getGoodDirsDiskUtilizationPercentage());
 
-    dc = new DirectoryCollection(dirs, 100.0F, 0);
+    dc = new DirectoryCollection(dirs, 100.0F, 100.0F, 0);
     utilizedSpacePerc =
         (int)((testDir.getTotalSpace() - testDir.getUsableSpace()) * 100 /
             testDir.getTotalSpace());
@@ -168,18 +168,28 @@ public class TestDirectoryCollection {
   public void testDiskLimitsCutoffSetters() throws IOException {
 
     String[] dirs = { "dir" };
-    DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 100);
+    DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 0.0F, 100);
     float testValue = 57.5F;
     float delta = 0.1F;
-    dc.setDiskUtilizationPercentageCutoff(testValue);
-    Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoff(),
-      delta);
+    dc.setDiskUtilizationPercentageCutoff(testValue, 50.0F);
+    Assert.assertEquals(testValue, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(50.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
+
     testValue = -57.5F;
-    dc.setDiskUtilizationPercentageCutoff(testValue);
-    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
+
     testValue = 157.5F;
-    dc.setDiskUtilizationPercentageCutoff(testValue);
-    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc.setDiskUtilizationPercentageCutoff(testValue, testValue);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
 
     long spaceValue = 57;
     dc.setDiskUtilizationSpaceCutoff(spaceValue);
@@ -200,7 +210,7 @@ public class TestDirectoryCollection {
     Assert.assertEquals(1, dc.getFailedDirs().size());
     Assert.assertEquals(1, dc.getFullDirs().size());
 
-    dc.setDiskUtilizationPercentageCutoff(100.0F);
+    dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
     dc.checkDirs();
     Assert.assertEquals(1, dc.getGoodDirs().size());
     Assert.assertEquals(0, dc.getFailedDirs().size());
@@ -236,27 +246,45 @@ public class TestDirectoryCollection {
     String[] dirs = { "dir" };
     float delta = 0.1F;
     DirectoryCollection dc = new DirectoryCollection(dirs);
-    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
 
     dc = new DirectoryCollection(dirs, 57.5F);
-    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
 
     dc = new DirectoryCollection(dirs, 57);
-    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(57, dc.getDiskUtilizationSpaceCutoff());
 
-    dc = new DirectoryCollection(dirs, 57.5F, 67);
-    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc = new DirectoryCollection(dirs, 57.5F, 50.5F, 67);
+    Assert.assertEquals(57.5F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(50.5F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(67, dc.getDiskUtilizationSpaceCutoff());
 
-    dc = new DirectoryCollection(dirs, -57.5F, -67);
-    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc = new DirectoryCollection(dirs, -57.5F, -57.5F, -67);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(0.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
 
-    dc = new DirectoryCollection(dirs, 157.5F, -67);
-    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoff(), delta);
+    dc = new DirectoryCollection(dirs, 157.5F, 157.5F, -67);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffHigh(),
+        delta);
+    Assert.assertEquals(100.0F, dc.getDiskUtilizationPercentageCutoffLow(),
+        delta);
     Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
   }
 
@@ -288,7 +316,7 @@ public class TestDirectoryCollection {
     Assert.assertEquals(listener3.num, 1);
 
     dc.deregisterDirsChangeListener(listener2);
-    dc.setDiskUtilizationPercentageCutoff(100.0F);
+    dc.setDiskUtilizationPercentageCutoff(100.0F, 100.0F);
     dc.checkDirs();
     Assert.assertEquals(1, dc.getGoodDirs().size());
     Assert.assertEquals(listener1.num, 3);