Browse Source

HDFS-16858. Dynamically adjust max slow disks to exclude. (#5180)

Reviewed-by: Chris Nauroth <cnauroth@apache.org>
Reviewed-by: slfan1989 <55643692+slfan1989@users.noreply.github.com>
Signed-off-by: Tao Li <tomscut@apache.org>
dingshun3016 2 years ago
parent
commit
2fa540dca1

+ 10 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -72,6 +72,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STARTUP_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT;
@@ -353,6 +355,7 @@ public class DataNode extends ReconfigurableBase
               DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY,
               DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY,
               DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY,
+              DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY,
               FS_DU_INTERVAL_KEY,
               FS_GETSPACEUSED_JITTER_KEY,
               FS_GETSPACEUSED_CLASSNAME));
@@ -699,6 +702,7 @@ public class DataNode extends ReconfigurableBase
     case DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY:
     case DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY:
     case DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY:
+    case DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY:
       return reconfSlowDiskParameters(property, newVal);
     case FS_DU_INTERVAL_KEY:
     case FS_GETSPACEUSED_JITTER_KEY:
@@ -877,6 +881,12 @@ public class DataNode extends ReconfigurableBase
             Long.parseLong(newVal));
         result = Long.toString(threshold);
         diskMetrics.setLowThresholdMs(threshold);
+      } else if (property.equals(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY)) {
+        checkNotNull(diskMetrics, "DataNode disk stats may be disabled.");
+        int maxSlowDisksToExclude = (newVal == null ?
+            DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_DEFAULT : Integer.parseInt(newVal));
+        result = Integer.toString(maxSlowDisksToExclude);
+        diskMetrics.setMaxSlowDisksToExclude(maxSlowDisksToExclude);
       }
       LOG.info("RECONFIGURE* changed {} to {}", property, newVal);
       return result;

+ 9 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeDiskMetrics.java

@@ -80,7 +80,7 @@ public class DataNodeDiskMetrics {
   /**
    * The maximum number of slow disks to exclude.
    */
-  private int maxSlowDisksToExclude;
+  private volatile int maxSlowDisksToExclude;
   /**
    * List of slow disks that need to be excluded.
    */
@@ -274,6 +274,14 @@ public class DataNodeDiskMetrics {
     return slowDisksToExclude;
   }
 
+  public int getMaxSlowDisksToExclude() {
+    return maxSlowDisksToExclude;
+  }
+
+  public void setMaxSlowDisksToExclude(int maxSlowDisksToExclude) {
+    this.maxSlowDisksToExclude = maxSlowDisksToExclude;
+  }
+
   public void setLowThresholdMs(long thresholdMs) {
     Preconditions.checkArgument(thresholdMs > 0,
         DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY + " should be larger than 0");

+ 8 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java

@@ -45,6 +45,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_FILEIO_PROFILING
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OUTLIERS_REPORT_INTERVAL_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
@@ -636,13 +637,15 @@ public class TestDataNodeReconfiguration {
       String[] slowDisksParameters2 = {
           DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY,
           DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY,
-          DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY};
+          DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY,
+          DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY};
       for (String parameter : slowDisksParameters2) {
         dn.reconfigureProperty(parameter, "99");
       }
       // Assert diskMetrics.
       assertEquals(99, dn.getDiskMetrics().getMinOutlierDetectionDisks());
       assertEquals(99, dn.getDiskMetrics().getLowThresholdMs());
+      assertEquals(99, dn.getDiskMetrics().getMaxSlowDisksToExclude());
       // Assert dnConf.
       assertTrue(dn.getDnConf().diskStatsEnabled);
       // Assert profilingEventHook.
@@ -673,12 +676,16 @@ public class TestDataNodeReconfiguration {
       dn.reconfigureProperty(DFS_DATANODE_FILEIO_PROFILING_SAMPLING_PERCENTAGE_KEY, "1");
       dn.reconfigureProperty(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY, null);
       dn.reconfigureProperty(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY, null);
+      dn.reconfigureProperty(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY, null);
       assertEquals(String.format("expect %s is not configured",
           DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY), null,
           dn.getConf().get(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_KEY));
       assertEquals(String.format("expect %s is not configured",
           DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY), null,
           dn.getConf().get(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_KEY));
+      assertEquals(String.format("expect %s is not configured",
+          DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY), null,
+          dn.getConf().get(DFS_DATANODE_MAX_SLOWDISKS_TO_EXCLUDE_KEY));
       assertEquals(DFS_DATANODE_MIN_OUTLIER_DETECTION_DISKS_DEFAULT,
           dn.getDiskMetrics().getSlowDiskDetector().getMinOutlierDetectionNodes());
       assertEquals(DFS_DATANODE_SLOWDISK_LOW_THRESHOLD_MS_DEFAULT,

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java

@@ -345,7 +345,7 @@ public class TestDFSAdmin {
     final List<String> outs = Lists.newArrayList();
     final List<String> errs = Lists.newArrayList();
     getReconfigurableProperties("datanode", address, outs, errs);
-    assertEquals(19, outs.size());
+    assertEquals(20, outs.size());
     assertEquals(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, outs.get(1));
   }