
HDFS-1161. Make DN minimum valid volumes configurable. Contributed by Eli Collins.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hdfs/trunk@950618 13f79535-47bb-0310-9956-ffa450edef68
Thomas White, 15 years ago
Parent commit: a9293ccc34

+ 3 - 0
CHANGES.txt

@@ -560,6 +560,9 @@ Release 0.21.0 - Unreleased
     HDFS-995.  Replace usage of FileStatus#isDir().  (Eli Collins via
     tomwhite)
 
+    HDFS-1161.  Make DN minimum valid volumes configurable.
+    (Eli Collins via tomwhite)
+
   OPTIMIZATIONS
 
     HDFS-946. NameNode should not return full path name when listing a

+ 9 - 0
src/java/hdfs-default.xml

@@ -510,4 +510,13 @@ creations/deletions), or "all".</description>
   </description>
 </property>
 
+<property>
+  <name>dfs.datanode.failed.volumes.tolerated</name>
+  <value>0</value>
+  <description>The number of volumes that are allowed to
+  fail before a datanode stops offering service. By default,
+  any volume failure will cause a datanode to shut down.
+  </description>
+</property>
+
 </configuration>
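
To use the new setting, an operator overrides it in hdfs-site.xml rather than editing hdfs-default.xml. A minimal sketch (the value 1 is illustrative and assumes the DataNode has more than one data directory) that keeps a DataNode running after a single disk failure:

<property>
  <name>dfs.datanode.failed.volumes.tolerated</name>
  <value>1</value>
</property>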

+ 2 - 0
src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -89,6 +89,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final boolean DFS_NAMENODE_NAME_DIR_RESTORE_DEFAULT = false;
   public static final String  DFS_LIST_LIMIT = "dfs.ls.limit";
   public static final int     DFS_LIST_LIMIT_DEFAULT = 1000;
+  public static final String  DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY = "dfs.datanode.failed.volumes.tolerated";
+  public static final int     DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT = 0;
 
   //Delegation token related keys
   public static final String  DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY = "dfs.namenode.delegation.key.update-interval";
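
Callers resolve these constants through Hadoop's standard Configuration API; the patch's own use in FSDataset (below) follows the same pattern. A minimal sketch, assuming the key is unset so the compile-time default applies (the class name ReadTolerated is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class ReadTolerated {
  public static void main(String[] args) {
    Configuration conf = new HdfsConfiguration();
    // Falls back to DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT (0)
    // when the key is absent from hdfs-site.xml.
    int tolerated = conf.getInt(
        DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
        DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT);
    System.out.println("tolerated volume failures: " + tolerated);
  }
}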

+ 0 - 1
src/java/org/apache/hadoop/hdfs/protocol/FSConstants.java

@@ -60,7 +60,6 @@ public interface FSConstants {
   public static final int DEFAULT_DATA_SOCKET_SIZE = 128 * 1024;
 
   public static final int SIZE_OF_INTEGER = Integer.SIZE / Byte.SIZE;
-  public static final int MIN_NUM_OF_VALID_VOLUMES = 1;// for a DN to run
 
   // SafeMode actions
   public enum SafeModeAction{ SAFEMODE_LEAVE, SAFEMODE_ENTER, SAFEMODE_GET; }

+ 16 - 4
src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java

@@ -836,6 +836,7 @@ public class FSDataset implements FSConstants, FSDatasetInterface {
   ReplicasMap volumeMap = new ReplicasMap();
   static  Random random = new Random();
   FSDatasetAsyncDiskService asyncDiskService;
+  private int validVolsRequired;
 
   // Used for synchronizing access to usage stats
   private Object statsLock = new Object();
@@ -849,6 +850,17 @@ public class FSDataset implements FSConstants, FSDatasetInterface {
     this.maxBlocksPerDir = conf.getInt("dfs.datanode.numblocks", 64);
     this.supportAppends = conf.getBoolean(DFSConfigKeys.DFS_SUPPORT_APPEND_KEY,
                                       DFSConfigKeys.DFS_SUPPORT_APPEND_DEFAULT);
+    // The number of volumes required for operation is the total number 
+    // of volumes minus the number of failed volumes we can tolerate.
+    final int volFailuresTolerated =
+      conf.getInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
+                  DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT);
+    this.validVolsRequired = storage.getNumStorageDirs() - volFailuresTolerated; 
+    if (validVolsRequired < 1 ||
+        validVolsRequired > storage.getNumStorageDirs()) {
+      DataNode.LOG.error("Invalid value " + volFailuresTolerated + " for " +
+          DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY);
+    }
     FSVolume[] volArray = new FSVolume[storage.getNumStorageDirs()];
     for (int idx = 0; idx < storage.getNumStorageDirs(); idx++) {
       volArray[idx] = new FSVolume(storage.getStorageDir(idx).getCurrentDir(), conf);
@@ -871,12 +883,12 @@ public class FSDataset implements FSConstants, FSDatasetInterface {
       return volumes.getDfsUsed();
     }
   }
+
   /**
-   * Return true - if there are still valid volumes 
-   * on the DataNode
+   * Return true if the DataNode still has enough valid volumes to operate.
    */
-  public boolean hasEnoughResource(){
-    return volumes.numberOfVolumes() >= MIN_NUM_OF_VALID_VOLUMES;
+  public boolean hasEnoughResource() {
+    return volumes.numberOfVolumes() >= validVolsRequired; 
   }
 
   /**
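
The arithmetic is easiest to see with concrete numbers. A standalone sketch of the invariant (the class name VolumeMath and the volume counts are illustrative; the field and method names follow the patch):

public class VolumeMath {
  public static void main(String[] args) {
    int numStorageDirs = 4;       // volumes listed in dfs.datanode.data.dir
    int volFailuresTolerated = 1; // dfs.datanode.failed.volumes.tolerated

    // Same derivation as the FSDataset constructor above.
    int validVolsRequired = numStorageDirs - volFailuresTolerated; // 3

    // The patch only logs an error for an out-of-range value, i.e.
    // tolerated < 0 or tolerated >= the number of configured volumes.
    boolean configValid =
        validVolsRequired >= 1 && validVolsRequired <= numStorageDirs;

    // hasEnoughResource(): the DN keeps serving while at least
    // validVolsRequired volumes remain healthy.
    int remainingVolumes = 3; // one volume has failed
    boolean enough = remainingVolumes >= validVolsRequired; // still true here

    System.out.println(configValid + " " + enough);
  }
}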

+ 2 - 0
src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java

@@ -71,6 +71,8 @@ public class TestDataNodeVolumeFailure extends TestCase{
     // bring up a cluster of 2
     Configuration conf = new HdfsConfiguration();
     conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, block_size);
+    // Allow a single volume failure (there are two volumes)
+    conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1);
     cluster = new MiniDFSCluster(conf, dn_num, true, null);
     cluster.waitActive();
   }
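
With MiniDFSCluster's default of two data directories per DataNode, validVolsRequired works out to 2 - 1 = 1, so each DataNode in this test keeps serving after one volume failure instead of shutting down as it would under the default tolerance of 0.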