
HDFS-2118. Couple dfs data dir improvements. Contributed by Eli Collins

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1141713 13f79535-47bb-0310-9956-ffa450edef68
Eli Collins, 14 years ago
parent commit 3af51887b4

+ 2 - 0
hdfs/CHANGES.txt

@@ -552,6 +552,8 @@ Trunk (unreleased changes)
 
     HDFS-2056. Update fetchdt usage. (Tanping Wang via jitendra)
 
+    HDFS-2118. Couple dfs data dir improvements. (eli)
+
   BUG FIXES
 
     HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826.

+ 9 - 8
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -2186,20 +2186,21 @@ public class DataNode extends Configured
         continue;
       }
       // drop any (illegal) authority in the URI for backwards compatibility
-      File data = new File(dirURI.getPath());
+      File dir = new File(dirURI.getPath());
       try {
-        DiskChecker.checkDir(localFS, new Path(data.toURI()), permission);
-        dirs.add(data);
-      } catch (IOException e) {
-        LOG.warn("Invalid directory in: "
-                 + DFS_DATANODE_DATA_DIR_KEY + ": ", e);
-        invalidDirs.append("\"").append(data.getCanonicalPath()).append("\" ");
+        DiskChecker.checkDir(localFS, new Path(dir.toURI()), permission);
+        dirs.add(dir);
+      } catch (IOException ioe) {
+        LOG.warn("Invalid " + DFS_DATANODE_DATA_DIR_KEY + " "
+            + dir + " : ", ioe);
+        invalidDirs.append("\"").append(dir.getCanonicalPath()).append("\" ");
       }
     }
-    if (dirs.size() == 0)
+    if (dirs.size() == 0) {
       throw new IOException("All directories in "
           + DFS_DATANODE_DATA_DIR_KEY + " are invalid: "
           + invalidDirs);
+    }
     return dirs;
   }
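
For context, the touched block follows a "collect the usable directories, fail only when none remain" pattern: each configured data dir is checked, invalid ones are logged with the config key and the offending path (the improved warning above), and an IOException is thrown only if every directory is bad. Below is a minimal self-contained sketch of that pattern using plain java.io checks instead of Hadoop's DiskChecker and LocalFileSystem; the class and method names are illustrative, not the HDFS code:

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class DataDirCheckSketch {
  // Hypothetical stand-in for the real validation: keep every directory that
  // passes a basic check, warn about the rest, and fail only when none are usable.
  static List<File> checkDataDirs(List<String> configured) throws IOException {
    List<File> dirs = new ArrayList<File>();
    StringBuilder invalidDirs = new StringBuilder();
    for (String d : configured) {
      File dir = new File(d);
      if (dir.isDirectory() && dir.canRead() && dir.canWrite()) {
        dirs.add(dir);
      } else {
        // Like the improved message above, name both the key and the directory.
        System.err.println("Invalid dfs.datanode.data.dir " + dir);
        invalidDirs.append("\"").append(dir.getAbsolutePath()).append("\" ");
      }
    }
    if (dirs.size() == 0) {
      throw new IOException("All directories in dfs.datanode.data.dir are invalid: "
          + invalidDirs);
    }
    return dirs;
  }

  public static void main(String[] args) throws IOException {
    System.out.println(checkDataDirs(Arrays.asList("/tmp", "/no/such/dir")));
  }
}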
 

+ 1 - 1
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java

@@ -125,7 +125,7 @@ class DataXceiver extends Receiver implements Runnable, FSConstants {
   DataNode getDataNode() {return datanode;}
 
   /**
-   * Read/write data from/to the DataXceiveServer.
+   * Read/write data from/to the DataXceiverServer.
    */
   public void run() {
     updateCurrentThreadName("Waiting for operation");

+ 4 - 4
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java

@@ -146,10 +146,10 @@ class DataXceiverServer implements Runnable, FSConstants {
       } catch (SocketTimeoutException ignored) {
         // wake up to see if should continue to run
       } catch (IOException ie) {
-        LOG.warn(datanode.getMachineName() + ":DataXceiveServer: ", ie);
+        LOG.warn(datanode.getMachineName() + ":DataXceiverServer: ", ie);
       } catch (Throwable te) {
         LOG.error(datanode.getMachineName()
-            + ":DataXceiveServer: Exiting due to: ", te);
+            + ":DataXceiverServer: Exiting due to: ", te);
         datanode.shouldRun = false;
       }
     }
@@ -157,7 +157,7 @@ class DataXceiverServer implements Runnable, FSConstants {
       ss.close();
     } catch (IOException ie) {
       LOG.warn(datanode.getMachineName()
-          + ":DataXceiveServer: Close exception due to: ", ie);
+          + ":DataXceiverServer: Close exception due to: ", ie);
     }
   }
   
@@ -167,7 +167,7 @@ class DataXceiverServer implements Runnable, FSConstants {
     try {
       this.ss.close();
     } catch (IOException ie) {
-      LOG.warn(datanode.getMachineName() + ":DataXceiveServer.kill(): " 
+      LOG.warn(datanode.getMachineName() + ":DataXceiverServer.kill(): "
                               + StringUtils.stringifyException(ie));
     }
 

+ 1 - 1
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java

@@ -1150,7 +1150,7 @@ public class FSDataset implements FSConstants, FSDatasetInterface {
       conf.getInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
                   DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT);
 
-    String[] dataDirs = conf.getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
+    String[] dataDirs = conf.getTrimmedStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
 
     int volsConfigured = (dataDirs == null) ? 0 : dataDirs.length;
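
The one-line change above swaps getStrings for getTrimmedStrings, so a whitespace-padded value such as "/data/1, /data/2" yields clean paths instead of an entry with a leading space. A rough illustration of the difference using plain string splitting rather than Hadoop's Configuration (the method names here are placeholders):

import java.util.Arrays;

public class TrimmedSplitSketch {
  // Untrimmed split: the second entry keeps its leading space.
  static String[] split(String value) {
    return value.split(",");
  }

  // Trimmed split, roughly the behavior getTrimmedStrings gives the caller.
  static String[] splitTrimmed(String value) {
    String[] parts = value.split(",");
    for (int i = 0; i < parts.length; i++) {
      parts[i] = parts[i].trim();
    }
    return parts;
  }

  public static void main(String[] args) {
    String value = "/data/1, /data/2";
    System.out.println(Arrays.toString(split(value)));        // [/data/1,  /data/2]
    System.out.println(Arrays.toString(splitTrimmed(value))); // [/data/1, /data/2]
  }
}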
 

+ 11 - 18
hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java

@@ -133,17 +133,17 @@ public class TestDataNodeVolumeFailureToleration {
   }
 
   /** 
-   * Restart the cluster with a new volume tolerated value.
-   * @param volTolerated
-   * @param manageCluster
+   * Restart the datanodes with a new volume tolerated value.
+   * @param volTolerated number of dfs data dir failures to tolerate
+   * @param manageDfsDirs whether the mini cluster should manage data dirs
    * @throws IOException
    */
-  private void restartCluster(int volTolerated, boolean manageCluster)
+  private void restartDatanodes(int volTolerated, boolean manageDfsDirs)
       throws IOException {
     //Make sure no datanode is running
     cluster.shutdownDataNodes();
     conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, volTolerated);
-    cluster.startDataNodes(conf, 1, manageCluster, null, null);
+    cluster.startDataNodes(conf, 1, manageDfsDirs, null, null);
     cluster.waitActive();
   }
 
@@ -174,19 +174,14 @@ public class TestDataNodeVolumeFailureToleration {
 
   /**
    * Tests for a given volumes to be tolerated and volumes failed.
-   * 
-   * @param volumesTolerated
-   * @param volumesFailed
-   * @param expectedBPServiceState
-   * @param clusterManaged
-   * @throws IOException
-   * @throws InterruptedException
    */
   private void testVolumeConfig(int volumesTolerated, int volumesFailed,
-      boolean expectedBPServiceState, boolean clusterManaged)
+      boolean expectedBPServiceState, boolean manageDfsDirs)
       throws IOException, InterruptedException {
     assumeTrue(!System.getProperty("os.name").startsWith("Windows"));
     final int dnIndex = 0;
+    // Fail the current directory since invalid storage directory perms
+    // get fixed up automatically on datanode startup.
     File[] dirs = {
         new File(MiniDFSCluster.getStorageDir(dnIndex, 0), "current"),
         new File(MiniDFSCluster.getStorageDir(dnIndex, 1), "current") };
@@ -195,11 +190,10 @@ public class TestDataNodeVolumeFailureToleration {
       for (int i = 0; i < volumesFailed; i++) {
         prepareDirToFail(dirs[i]);
       }
-      restartCluster(volumesTolerated, clusterManaged);
+      restartDatanodes(volumesTolerated, manageDfsDirs);
       assertEquals(expectedBPServiceState, cluster.getDataNodes().get(0)
           .isBPServiceAlive(cluster.getNamesystem().getBlockPoolId()));
     } finally {
-      // restore its old permission
       for (File dir : dirs) {
         FileUtil.chmod(dir.toString(), "755");
       }
@@ -215,8 +209,7 @@ public class TestDataNodeVolumeFailureToleration {
   private void prepareDirToFail(File dir) throws IOException,
       InterruptedException {
     dir.mkdirs();
-    assertTrue("Couldn't chmod local vol", FileUtil
-        .chmod(dir.toString(), "000") == 0);
+    assertEquals("Couldn't chmod local vol", 0,
+        FileUtil.chmod(dir.toString(), "000"));
   }
-
 }
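
The assertion change above is the usual JUnit readability fix: assertEquals on the chmod return code reports both the expected and the actual value when it fails, while assertTrue(... == 0) only prints the message. A small standalone illustration in JUnit 4 style (the class and helper below are made up for the example):

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import org.junit.Test;

public class AssertStyleSketch {
  private int chmodReturnCode() {
    return 1; // pretend chmod failed
  }

  @Test
  public void reportsActualValue() {
    // Failure message includes the values: "Couldn't chmod local vol expected:<0> but was:<1>"
    assertEquals("Couldn't chmod local vol", 0, chmodReturnCode());
  }

  @Test
  public void hidesActualValue() {
    // Failure message is only: "Couldn't chmod local vol"
    assertTrue("Couldn't chmod local vol", chmodReturnCode() == 0);
  }
}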