Browse Source

HDFS-3131. Improve TestStorageRestore. Contributed by Brandon Li.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1@1305687 13f79535-47bb-0310-9956-ffa450edef68
Aaron Myers 13 years ago
parent
commit
5d59b1a46d
2 changed files with 51 additions and 75 deletions
  1. 2 0
      CHANGES.txt
  2. 49 75
      src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java

+ 2 - 0
CHANGES.txt

@@ -72,6 +72,8 @@ Release 1.1.0 - unreleased
 
     MAPREDUCE-2835. Make per-job counter limits configurable. (tomwhite)
 
+    HDFS-3131. Improve TestStorageRestore. (Brandon Li via atm)
+
   BUG FIXES
 
     HDFS-2305. Running multiple 2NNs can result in corrupt file system. (atm)

+ 49 - 75
src/test/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java

@@ -114,7 +114,6 @@ public class TestStorageRestore extends TestCase {
    * clean up
    */
   public void tearDown() throws Exception {
-    restoreAccess();    
     if (hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir)) {
       throw new IOException("Could not delete hdfs directory in tearDown '"
           + hdfsDir + "'");
@@ -136,45 +135,10 @@ public class TestStorageRestore extends TestCase {
     }
   }
 
-  /**
-   * invalidate storage by removing xwr permission from name2 and name3
-   */
-  public void removeStorageAccess(FSImage fi) throws IOException {
-    path2.setReadable(false);
-    path2.setExecutable(false);
-    path2.setWritable(false);
-    path3.setReadable(false);
-    path3.setExecutable(false);
-    path3.setWritable(false);
-    
-    for (Iterator<StorageDirectory> it = fi.dirIterator(); it.hasNext();) {
-      StorageDirectory sd = it.next();
-      
-      if (sd.getRoot().equals(path2) || sd.getRoot().equals(path3)) {
-        fi.getEditLog().removeEditsForStorageDir(sd);
-        fi.updateRemovedDirs(sd, null);
-        it.remove();
-      }
-    }
-  }
-  
-  public void restoreAccess() {
-    if (path2.exists()) {
-      path2.setReadable(true);
-      path2.setExecutable(true);
-      path2.setWritable(true);
-    }
-    if (path3.exists()) {
-      path3.setReadable(true);    
-      path3.setExecutable(true);
-      path3.setWritable(true);
-    }
-  }
-  
   /**
    * get the total number of healthy storage directories
    */
-  public int numStorageDirs(FSImage fi) throws IOException {
+  private static int numStorageDirs(FSImage fi) throws IOException {
     int sum = 0;
     for (Iterator<StorageDirectory> it = fi.dirIterator(); it.hasNext();) {
       sum++;
@@ -351,53 +315,63 @@ public class TestStorageRestore extends TestCase {
    * 1. create DFS cluster with 3 storage directories
    *    - 2 EDITS_IMAGE(name1, name2), 1 EDITS(name3)
    * 2. create a file
-   * 3. corrupt/disable name2 and name3 by removing xwr permission
-   * 4. run doCheckpoint - it will fail on removed dirs (which will invalidate the storages)
+   * 3. corrupt/disable name2 and name3 by removing rwx permission
+   * 4. run doCheckpoint
+   *    - will fail on removed dirs (which invalidates them)
    * 5. write another file
-   * 6. check that edits and fsimage differ
-   * 7. run doCheckpoint - recover should fail but checkpoint should succeed 
-   * 8. restore the access permission for name2 and name 3, run checkpoint again
-   * 9. verify that all the image and edits files are the same.
+   * 6. check there is only one healthy storage dir
+   * 7. run doCheckpoint - recover should fail but checkpoint should succeed
+   * 8. check there is still only one healthy storage dir
+   * 9. restore the access permission for name2 and name3, run checkpoint again
+   * 10. verify there are 3 healthy storage dirs with same metadata files.
    */
   public void testStorageRestoreFailure() throws Exception {
-    int numDatanodes = 2;
-    cluster = new MiniDFSCluster(0, config, numDatanodes, true, false, true,
-        null, null, null, null);
-    cluster.waitActive();
-
-    SecondaryNameNode secondary = new SecondaryNameNode(config);
-    System.out.println("****testStorageRestore: Cluster and SNN started");
-    printStorages(cluster.getNameNode().getFSImage());
+    SecondaryNameNode secondary = null;
+    try {
+      cluster = new MiniDFSCluster(0, config, 2, true, false, true, null, null,
+          null, null);
+      cluster.waitActive();
 
-    FileSystem fs = cluster.getFileSystem();
-    Path path = new Path("/", "test");
-    writeFile(fs, path, 2);
+      secondary = new SecondaryNameNode(config);
+      printStorages(cluster.getNameNode().getFSImage());
 
-    System.out
-        .println("****testStorageRestore: file test written, invalidating storage...");
+      FileSystem fs = cluster.getFileSystem();
+      Path path = new Path("/", "test");
+      writeFile(fs, path, 2);
 
-    removeStorageAccess(cluster.getNameNode().getFSImage());
-    printStorages(cluster.getNameNode().getFSImage());
-    System.out
-        .println("****testStorageRestore: storage invalidated + doCheckpoint");
-
-    path = new Path("/", "test1");
-    writeFile(fs, path, 2);
-    System.out.println("****testStorageRestore: file test1 written");
-    assert(numStorageDirs(cluster.getNameNode().getFSImage()) == 1);
+      // invalidate storage by removing rwx permission from name2 and name3
+      FileUtil.chmod(path2.toString(), "000");
+      FileUtil.chmod(path3.toString(), "000");
+      secondary.doCheckpoint(); // should remove name2 and name3
+      
+      printStorages(cluster.getNameNode().getFSImage());
 
-    System.out.println("****testStorageRestore: checkfiles(false) run");
+      path = new Path("/", "test1");
+      writeFile(fs, path, 2);
+      assert (numStorageDirs(cluster.getNameNode().getFSImage()) == 1);
 
-    secondary.doCheckpoint(); // still can't recover removed storage dirs
-    assert(numStorageDirs(cluster.getNameNode().getFSImage()) == 1);
+      secondary.doCheckpoint(); // still can't recover name2 and name3
+      assert (numStorageDirs(cluster.getNameNode().getFSImage()) == 1);
 
-    restoreAccess();
-    secondary.doCheckpoint(); // should restore removed storage dirs
-    checkFiles(true);
+      FileUtil.chmod(path2.toString(), "755");
+      FileUtil.chmod(path3.toString(), "755");
+      secondary.doCheckpoint(); // should restore name2 and name3
+      assert (numStorageDirs(cluster.getNameNode().getFSImage()) == 3);
+      checkFiles(true);
 
-    System.out
-        .println("****testStorageRestore: second Checkpoint done and checkFiles(true) run");
-    secondary.shutdown();
-    cluster.shutdown();
+    } finally {
+      if (path2.exists()) {
+        FileUtil.chmod(path2.toString(), "755");
+      }
+      if (path3.exists()) {
+        FileUtil.chmod(path3.toString(), "755");
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+      if (secondary != null) {
+        secondary.shutdown();
+      }
+    }
   }
 }