Browse Source

HADOOP-3069. Primary name-node should not truncate image when transferring it from the secondary. Contributed by Konstantin Shvachko.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.16@646556 13f79535-47bb-0310-9956-ffa450edef68
Konstantin Shvachko 17 years ago
parent
commit
35dbd049b3

+ 3 - 0
CHANGES.txt

@@ -20,6 +20,9 @@ Release 0.16.3 - Unreleased
     HADOOP-3195. Fix TestFileSystem to be deterministic.
     (Tsz Wo (Nicholas), SZE via cdouglas)
 
+    HADOOP-3069. Primary name-node should not truncate image when transferring
+    it from the secondary. (shv)
+
 Release 0.16.2 - 2008-04-02
 
   BUG FIXES

+ 5 - 5
src/java/org/apache/hadoop/dfs/GetImageServlet.java

@@ -61,12 +61,12 @@ public class GetImageServlet extends HttpServlet {
                                       nn.getFsImageNameCheckpoint());
         nn.checkpointUploadDone();
       }
-    } catch (IOException ie) {
-      StringUtils.stringifyException(ie);
-      LOG.warn(ie);
-      String errMsg = "GetImage failed.";
+    } catch (Exception ie) {
+      String errMsg = "GetImage failed. " + StringUtils.stringifyException(ie);
       response.sendError(HttpServletResponse.SC_GONE, errMsg);
-      throw ie;
+      throw new IOException(errMsg);
+    } finally {
+      response.getOutputStream().close();
     }
   }
 }

+ 36 - 34
src/java/org/apache/hadoop/dfs/SecondaryNameNode.java

@@ -72,7 +72,35 @@ public class SecondaryNameNode implements FSConstants, Runnable {
   private File destImage;
   private File editFile;
 
-  private boolean[] simulation = null; // error simulation events
+  /**
+   * Utility class to facilitate junit test error simulation.
+   */
+  static class ErrorSimulator {
+    private static boolean[] simulation = null; // error simulation events
+    static void initializeErrorSimulationEvent(int numberOfEvents) {
+      simulation = new boolean[numberOfEvents]; 
+      for (int i = 0; i < numberOfEvents; i++) {
+        simulation[i] = false;
+      }
+    }
+    
+    static boolean getErrorSimulation(int index) {
+      if(simulation == null)
+        return false;
+      assert(index < simulation.length);
+      return simulation[index];
+    }
+    
+    static void setErrorSimulation(int index) {
+      assert(index < simulation.length);
+      simulation[index] = true;
+    }
+    
+    static void clearErrorSimulation(int index) {
+      assert(index < simulation.length);
+      simulation[index] = false;
+    }
+  }
 
   /**
    * Create a connection to the primary namenode.
@@ -82,11 +110,6 @@ public class SecondaryNameNode implements FSConstants, Runnable {
     // initiate Java VM metrics
     JvmMetrics.init("SecondaryNameNode", conf.get("session.id"));
     
-    //
-    // initialize error simulation code for junit test
-    //
-    initializeErrorSimulationEvent(2);
-
     //
     // Create connection to the namenode.
     //
@@ -278,7 +301,7 @@ public class SecondaryNameNode implements FSConstants, Runnable {
     //
     // error simulation code for junit test
     //
-    if (simulation != null && simulation[0]) {
+    if (ErrorSimulator.getErrorSimulation(0)) {
       throw new IOException("Simulating error0 " +
                             "after creating edits.new");
     }
@@ -296,7 +319,7 @@ public class SecondaryNameNode implements FSConstants, Runnable {
     //
     // error simulation code for junit test
     //
-    if (simulation != null && simulation[1]) {
+    if (ErrorSimulator.getErrorSimulation(1)) {
       throw new IOException("Simulating error1 " +
                             "after uploading new image to NameNode");
     }
@@ -422,26 +445,6 @@ public class SecondaryNameNode implements FSConstants, Runnable {
     }
   }
 
-  //
-  // utility method to facilitate junit test error simulation
-  //
-  void initializeErrorSimulationEvent(int numberOfEvents) {
-    simulation = new boolean[numberOfEvents]; 
-    for (int i = 0; i < numberOfEvents; i++) {
-      simulation[i] = false;
-    }
-  }
-
-  void setErrorSimulation(int index) {
-    assert(index < simulation.length);
-    simulation[index] = true;
-  }
-
-  void clearErrorSimulation(int index) {
-    assert(index < simulation.length);
-    simulation[index] = false;
-  }
-
   /**
    * This class is used in Namesystem's jetty to retrieve a file.
    * Typically used by the Secondary NameNode to retrieve image and
@@ -463,13 +466,12 @@ public class SecondaryNameNode implements FSConstants, Runnable {
                                         nn.getNewImage());
         }
         LOG.info("New Image " + nn.getNewImage() + " retrieved by Namenode.");
-      } catch (IOException ie) {
-        StringUtils.stringifyException(ie);
-        LOG.error(ie);
-        String errMsg = "GetImage failed.";
+      } catch (Exception ie) {
+        String errMsg = "GetImage failed. " + StringUtils.stringifyException(ie);
         response.sendError(HttpServletResponse.SC_GONE, errMsg);
-        throw ie;
-
+        throw new IOException(errMsg);
+      } finally {
+        response.getOutputStream().close();
       }
     }
   }

+ 8 - 1
src/java/org/apache/hadoop/dfs/TransferFsImage.java

@@ -24,6 +24,8 @@ import java.util.Map;
 import javax.servlet.http.HttpServletResponse;
 import javax.servlet.http.HttpServletRequest;
 
+import org.apache.hadoop.dfs.SecondaryNameNode.ErrorSimulator;
+
 /**
  * This class provides fetching a specified file from the NameNode.
  */
@@ -108,6 +110,12 @@ class TransferFsImage implements FSConstants {
     FileInputStream infile = null;
     try {
       infile = new FileInputStream(localfile);
+      if (ErrorSimulator.getErrorSimulation(2)
+          && localfile.getAbsolutePath().contains("secondary")) {
+        // throw exception only when the secondary sends its image
+        throw new IOException("If this exception is not caught by the " +
+            "name-node fs image will be truncated.");
+      }
       int num = 1;
       while (num > 0) {
         num = infile.read(buf);
@@ -117,7 +125,6 @@ class TransferFsImage implements FSConstants {
         outstream.write(buf, 0, num);
       }
     } finally {
-      outstream.close();
       if (infile != null) {
         infile.close();
       }

+ 60 - 20
src/test/org/apache/hadoop/dfs/TestCheckpoint.java

@@ -23,6 +23,7 @@ import java.util.Collection;
 import java.util.Random;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.dfs.FSImage.NameNodeFile;
+import org.apache.hadoop.dfs.SecondaryNameNode.ErrorSimulator;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -132,18 +133,17 @@ public class TestCheckpoint extends TestCase {
     try {
       assertTrue(!fileSys.exists(file1));
       //
-      // Make the checkpoint fail after rolling the
-      // edit log.
+      // Make the checkpoint fail after rolling the edits log.
       //
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
-      secondary.initializeErrorSimulationEvent(2);
-      secondary.setErrorSimulation(0);
+      ErrorSimulator.setErrorSimulation(0);
 
       try {
         secondary.doCheckpoint();  // this should fail
-        assertTrue(false);      
+        assertTrue(false);
       } catch (IOException e) {
       }
+      ErrorSimulator.clearErrorSimulation(0);
       secondary.shutdown();
 
       //
@@ -173,15 +173,7 @@ public class TestCheckpoint extends TestCase {
       assertFalse(image.getEditNewFile(idx).exists());
       File edits = image.getEditFile(idx);
       assertTrue(edits.exists()); // edits should exist and be empty
-      long editsLen = -1;
-      RandomAccessFile eF = null;
-      try {
-        eF = new RandomAccessFile(edits, "r");
-        editsLen = eF.length();
-      } finally {
-        if(eF != null)
-          eF.close();
-      }
+      long editsLen = edits.length();
       assertTrue(editsLen == Integer.SIZE/Byte.SIZE);
     }
     
@@ -215,14 +207,14 @@ public class TestCheckpoint extends TestCase {
       // Make the checkpoint fail after uploading the new fsimage.
       //
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
-      secondary.initializeErrorSimulationEvent(2);
-      secondary.setErrorSimulation(1);
+      ErrorSimulator.setErrorSimulation(1);
 
       try {
         secondary.doCheckpoint();  // this should fail
-        assertTrue(false);      
+        assertTrue(false);
       } catch (IOException e) {
       }
+      ErrorSimulator.clearErrorSimulation(1);
       secondary.shutdown();
 
       //
@@ -273,14 +265,14 @@ public class TestCheckpoint extends TestCase {
       // Make the checkpoint fail after rolling the edit log.
       //
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
-      secondary.initializeErrorSimulationEvent(2);
-      secondary.setErrorSimulation(0);
+      ErrorSimulator.setErrorSimulation(0);
 
       try {
         secondary.doCheckpoint();  // this should fail
-        assertTrue(false);      
+        assertTrue(false);
       } catch (IOException e) {
       }
+      ErrorSimulator.clearErrorSimulation(0);
       secondary.shutdown(); // secondary namenode crash!
 
       // start new instance of secondary and verify that 
@@ -322,6 +314,52 @@ public class TestCheckpoint extends TestCase {
     }
   }
 
+  /**
+   * Simulate a secondary node failure to transfer image
+   * back to the name-node.
+   * Used to truncate primary fsimage file.
+   */
+  void testSecondaryFailsToReturnImage(Configuration conf)
+    throws IOException {
+    System.out.println("Starting testSecondaryFailsToReturnImage");
+    Path file1 = new Path("checkpointRI.dat");
+    MiniDFSCluster cluster = new MiniDFSCluster(conf, numDatanodes, 
+                                                false, null);
+    cluster.waitActive();
+    FileSystem fileSys = cluster.getFileSystem();
+    FSImage image = cluster.getNameNode().getFSImage();
+    try {
+      assertTrue(!fileSys.exists(file1));
+      long fsimageLength = image.getImageFile(0, NameNodeFile.IMAGE).length();
+      //
+      // Make the checkpoint
+      //
+      SecondaryNameNode secondary = new SecondaryNameNode(conf);
+      ErrorSimulator.setErrorSimulation(2);
+
+      try {
+        secondary.doCheckpoint();  // this should fail
+        assertTrue(false);
+      } catch (IOException e) {
+        System.out.println("testSecondaryFailsToReturnImage: doCheckpoint() " +
+            "failed predictably - " + e);
+      }
+      ErrorSimulator.clearErrorSimulation(2);
+
+      // Verify that image file sizes did not change.
+      int nrDirs = image.getNumStorageDirs();
+      for(int idx = 0; idx < nrDirs; idx++) {
+        assertTrue(image.getImageFile(idx, 
+                                NameNodeFile.IMAGE).length() == fsimageLength);
+      }
+
+      secondary.shutdown();
+    } finally {
+      fileSys.close();
+      cluster.shutdown();
+    }
+  }
+
   /**
    * Tests checkpoint in DFS.
    */
@@ -355,6 +393,7 @@ public class TestCheckpoint extends TestCase {
       // Take a checkpoint
       //
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
+      ErrorSimulator.initializeErrorSimulationEvent(3);
       secondary.doCheckpoint();
       secondary.shutdown();
     } finally {
@@ -412,5 +451,6 @@ public class TestCheckpoint extends TestCase {
     testSecondaryNamenodeError2(conf);
     testSecondaryNamenodeError3(conf);
     testNamedirError(conf, namedirs);
+    testSecondaryFailsToReturnImage(conf);
   }
 }