
HADOOP-1603. Fix a bug in namenode initialization where default replication is sometimes reset to one on restart. Contributed by Raghu.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/branches/branch-0.13@558159 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting, 18 years ago
commit ab203497a3
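
The change is easiest to read from the FSNamesystem diff below: the class had two constructors, and only the network-facing one read the replication settings from the Configuration, so an FSNamesystem built from a saved image could end up with uninitialized replication fields. A simplified sketch of that asymmetry follows; the class and types are illustrative stand-ins, not the actual Hadoop sources:

    interface Settings {
        // Stand-in for Hadoop's Configuration.
        int getInt(String key, int defaultValue);
    }

    class Namesystem {
        int defaultReplication;  // Java leaves this 0 until assigned

        // Path taken when starting a full namenode: configuration is read.
        Namesystem(Settings conf) {
            this.defaultReplication = conf.getInt("dfs.replication", 3);
        }

        // Path taken when building from a saved image: before this patch,
        // the configuration was never consulted here, so the replication
        // fields silently kept their field defaults.
        Namesystem() {
        }
    }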

+ 4 - 0
CHANGES.txt

@@ -6,6 +6,10 @@ Branch 0.13 (unreleased changes)
  1. HADOOP-1623.  Fix an infinite loop when copying directories into
     themselves.  (Dhruba Borthakur via cutting)
 
+ 2. HADOOP-1603.  Fix a bug in namenode initialization where
+    default replication is sometimes reset to one on restart.
+    (Raghu Angadi via cutting)
+
 
 Release 0.13.0 - 2007-06-08
 

+ 43 - 33
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -213,38 +213,7 @@ class FSNamesystem implements FSConstants {
                       int port,
                       NameNode nn, Configuration conf) throws IOException {
     fsNamesystemObject = this;
-    this.replicator = new ReplicationTargetChooser(
-                                                   conf.getBoolean("dfs.replication.considerLoad", true),
-                                                   this,
-                                                   clusterMap,
-                                                   LOG);
-    this.defaultReplication = conf.getInt("dfs.replication", 3);
-    this.maxReplication = conf.getInt("dfs.replication.max", 512);
-    this.minReplication = conf.getInt("dfs.replication.min", 1);
-    if (minReplication <= 0)
-      throw new IOException(
-                            "Unexpected configuration parameters: dfs.replication.min = " 
-                            + minReplication
-                            + " must be greater than 0");
-    if (maxReplication >= (int)Short.MAX_VALUE)
-      throw new IOException(
-                            "Unexpected configuration parameters: dfs.replication.max = " 
-                            + maxReplication + " must be less than " + (Short.MAX_VALUE));
-    if (maxReplication < minReplication)
-      throw new IOException(
-                            "Unexpected configuration parameters: dfs.replication.min = " 
-                            + minReplication
-                            + " must be less than dfs.replication.max = " 
-                            + maxReplication);
-    this.maxReplicationStreams = conf.getInt("dfs.max-repl-streams", 2);
-    long heartbeatInterval = conf.getLong("dfs.heartbeat.interval", 3) * 1000;
-    this.heartbeatRecheckInterval = 5 * 60 * 1000; // 5 minutes
-    this.heartbeatExpireInterval = 2 * heartbeatRecheckInterval +
-      10 * heartbeatInterval;
-    this.replicationRecheckInterval = 3 * 1000; //  3 second
-    this.decommissionRecheckInterval = conf.getInt(
-                                                   "dfs.namenode.decommission.interval",
-                                                   5 * 60 * 1000);
+    setConfigurationParameters(conf);
 
     this.localMachine = hostname;
     this.port = port;
@@ -300,11 +269,52 @@ class FSNamesystem implements FSConstants {
    * dirs is a list of directories where the filesystem directory state 
    * is stored
    */
-  FSNamesystem(FSImage fsImage) throws IOException {
+  FSNamesystem(FSImage fsImage, Configuration conf) throws IOException {
     fsNamesystemObject = this;
+    setConfigurationParameters(conf);
     this.dir = new FSDirectory(fsImage, this);
   }
 
+  /**
+   * Initializes some of the members from configuration
+   */
+  private void setConfigurationParameters(Configuration conf) 
+                                         throws IOException {
+    
+    this.replicator = new ReplicationTargetChooser(
+                                                   conf.getBoolean("dfs.replication.considerLoad", true),
+                                                   this,
+                                                   clusterMap,
+                                                   LOG);
+    this.defaultReplication = conf.getInt("dfs.replication", 3);
+    this.maxReplication = conf.getInt("dfs.replication.max", 512);
+    this.minReplication = conf.getInt("dfs.replication.min", 1);
+    if (minReplication <= 0)
+      throw new IOException(
+                            "Unexpected configuration parameters: dfs.replication.min = " 
+                            + minReplication
+                            + " must be greater than 0");
+    if (maxReplication >= (int)Short.MAX_VALUE)
+      throw new IOException(
+                            "Unexpected configuration parameters: dfs.replication.max = " 
+                            + maxReplication + " must be less than " + (Short.MAX_VALUE));
+    if (maxReplication < minReplication)
+      throw new IOException(
+                            "Unexpected configuration parameters: dfs.replication.min = " 
+                            + minReplication
+                            + " must be less than dfs.replication.max = " 
+                            + maxReplication);
+    this.maxReplicationStreams = conf.getInt("dfs.max-repl-streams", 2);
+    long heartbeatInterval = conf.getLong("dfs.heartbeat.interval", 3) * 1000;
+    this.heartbeatRecheckInterval = 5 * 60 * 1000; // 5 minutes
+    this.heartbeatExpireInterval = 2 * heartbeatRecheckInterval +
+      10 * heartbeatInterval;
+    this.replicationRecheckInterval = 3 * 1000; //  3 second
+    this.decommissionRecheckInterval = conf.getInt(
+                                                   "dfs.namenode.decommission.interval",
+                                                   5 * 60 * 1000);
+  }
+
   /** Return the FSNamesystem object
    * 
    */

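The fix itself is the classic shared-initializer refactoring: hoist everything both constructors need into one private method and call it from each, so no construction path can skip it. A compilable sketch of that shape, reusing the Settings stand-in from the sketch above; the bounds checks mirror those in the diff, and everything else is elided:

    import java.io.IOException;

    class Namesystem {
        private int defaultReplication;
        private int maxReplication;
        private int minReplication;

        // Full namenode startup path.
        Namesystem(Settings conf) throws IOException {
            setConfigurationParameters(conf);
            // ... RPC server and heartbeat setup elided ...
        }

        // Image-loading path (format, secondary-namenode merge): after the
        // patch this constructor takes the configuration too, so it can
        // initialize the same fields.
        Namesystem(Object image, Settings conf) throws IOException {
            setConfigurationParameters(conf);
            // ... directory state setup elided ...
        }

        // The one place replication settings are read and validated.
        private void setConfigurationParameters(Settings conf)
                                               throws IOException {
            defaultReplication = conf.getInt("dfs.replication", 3);
            maxReplication = conf.getInt("dfs.replication.max", 512);
            minReplication = conf.getInt("dfs.replication.min", 1);
            if (minReplication <= 0 || maxReplication >= Short.MAX_VALUE
                || maxReplication < minReplication) {
                throw new IOException("Unexpected configuration parameters:"
                    + " dfs.replication.min = " + minReplication
                    + ", dfs.replication.max = " + maxReplication);
            }
        }
    }
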
+ 1 - 1
src/java/org/apache/hadoop/dfs/NameNode.java

@@ -754,7 +754,7 @@ public class NameNode implements ClientProtocol, DatanodeProtocol, FSConstants {
       }
     }
 
-    FSNamesystem nsys = new FSNamesystem(new FSImage(dirsToFormat));
+    FSNamesystem nsys = new FSNamesystem(new FSImage(dirsToFormat), conf);
     nsys.dir.fsImage.format();
     return false;
   }

+ 2 - 2
src/java/org/apache/hadoop/dfs/SecondaryNameNode.java

@@ -289,8 +289,8 @@ public class SecondaryNameNode implements FSConstants, Runnable {
    * DEST_FS_IMAGE
    */
   private void doMerge() throws IOException {
-    FSNamesystem namesystem = new FSNamesystem(
-                                               new FSImage(checkpointDir));
+    FSNamesystem namesystem = new FSNamesystem(new FSImage(checkpointDir), 
+                                               conf);                                               
     FSImage fsImage = namesystem.dir.fsImage;
     fsImage.loadFSImage(srcImage);
     fsImage.getEditLog().loadFSEdits(editFile);

+ 18 - 18
src/test/org/apache/hadoop/dfs/TestCheckpoint.java

@@ -34,7 +34,8 @@ public class TestCheckpoint extends TestCase {
   static final long seed = 0xDEADBEEFL;
   static final int blockSize = 4096;
   static final int fileSize = 8192;
-  static final int numDatanodes = 1;
+  static final int numDatanodes = 3;
+  short replication = 3;
 
   private void writeFile(FileSystem fileSys, Path name, int repl)
     throws IOException {
@@ -52,11 +53,9 @@ public class TestCheckpoint extends TestCase {
   private void checkFile(FileSystem fileSys, Path name, int repl)
     throws IOException {
     assertTrue(fileSys.exists(name));
-    String[][] locations = fileSys.getFileCacheHints(name, 0, fileSize);
-    for (int idx = 0; idx < locations.length; idx++) {
-      assertEquals("Number of replicas for block" + idx,
-                   Math.min(numDatanodes, repl), locations[idx].length);
-    }
+    int replication = fileSys.getReplication(name);
+    assertEquals("replication for " + name, repl, replication);
+    //We should probably test for more of the file properties.
   }
   
   private void cleanupFile(FileSystem fileSys, Path name)
@@ -150,8 +149,8 @@ public class TestCheckpoint extends TestCase {
       //
       // Create a new file
       //
-      writeFile(fileSys, file1, 1);
-      checkFile(fileSys, file1, 1);
+      writeFile(fileSys, file1, replication);
+      checkFile(fileSys, file1, replication);
     } finally {
       fileSys.close();
       cluster.shutdown();
@@ -167,7 +166,7 @@ public class TestCheckpoint extends TestCase {
     cluster.waitActive();
     fileSys = cluster.getFileSystem();
     try {
-      checkFile(fileSys, file1, 1);
+      checkFile(fileSys, file1, replication);
       cleanupFile(fileSys, file1);
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
       secondary.doCheckpoint();
@@ -209,8 +208,8 @@ public class TestCheckpoint extends TestCase {
       //
       // Create a new file
       //
-      writeFile(fileSys, file1, 1);
-      checkFile(fileSys, file1, 1);
+      writeFile(fileSys, file1, replication);
+      checkFile(fileSys, file1, replication);
     } finally {
       fileSys.close();
       cluster.shutdown();
@@ -226,7 +225,7 @@ public class TestCheckpoint extends TestCase {
     cluster.waitActive();
     fileSys = cluster.getFileSystem();
     try {
-      checkFile(fileSys, file1, 1);
+      checkFile(fileSys, file1, replication);
       cleanupFile(fileSys, file1);
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
       secondary.doCheckpoint();
@@ -246,6 +245,7 @@ public class TestCheckpoint extends TestCase {
     Collection<File> namedirs = null;
 
     Configuration conf = new Configuration();
+    replication = (short)conf.getInt("dfs.replication", 3);  
     MiniDFSCluster cluster = new MiniDFSCluster(conf, numDatanodes, true, null);
     cluster.waitActive();
     FileSystem fileSys = cluster.getFileSystem();
@@ -261,8 +261,8 @@ public class TestCheckpoint extends TestCase {
       //
       // Create file1
       //
-      writeFile(fileSys, file1, 1);
-      checkFile(fileSys, file1, 1);
+      writeFile(fileSys, file1, replication);
+      checkFile(fileSys, file1, replication);
 
       //
       // Take a checkpoint
@@ -283,12 +283,12 @@ public class TestCheckpoint extends TestCase {
     fileSys = cluster.getFileSystem();
     try {
       // check that file1 still exists
-      checkFile(fileSys, file1, 1);
+      checkFile(fileSys, file1, replication);
       cleanupFile(fileSys, file1);
 
       // create new file file2
-      writeFile(fileSys, file2, 1);
-      checkFile(fileSys, file2, 1);
+      writeFile(fileSys, file2, replication);
+      checkFile(fileSys, file2, replication);
 
       //
       // Take a checkpoint
@@ -313,7 +313,7 @@ public class TestCheckpoint extends TestCase {
 
     try {
       // verify that file2 exists
-      checkFile(fileSys, file2, 1);
+      checkFile(fileSys, file2, replication);
     } finally {
       fileSys.close();
       cluster.shutdown();
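
The TestCheckpoint changes follow from the fix: with the namenode now honoring dfs.replication on every construction path, the test runs three datanodes instead of one and asserts the configured factor directly via FileSystem.getReplication rather than counting cached block locations per block. A hedged sketch of that test shape, using only the 0.13-era calls that appear in this diff (MiniDFSCluster(conf, n, true, null), waitActive, getFileSystem, getReplication); writeFile is the test's own helper:

    Configuration conf = new Configuration();
    short replication = (short) conf.getInt("dfs.replication", 3);
    MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
    cluster.waitActive();
    FileSystem fileSys = cluster.getFileSystem();
    try {
        Path file1 = new Path("checkpoint.dat");
        writeFile(fileSys, file1, replication);  // helper from the test above
        // The filesystem should report the factor we asked for, not 1.
        assertEquals("replication for " + file1,
                     replication, fileSys.getReplication(file1));
    } finally {
        fileSys.close();
        cluster.shutdown();
    }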