
HADOOP-1603. Fix a bug in namenode initialization where default replication is sometimes reset to one on restart. Contributed by Raghu.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@558150 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting, 18 years ago (commit cf480f8d1b)
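
The bug pattern behind this fix shows up in the FSNamesystem diff below: the class has two constructors, and only the fresh-start path read the replication settings from the Configuration, so a namenode restarted from a saved image never applied the configured dfs.replication. A minimal sketch of that pattern, using a hypothetical class name (ReplicationConfigSketch) in place of the real FSNamesystem:

    import org.apache.hadoop.conf.Configuration;

    // Simplified illustration of the bug pattern; ReplicationConfigSketch is a
    // hypothetical stand-in for FSNamesystem, not the actual source.
    class ReplicationConfigSketch {
      private short defaultReplication;

      // Fresh-start path: reads the configured value.
      ReplicationConfigSketch(Configuration conf) {
        this.defaultReplication = (short) conf.getInt("dfs.replication", 3);
      }

      // Restart-from-image path: before the fix, nothing here read
      // dfs.replication, so the configured value was silently lost.
      ReplicationConfigSketch(Object savedImage) {
      }
    }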

CHANGES.txt (+4, -0)

@@ -391,6 +391,10 @@ Branch 0.14 (unreleased changes)
 130. HADOOP-1623.  Fix an infinite loop when copying directories.
      (Dhruba Borthakur via cutting)
 
+131. HADOOP-1603.  Fix a bug in namenode initialization where
+     default replication is sometimes reset to one on restart.
+     (Raghu Angadi via cutting)
+
 
 Release 0.13.0 - 2007-06-08
 

src/java/org/apache/hadoop/dfs/FSNamesystem.java (+41, -32)

@@ -208,38 +208,7 @@ class FSNamesystem implements FSConstants {
                       int port,
                       NameNode nn, Configuration conf) throws IOException {
     fsNamesystemObject = this;
-    this.replicator = new ReplicationTargetChooser(
-                                                   conf.getBoolean("dfs.replication.considerLoad", true),
-                                                   this,
-                                                   clusterMap,
-                                                   LOG);
-    this.defaultReplication = conf.getInt("dfs.replication", 3);
-    this.maxReplication = conf.getInt("dfs.replication.max", 512);
-    this.minReplication = conf.getInt("dfs.replication.min", 1);
-    if (minReplication <= 0)
-      throw new IOException(
-                            "Unexpected configuration parameters: dfs.replication.min = " 
-                            + minReplication
-                            + " must be greater than 0");
-    if (maxReplication >= (int)Short.MAX_VALUE)
-      throw new IOException(
-                            "Unexpected configuration parameters: dfs.replication.max = " 
-                            + maxReplication + " must be less than " + (Short.MAX_VALUE));
-    if (maxReplication < minReplication)
-      throw new IOException(
-                            "Unexpected configuration parameters: dfs.replication.min = " 
-                            + minReplication
-                            + " must be less than dfs.replication.max = " 
-                            + maxReplication);
-    this.maxReplicationStreams = conf.getInt("dfs.max-repl-streams", 2);
-    long heartbeatInterval = conf.getLong("dfs.heartbeat.interval", 3) * 1000;
-    this.heartbeatRecheckInterval = 5 * 60 * 1000; // 5 minutes
-    this.heartbeatExpireInterval = 2 * heartbeatRecheckInterval +
-      10 * heartbeatInterval;
-    this.replicationRecheckInterval = 3 * 1000; //  3 second
-    this.decommissionRecheckInterval = conf.getInt(
-                                                   "dfs.namenode.decommission.interval",
-                                                   5 * 60 * 1000);
+    setConfigurationParameters(conf);
 
     this.localMachine = hostname;
     this.port = port;
@@ -296,9 +265,49 @@ class FSNamesystem implements FSConstants {
    */
   FSNamesystem(FSImage fsImage, Configuration conf) throws IOException {
     fsNamesystemObject = this;
+    setConfigurationParameters(conf);
     this.dir = new FSDirectory(fsImage, this, conf);
   }
 
+  /**
+   * Initializes some of the members from configuration
+   */
+  private void setConfigurationParameters(Configuration conf) 
+                                          throws IOException {
+    this.replicator = new ReplicationTargetChooser(
+                                                   conf.getBoolean("dfs.replication.considerLoad", true),
+                                                   this,
+                                                   clusterMap,
+                                                   LOG);
+    this.defaultReplication = conf.getInt("dfs.replication", 3);
+    this.maxReplication = conf.getInt("dfs.replication.max", 512);
+    this.minReplication = conf.getInt("dfs.replication.min", 1);
+    if (minReplication <= 0)
+      throw new IOException(
+                            "Unexpected configuration parameters: dfs.replication.min = " 
+                            + minReplication
+                            + " must be greater than 0");
+    if (maxReplication >= (int)Short.MAX_VALUE)
+      throw new IOException(
+                            "Unexpected configuration parameters: dfs.replication.max = " 
+                            + maxReplication + " must be less than " + (Short.MAX_VALUE));
+    if (maxReplication < minReplication)
+      throw new IOException(
+                            "Unexpected configuration parameters: dfs.replication.min = " 
+                            + minReplication
+                            + " must be less than dfs.replication.max = " 
+                            + maxReplication);
+    this.maxReplicationStreams = conf.getInt("dfs.max-repl-streams", 2);
+    long heartbeatInterval = conf.getLong("dfs.heartbeat.interval", 3) * 1000;
+    this.heartbeatRecheckInterval = 5 * 60 * 1000; // 5 minutes
+    this.heartbeatExpireInterval = 2 * heartbeatRecheckInterval +
+      10 * heartbeatInterval;
+    this.replicationRecheckInterval = 3 * 1000; //  3 second
+    this.decommissionRecheckInterval = conf.getInt(
+                                                   "dfs.namenode.decommission.interval",
+                                                   5 * 60 * 1000);    
+  }
+
   /** Return the FSNamesystem object
    * 
    */
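
The fix extracts all of the Configuration reads into a single private helper, setConfigurationParameters, invoked from both constructors, so the restart path now initializes the same fields as a fresh start. A condensed sketch of the resulting shape, with hypothetical names and the validation reduced to a single check:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;

    // Hypothetical condensed form of the fixed class: both construction
    // paths funnel through one initializer.
    class ReplicationConfigFixed {
      private short defaultReplication;

      ReplicationConfigFixed(Configuration conf) throws IOException {
        setConfigurationParameters(conf);
      }

      ReplicationConfigFixed(Object savedImage, Configuration conf)
          throws IOException {
        setConfigurationParameters(conf);  // restart path is now covered too
      }

      private void setConfigurationParameters(Configuration conf)
          throws IOException {
        this.defaultReplication = (short) conf.getInt("dfs.replication", 3);
        if (this.defaultReplication < 1) {
          throw new IOException("dfs.replication must be at least 1");
        }
      }
    }

Centralizing the reads also means any setting added later only has to be wired up in one place.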

src/test/org/apache/hadoop/dfs/TestCheckpoint.java (+18, -18)

@@ -33,7 +33,8 @@ public class TestCheckpoint extends TestCase {
   static final long seed = 0xDEADBEEFL;
   static final int blockSize = 4096;
   static final int fileSize = 8192;
-  static final int numDatanodes = 1;
+  static final int numDatanodes = 3;
+  short replication = 3;
 
   private void writeFile(FileSystem fileSys, Path name, int repl)
     throws IOException {
@@ -51,11 +52,9 @@ public class TestCheckpoint extends TestCase {
   private void checkFile(FileSystem fileSys, Path name, int repl)
     throws IOException {
     assertTrue(fileSys.exists(name));
-    String[][] locations = fileSys.getFileCacheHints(name, 0, fileSize);
-    for (int idx = 0; idx < locations.length; idx++) {
-      assertEquals("Number of replicas for block" + idx,
-                   Math.min(numDatanodes, repl), locations[idx].length);
-    }
+    int replication = fileSys.getFileStatus(name).getReplication();
+    assertEquals("replication for " + name, repl, replication);
+    //We should probably test for more of the file properties.    
   }
   
   private void cleanupFile(FileSystem fileSys, Path name)
@@ -149,8 +148,8 @@ public class TestCheckpoint extends TestCase {
       //
       // Create a new file
       //
-      writeFile(fileSys, file1, 1);
-      checkFile(fileSys, file1, 1);
+      writeFile(fileSys, file1, replication);
+      checkFile(fileSys, file1, replication);
     } finally {
       fileSys.close();
       cluster.shutdown();
@@ -166,7 +165,7 @@ public class TestCheckpoint extends TestCase {
     cluster.waitActive();
     fileSys = cluster.getFileSystem();
     try {
-      checkFile(fileSys, file1, 1);
+      checkFile(fileSys, file1, replication);
       cleanupFile(fileSys, file1);
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
       secondary.doCheckpoint();
@@ -208,8 +207,8 @@ public class TestCheckpoint extends TestCase {
       //
       // Create a new file
       //
-      writeFile(fileSys, file1, 1);
-      checkFile(fileSys, file1, 1);
+      writeFile(fileSys, file1, replication);
+      checkFile(fileSys, file1, replication);
     } finally {
       fileSys.close();
       cluster.shutdown();
@@ -225,7 +224,7 @@ public class TestCheckpoint extends TestCase {
     cluster.waitActive();
     fileSys = cluster.getFileSystem();
     try {
-      checkFile(fileSys, file1, 1);
+      checkFile(fileSys, file1, replication);
       cleanupFile(fileSys, file1);
       SecondaryNameNode secondary = new SecondaryNameNode(conf);
       secondary.doCheckpoint();
@@ -245,6 +244,7 @@ public class TestCheckpoint extends TestCase {
     Collection<File> namedirs = null;
 
     Configuration conf = new Configuration();
+    replication = (short)conf.getInt("dfs.replication", 3);  
     MiniDFSCluster cluster = new MiniDFSCluster(conf, numDatanodes, true, null);
     cluster.waitActive();
     FileSystem fileSys = cluster.getFileSystem();
@@ -260,8 +260,8 @@ public class TestCheckpoint extends TestCase {
       //
       // Create file1
       //
-      writeFile(fileSys, file1, 1);
-      checkFile(fileSys, file1, 1);
+      writeFile(fileSys, file1, replication);
+      checkFile(fileSys, file1, replication);
 
       //
       // Take a checkpoint
@@ -282,12 +282,12 @@ public class TestCheckpoint extends TestCase {
     fileSys = cluster.getFileSystem();
     try {
       // check that file1 still exists
-      checkFile(fileSys, file1, 1);
+      checkFile(fileSys, file1, replication);
       cleanupFile(fileSys, file1);
 
       // create new file file2
-      writeFile(fileSys, file2, 1);
-      checkFile(fileSys, file2, 1);
+      writeFile(fileSys, file2, replication);
+      checkFile(fileSys, file2, replication);
 
       //
       // Take a checkpoint
@@ -312,7 +312,7 @@ public class TestCheckpoint extends TestCase {
 
     try {
       // verify that file2 exists
-      checkFile(fileSys, file2, 1);
+      checkFile(fileSys, file2, replication);
     } finally {
       fileSys.close();
       cluster.shutdown();
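
The test changes follow the same theme: checkFile now asserts the replication factor the namenode actually recorded, via getFileStatus().getReplication(), instead of counting cached block locations, and numDatanodes is raised to 3 so that a replication factor above one can be satisfied. A sketch of that assertion style as a hypothetical standalone JUnit 3 helper (the real checks live in TestCheckpoint):

    import java.io.IOException;
    import junit.framework.TestCase;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ReplicationAssertionSketch extends TestCase {
      // Ask the filesystem for the file's recorded replication factor and
      // compare it to what the writer requested.
      static void assertReplication(FileSystem fs, Path name, int expected)
          throws IOException {
        TestCase.assertEquals("replication for " + name,
                              expected, fs.getFileStatus(name).getReplication());
      }
    }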