Просмотр исходного кода

HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. Contributed by Ravi Prakash.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1498665 13f79535-47bb-0310-9956-ffa450edef68
Kihwal Lee 12 лет назад
Родитель
Commit
ead7fa0413

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -637,6 +637,9 @@ Release 2.1.0-beta - 2013-07-02
     HDFS-4944. WebHDFS cannot create a file path containing characters that must
     be URI-encoded, such as space. (cnauroth)
 
+    HDFS-4888. Refactor and fix FSNamesystem.getTurnOffTip. (Ravi Prakash via
+    kihwal)
+
   BREAKDOWN OF HDFS-347 SUBTASKS AND RELATED JIRAS
 
     HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes.

+ 29 - 34
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -4031,9 +4031,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       
     // internal fields
     /** Time when threshold was reached.
-     * 
-     * <br>-1 safe mode is off
-     * <br> 0 safe mode is on, but threshold is not reached yet 
+     * <br> -1 safe mode is off
+     * <br> 0 safe mode is on, and threshold is not reached yet
+     * <br> >0 safe mode is on, but we are in extension period 
      */
     private long reached = -1;  
     /** Total number of blocks. */
@@ -4157,7 +4157,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       NameNode.stateChangeLog.info("STATE* Leaving safe mode after " 
                                     + timeInSafemode/1000 + " secs");
       NameNode.getNameNodeMetrics().setSafeModeTime((int) timeInSafemode);
-      
+
+      //Log the following only once (when transitioning from ON -> OFF)
       if (reached >= 0) {
         NameNode.stateChangeLog.info("STATE* Safe mode is OFF"); 
       }
@@ -4338,62 +4339,56 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
      * A tip on how safe mode is to be turned off: manually or automatically.
      */
     String getTurnOffTip() {
-      if(reached < 0)
+      if(!isOn())
         return "Safe mode is OFF.";
-      String leaveMsg = "";
+
+      //Manual OR low-resource safemode. (Admin intervention required)
+      String leaveMsg = "It was turned on manually. ";
       if (areResourcesLow()) {
-        leaveMsg = "Resources are low on NN. " 
-        	+ "Please add or free up more resources then turn off safe mode manually.  "
-        	+ "NOTE:  If you turn off safe mode before adding resources, "
-        	+ "the NN will immediately return to safe mode.";
-      } else {
-        leaveMsg = "Safe mode will be turned off automatically";
+        leaveMsg = "Resources are low on NN. Please add or free up more "
+          + "resources then turn off safe mode manually. NOTE:  If you turn off"
+          + " safe mode before adding resources, "
+          + "the NN will immediately return to safe mode. ";
       }
-      if(isManual() && !areResourcesLow()) {
-        leaveMsg = "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off";
+      if (isManual() || areResourcesLow()) {
+        return leaveMsg
+          + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
       }
 
-      if(blockTotal < 0)
-        return leaveMsg + ".";
-
+      //Automatic safemode. System will come out of safemode automatically.
+      leaveMsg = "Safe mode will be turned off automatically";
       int numLive = getNumLiveDataNodes();
       String msg = "";
       if (reached == 0) {
         if (blockSafe < blockThreshold) {
           msg += String.format(
             "The reported blocks %d needs additional %d"
-            + " blocks to reach the threshold %.4f of total blocks %d.",
+            + " blocks to reach the threshold %.4f of total blocks %d.\n",
             blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
         }
         if (numLive < datanodeThreshold) {
-          if (!"".equals(msg)) {
-            msg += "\n";
-          }
           msg += String.format(
             "The number of live datanodes %d needs an additional %d live "
-            + "datanodes to reach the minimum number %d.",
+            + "datanodes to reach the minimum number %d.\n",
             numLive, (datanodeThreshold - numLive), datanodeThreshold);
         }
-        msg += " " + leaveMsg;
       } else {
         msg = String.format("The reported blocks %d has reached the threshold"
-            + " %.4f of total blocks %d.", blockSafe, threshold, 
-            blockTotal);
+            + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
 
-        if (datanodeThreshold > 0) {
-          msg += String.format(" The number of live datanodes %d has reached "
-                               + "the minimum number %d.",
+        msg += String.format("The number of live datanodes %d has reached "
+                               + "the minimum number %d. ",
                                numLive, datanodeThreshold);
-        }
-        msg += " " + leaveMsg;
       }
+      msg += leaveMsg;
       // threshold is not reached or manual or resources low
       if(reached == 0 || (isManual() && !areResourcesLow())) {
-        return msg + ".";
+        return msg;
       }
       // extension period is in progress
-      return msg + " in " + Math.abs(reached + extension - now()) / 1000
-          + " seconds.";
+      return msg + (reached + extension - now() > 0 ?
+        " in " + (reached + extension - now()) / 1000 + " seconds."
+        : " soon.");
     }
 
     /**
@@ -5648,7 +5643,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   public String getSafemode() {
     if (!this.isInSafeMode())
       return "";
-    return "Safe mode is ON." + this.getSafeModeTip();
+    return "Safe mode is ON. " + this.getSafeModeTip();
   }
 
   @Override // NameNodeMXBean

+ 6 - 6
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java

@@ -178,9 +178,9 @@ public class TestSafeMode {
     final NameNode nn = cluster.getNameNode();
     
     String status = nn.getNamesystem().getSafemode();
-    assertEquals("Safe mode is ON.The reported blocks 0 needs additional " +
-        "15 blocks to reach the threshold 0.9990 of total blocks 15. " +
-        "Safe mode will be turned off automatically.", status);
+    assertEquals("Safe mode is ON. The reported blocks 0 needs additional " +
+        "15 blocks to reach the threshold 0.9990 of total blocks 15.\n" +
+        "Safe mode will be turned off automatically", status);
     assertFalse("Mis-replicated block queues should not be initialized " +
         "until threshold is crossed",
         NameNodeAdapter.safeModeInitializedReplQueues(nn));
@@ -353,10 +353,10 @@ public class TestSafeMode {
     fs = cluster.getFileSystem();
 
     String tipMsg = cluster.getNamesystem().getSafemode();
-    assertTrue("Safemode tip message looks right: " + tipMsg,
+    assertTrue("Safemode tip message doesn't look right: " + tipMsg,
                tipMsg.contains("The number of live datanodes 0 needs an additional " +
-                               "1 live datanodes to reach the minimum number 1. " +
-                               "Safe mode will be turned off automatically."));
+                               "1 live datanodes to reach the minimum number 1.\n" +
+                               "Safe mode will be turned off automatically"));
 
     // Start a datanode
     cluster.startDataNodes(conf, 1, true, null, null);

+ 33 - 31
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java

@@ -206,11 +206,11 @@ public class TestHASafeMode {
     // We expect it not to be stuck in safemode, since those blocks
     // that are already visible to the SBN should be processed
     // in the initial block reports.
-    assertSafeMode(nn1, 3, 3);
+    assertSafeMode(nn1, 3, 3, 3, 0);
 
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-    assertSafeMode(nn1, 8, 8);
+    assertSafeMode(nn1, 8, 8, 3, 0);
   }
   
   /**
@@ -230,7 +230,7 @@ public class TestHASafeMode {
     banner("Restarting standby");
     restartStandby();
     
-    assertSafeMode(nn1, 3, 3);
+    assertSafeMode(nn1, 3, 3, 3, 0);
     
     // Create a few blocks which will send blockReceived calls to the
     // SBN.
@@ -241,7 +241,7 @@ public class TestHASafeMode {
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
 
-    assertSafeMode(nn1, 8, 8);
+    assertSafeMode(nn1, 8, 8, 3, 0);
   }
 
   /**
@@ -281,11 +281,11 @@ public class TestHASafeMode {
 
     banner("Restarting standby");
     restartStandby();
-    assertSafeMode(nn1, 0, 5);
+    assertSafeMode(nn1, 0, 5, 3, 0);
     
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
   }
   
   /**
@@ -307,7 +307,7 @@ public class TestHASafeMode {
     restartStandby();
     
     // It will initially have all of the blocks necessary.
-    assertSafeMode(nn1, 10, 10);
+    assertSafeMode(nn1, 10, 10, 3, 0);
 
     // Delete those blocks while the SBN is in safe mode.
     // This doesn't affect the SBN, since deletions are not
@@ -322,14 +322,14 @@ public class TestHASafeMode {
     HATestUtil.waitForDNDeletions(cluster);
     cluster.triggerDeletionReports();
 
-    assertSafeMode(nn1, 10, 10);
+    assertSafeMode(nn1, 10, 10, 3, 0);
 
     // When we catch up to active namespace, it will restore back
     // to 0 blocks.
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
 
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
   }
   
   /**
@@ -355,20 +355,20 @@ public class TestHASafeMode {
     restartStandby();
     
     // It will initially have all of the blocks necessary.
-    assertSafeMode(nn1, 5, 5);
+    assertSafeMode(nn1, 5, 5, 3, 0);
 
     // Append to a block while SBN is in safe mode. This should
     // not affect safemode initially, since the DN message
     // will get queued.
     FSDataOutputStream stm = fs.append(new Path("/test"));
     try {
-      assertSafeMode(nn1, 5, 5);
+      assertSafeMode(nn1, 5, 5, 3, 0);
       
       // if we roll edits now, the SBN should see that it's under construction
       // and change its total count and safe count down by one, since UC
       // blocks are not counted by safe mode.
       HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-      assertSafeMode(nn1, 4, 4);
+      assertSafeMode(nn1, 4, 4, 3, 0);
     } finally {
       IOUtils.closeStream(stm);
     }
@@ -386,13 +386,13 @@ public class TestHASafeMode {
     HATestUtil.waitForDNDeletions(cluster);
     cluster.triggerDeletionReports();
 
-    assertSafeMode(nn1, 4, 4);
+    assertSafeMode(nn1, 4, 4, 3, 0);
 
     // When we roll the edit log, the deletions will go through.
     banner("Waiting for standby to catch up to active namespace");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
 
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
   }
   
   /**
@@ -424,20 +424,21 @@ public class TestHASafeMode {
     restartActive();
   }
   
-  private static void assertSafeMode(NameNode nn, int safe, int total) {
+  private static void assertSafeMode(NameNode nn, int safe, int total,
+    int numNodes, int nodeThresh) {
     String status = nn.getNamesystem().getSafemode();
     if (safe == total) {
       assertTrue("Bad safemode status: '" + status + "'",
           status.startsWith(
-            "Safe mode is ON." +
-            "The reported blocks " + safe + " has reached the threshold " +
-            "0.9990 of total blocks " + total + ". Safe mode will be " +
-            "turned off automatically"));
+            "Safe mode is ON. The reported blocks " + safe + " has reached the "
+            + "threshold 0.9990 of total blocks " + total + ". The number of "
+            + "live datanodes " + numNodes + " has reached the minimum number "
+            + nodeThresh + ". Safe mode will be turned off automatically"));
     } else {
       int additional = total - safe;
       assertTrue("Bad safemode status: '" + status + "'",
           status.startsWith(
-              "Safe mode is ON." +
+              "Safe mode is ON. " +
               "The reported blocks " + safe + " needs additional " +
               additional + " blocks"));
     }
@@ -467,14 +468,14 @@ public class TestHASafeMode {
 
     // We expect it to be on its way out of safemode, since all of the blocks
     // from the edit log have been reported.
-    assertSafeMode(nn1, 3, 3);
+    assertSafeMode(nn1, 3, 3, 3, 0);
     
     // Initiate a failover into it while it's in safemode
     banner("Initiating a failover into NN1 in safemode");
     NameNodeAdapter.abortEditLogs(nn0);
     cluster.transitionToActive(1);
 
-    assertSafeMode(nn1, 5, 5);
+    assertSafeMode(nn1, 5, 5, 3, 0);
   }
   
   /**
@@ -499,10 +500,11 @@ public class TestHASafeMode {
     // It will initially have all of the blocks necessary.
     String status = nn1.getNamesystem().getSafemode();
     assertTrue("Bad safemode status: '" + status + "'",
-        status.startsWith(
-            "Safe mode is ON." +
-            "The reported blocks 10 has reached the threshold 0.9990 of " +
-            "total blocks 10. Safe mode will be turned off automatically"));
+      status.startsWith(
+        "Safe mode is ON. The reported blocks 10 has reached the threshold "
+        + "0.9990 of total blocks 10. The number of live datanodes 3 has "
+        + "reached the minimum number 0. Safe mode will be turned off "
+        + "automatically"));
 
     // Delete those blocks while the SBN is in safe mode.
     // Immediately roll the edit log before the actual deletions are sent
@@ -512,7 +514,7 @@ public class TestHASafeMode {
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
 
     // Should see removal of the blocks as well as their contribution to safe block count.
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
 
     
     banner("Triggering sending deletions to DNs and Deletion Reports");
@@ -525,7 +527,7 @@ public class TestHASafeMode {
     // No change in assertion status here, but some of the consistency checks
     // in safemode will fire here if we accidentally decrement safe block count
     // below 0.    
-    assertSafeMode(nn1, 0, 0);
+    assertSafeMode(nn1, 0, 0, 3, 0);
   }
   
 
@@ -561,11 +563,11 @@ public class TestHASafeMode {
     
     banner("Restarting SBN");
     restartStandby();
-    assertSafeMode(nn1, 10, 10);
+    assertSafeMode(nn1, 10, 10, 3, 0);
 
     banner("Allowing SBN to catch up");
     HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
-    assertSafeMode(nn1, 15, 15);
+    assertSafeMode(nn1, 15, 15, 3, 0);
   }
   
   /**
@@ -593,7 +595,7 @@ public class TestHASafeMode {
     nn0.getRpcServer().rollEditLog();
     
     restartStandby();
-    assertSafeMode(nn1, 6, 6);
+    assertSafeMode(nn1, 6, 6, 3, 0);
   }
   
   /**