Procházet zdrojové kódy

HDFS-6120. Fix and improve safe mode log messages. (Arpit Agarwal)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1580047 13f79535-47bb-0310-9956-ffa450edef68
Arpit Agarwal před 11 roky
rodič
revize
fc53af9c4c

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -430,6 +430,8 @@ Release 2.4.0 - UNRELEASED
     HDFS-6138. Add a user guide for how to use viewfs with federation.
     (sanjay and szetszwo via szetszwo)
 
+    HDFS-6120. Fix and improve safe mode log messages. (Arpit Agarwal)
+
   OPTIMIZATIONS
 
     HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery

+ 47 - 37
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -91,7 +91,6 @@ import java.io.BufferedWriter;
 import java.io.ByteArrayInputStream;
 import java.io.DataInput;
 import java.io.DataInputStream;
-import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
@@ -4823,13 +4822,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
      * @return true if can leave or false otherwise.
      */
     private synchronized boolean canLeave() {
-      if (reached == 0)
+      if (reached == 0) {
         return false;
+      }
+
       if (now() - reached < extension) {
-        reportStatus("STATE* Safe mode ON.", false);
+        reportStatus("STATE* Safe mode ON, in safe mode extension.", false);
         return false;
       }
-      return !needEnter();
+
+      if (needEnter()) {
+        reportStatus("STATE* Safe mode ON, thresholds not met.", false);
+        return false;
+      }
+
+      return true;
     }
       
     /** 
@@ -4973,56 +4980,59 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
      * A tip on how safe mode is to be turned off: manually or automatically.
      */
     String getTurnOffTip() {
-      if(!isOn())
+      if(!isOn()) {
         return "Safe mode is OFF.";
+      }
 
       //Manual OR low-resource safemode. (Admin intervention required)
-      String leaveMsg = "It was turned on manually. ";
+      String adminMsg = "It was turned on manually. ";
       if (areResourcesLow()) {
-        leaveMsg = "Resources are low on NN. Please add or free up more "
+        adminMsg = "Resources are low on NN. Please add or free up more "
           + "resources then turn off safe mode manually. NOTE:  If you turn off"
           + " safe mode before adding resources, "
           + "the NN will immediately return to safe mode. ";
       }
       if (isManual() || areResourcesLow()) {
-        return leaveMsg
+        return adminMsg
           + "Use \"hdfs dfsadmin -safemode leave\" to turn safe mode off.";
       }
 
-      //Automatic safemode. System will come out of safemode automatically.
-      leaveMsg = "Safe mode will be turned off automatically";
+      boolean thresholdsMet = true;
       int numLive = getNumLiveDataNodes();
       String msg = "";
-      if (reached == 0) {
-        if (blockSafe < blockThreshold) {
-          msg += String.format(
-            "The reported blocks %d needs additional %d"
-            + " blocks to reach the threshold %.4f of total blocks %d.\n",
-            blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
-        }
-        if (numLive < datanodeThreshold) {
-          msg += String.format(
-            "The number of live datanodes %d needs an additional %d live "
-            + "datanodes to reach the minimum number %d.\n",
-            numLive, (datanodeThreshold - numLive), datanodeThreshold);
-        }
+      if (blockSafe < blockThreshold) {
+        msg += String.format(
+          "The reported blocks %d needs additional %d"
+          + " blocks to reach the threshold %.4f of total blocks %d.\n",
+          blockSafe, (blockThreshold - blockSafe) + 1, threshold, blockTotal);
+        thresholdsMet = false;
       } else {
-        msg = String.format("The reported blocks %d has reached the threshold"
+        msg += String.format("The reported blocks %d has reached the threshold"
             + " %.4f of total blocks %d. ", blockSafe, threshold, blockTotal);
-
+      }
+      if (numLive < datanodeThreshold) {
+        msg += String.format(
+          "The number of live datanodes %d needs an additional %d live "
+          + "datanodes to reach the minimum number %d.\n",
+          numLive, (datanodeThreshold - numLive), datanodeThreshold);
+        thresholdsMet = false;
+      } else {
         msg += String.format("The number of live datanodes %d has reached "
-                               + "the minimum number %d. ",
-                               numLive, datanodeThreshold);
-      }
-      msg += leaveMsg;
-      // threshold is not reached or manual or resources low
-      if(reached == 0 || (isManual() && !areResourcesLow())) {
-        return msg;
-      }
-      // extension period is in progress
-      return msg + (reached + extension - now() > 0 ?
-        " in " + (reached + extension - now()) / 1000 + " seconds."
-        : " soon.");
+            + "the minimum number %d. ",
+            numLive, datanodeThreshold);
+      }
+      msg += (reached > 0) ? "In safe mode extension. " : "";
+      msg += "Safe mode will be turned off automatically ";
+
+      if (!thresholdsMet) {
+        msg += "once the thresholds have been reached.";
+      } else if (reached + extension - now() > 0) {
+        msg += ("in " + (reached + extension - now()) / 1000 + " seconds.");
+      } else {
+        msg += "soon.";
+      }
+
+      return msg;
     }
 
     /**

+ 3 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java

@@ -182,7 +182,9 @@ public class TestSafeMode {
     String status = nn.getNamesystem().getSafemode();
     assertEquals("Safe mode is ON. The reported blocks 0 needs additional " +
         "15 blocks to reach the threshold 0.9990 of total blocks 15.\n" +
-        "Safe mode will be turned off automatically", status);
+        "The number of live datanodes 0 has reached the minimum number 0. " +
+        "Safe mode will be turned off automatically once the thresholds " +
+        "have been reached.", status);
     assertFalse("Mis-replicated block queues should not be initialized " +
         "until threshold is crossed",
         NameNodeAdapter.safeModeInitializedReplQueues(nn));

+ 4 - 3
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java

@@ -495,7 +495,8 @@ public class TestHASafeMode {
             "Safe mode is ON. The reported blocks " + safe + " has reached the "
             + "threshold 0.9990 of total blocks " + total + ". The number of "
             + "live datanodes " + numNodes + " has reached the minimum number "
-            + nodeThresh + ". Safe mode will be turned off automatically"));
+            + nodeThresh + ". In safe mode extension. "
+            + "Safe mode will be turned off automatically"));
     } else {
       int additional = total - safe;
       assertTrue("Bad safemode status: '" + status + "'",
@@ -565,8 +566,8 @@ public class TestHASafeMode {
       status.startsWith(
         "Safe mode is ON. The reported blocks 10 has reached the threshold "
         + "0.9990 of total blocks 10. The number of live datanodes 3 has "
-        + "reached the minimum number 0. Safe mode will be turned off "
-        + "automatically"));
+        + "reached the minimum number 0. In safe mode extension. "
+        + "Safe mode will be turned off automatically"));
 
     // Delete those blocks while the SBN is in safe mode.
     // Immediately roll the edit log before the actual deletions are sent