
svn merge -c 1517942 merging from branch-2 to branch-2.1-beta to fix HDFS-3245.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2.1-beta@1517943 13f79535-47bb-0310-9956-ffa450edef68
Kihwal Lee 11 years ago
parent
commit
508f42a7d9

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -52,6 +52,9 @@ Release 2.1.1-beta - UNRELEASED
     HDFS-5045. Add more unit tests for retry cache to cover all AtMostOnce 
     methods. (jing9)
 
+    HDFS-3245. Add metrics and web UI for cluster version summary. (Ravi
+    Prakash via kihwal)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 9 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java

@@ -47,6 +47,7 @@ public class DatanodeInfo extends DatanodeID implements Node {
   private long lastUpdate;
   private int xceiverCount;
   private String location = NetworkTopology.DEFAULT_RACK;
+  private String softwareVersion;
   
   // Datanode administrative states
   public enum AdminStates {
@@ -381,4 +382,12 @@ public class DatanodeInfo extends DatanodeID implements Node {
     // by DatanodeID
     return (this == obj) || super.equals(obj);
   }
+
+  public String getSoftwareVersion() {
+    return softwareVersion;
+  }
+
+  public void setSoftwareVersion(String softwareVersion) {
+    this.softwareVersion = softwareVersion;
+  }
 }

+ 77 - 9
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -26,6 +26,7 @@ import java.net.UnknownHostException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.NavigableMap;
@@ -55,13 +56,6 @@ import org.apache.hadoop.hdfs.server.namenode.HostFileManager.MutableEntrySet;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.Namesystem;
 import org.apache.hadoop.hdfs.server.protocol.BalancerBandwidthCommand;
-import org.apache.hadoop.hdfs.server.namenode.HostFileManager;
-import org.apache.hadoop.hdfs.server.namenode.HostFileManager.Entry;
-import org.apache.hadoop.hdfs.server.namenode.HostFileManager.EntrySet;
-import org.apache.hadoop.hdfs.server.namenode.HostFileManager.MutableEntrySet;
-import org.apache.hadoop.hdfs.server.namenode.NameNode;
-import org.apache.hadoop.hdfs.server.namenode.Namesystem;
-import org.apache.hadoop.hdfs.server.protocol.BalancerBandwidthCommand;
 import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
 import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand;
 import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
@@ -71,7 +65,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
 import org.apache.hadoop.hdfs.server.protocol.RegisterCommand;
 import org.apache.hadoop.hdfs.util.CyclicIteration;
-import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.net.CachedDNSToSwitchMapping;
 import org.apache.hadoop.net.DNSToSwitchMapping;
@@ -172,6 +165,14 @@ public class DatanodeManager {
    * according to the NetworkTopology.
    */
   private boolean hasClusterEverBeenMultiRack = false;
+
+  /**
+   * The number of datanodes for each software version. This list should change
+   * during rolling upgrades.
+   * Software version -> Number of datanodes with this version
+   */
+  private HashMap<String, Integer> datanodesSoftwareVersions =
+    new HashMap<String, Integer>(4, 0.75f);
   
   DatanodeManager(final BlockManager blockManager, final Namesystem namesystem,
       final Configuration conf) throws IOException {
@@ -463,6 +464,7 @@ public class DatanodeManager {
     heartbeatManager.removeDatanode(nodeInfo);
     blockManager.removeBlocksAssociatedTo(nodeInfo);
     networktopology.remove(nodeInfo);
+    decrementVersionCount(nodeInfo.getSoftwareVersion());
 
     if (LOG.isDebugEnabled()) {
       LOG.debug("remove datanode " + nodeInfo);
@@ -545,6 +547,61 @@ public class DatanodeManager {
     }
   }
 
+  private void incrementVersionCount(String version) {
+    if (version == null) {
+      return;
+    }
+    synchronized(datanodeMap) {
+      Integer count = this.datanodesSoftwareVersions.get(version);
+      count = count == null ? 1 : count + 1;
+      this.datanodesSoftwareVersions.put(version, count);
+    }
+  }
+
+  private void decrementVersionCount(String version) {
+    if (version == null) {
+      return;
+    }
+    synchronized(datanodeMap) {
+      Integer count = this.datanodesSoftwareVersions.get(version);
+      if(count != null) {
+        if(count > 1) {
+          this.datanodesSoftwareVersions.put(version, count-1);
+        } else {
+          this.datanodesSoftwareVersions.remove(version);
+        }
+      }
+    }
+  }
+
+  private boolean shouldCountVersion(DatanodeDescriptor node) {
+    return node.getSoftwareVersion() != null && node.isAlive &&
+      !isDatanodeDead(node);
+  }
+
+  private void countSoftwareVersions() {
+    synchronized(datanodeMap) {
+      HashMap<String, Integer> versionCount = new HashMap<String, Integer>();
+      for(DatanodeDescriptor dn: datanodeMap.values()) {
+        // Check isAlive too because right after removeDatanode(),
+        // isDatanodeDead() is still true 
+        if(shouldCountVersion(dn))
+        {
+          Integer num = versionCount.get(dn.getSoftwareVersion());
+          num = num == null ? 1 : num+1;
+          versionCount.put(dn.getSoftwareVersion(), num);
+        }
+      }
+      this.datanodesSoftwareVersions = versionCount;
+    }
+  }
+
+  public HashMap<String, Integer> getDatanodesSoftwareVersions() {
+    synchronized(datanodeMap) {
+      return new HashMap<String, Integer> (this.datanodesSoftwareVersions);
+    }
+  }
+
   /* Resolve a node's network location */
   private String resolveNetworkLocation (DatanodeID node) {
     List<String> names = new ArrayList<String>(1);
@@ -761,21 +818,28 @@ public class DatanodeManager {
         try {
           // update cluster map
           getNetworkTopology().remove(nodeS);
+          if(shouldCountVersion(nodeS)) {
+            decrementVersionCount(nodeS.getSoftwareVersion());
+          }
           nodeS.updateRegInfo(nodeReg);
+
+          nodeS.setSoftwareVersion(nodeReg.getSoftwareVersion());
           nodeS.setDisallowed(false); // Node is in the include list
-          
+
           // resolve network location
           nodeS.setNetworkLocation(resolveNetworkLocation(nodeS));
           getNetworkTopology().add(nodeS);
             
           // also treat the registration message as a heartbeat
           heartbeatManager.register(nodeS);
+          incrementVersionCount(nodeS.getSoftwareVersion());
           checkDecommissioning(nodeS);
           success = true;
         } finally {
           if (!success) {
             removeDatanode(nodeS);
             wipeDatanode(nodeS);
+            countSoftwareVersions();
           }
         }
         return;
@@ -799,6 +863,7 @@ public class DatanodeManager {
       try {
         nodeDescr.setNetworkLocation(resolveNetworkLocation(nodeDescr));
         networktopology.add(nodeDescr);
+        nodeDescr.setSoftwareVersion(nodeReg.getSoftwareVersion());
   
         // register new datanode
         addDatanode(nodeDescr);
@@ -809,10 +874,12 @@ public class DatanodeManager {
         // because its is done when the descriptor is created
         heartbeatManager.addDatanode(nodeDescr);
         success = true;
+        incrementVersionCount(nodeReg.getSoftwareVersion());
       } finally {
         if (!success) {
           removeDatanode(nodeDescr);
           wipeDatanode(nodeDescr);
+          countSoftwareVersions();
         }
       }
     } catch (InvalidTopologyException e) {
@@ -834,6 +901,7 @@ public class DatanodeManager {
     namesystem.writeLock();
     try {
       refreshDatanodes();
+      countSoftwareVersions();
     } finally {
       namesystem.writeUnlock();
     }
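
The version bookkeeping added above is a reference-counting pattern guarded by the datanodeMap lock: increment on registration, decrement on removal, and a full recount (countSoftwareVersions) to resynchronize after refreshNodes or a failed registration. A minimal standalone sketch of the same idea, using illustrative names that are not taken from the patch:

    import java.util.HashMap;
    import java.util.Map;

    // Sketch of the counting scheme used by DatanodeManager above: an entry
    // is created on first increment and removed when its count drops to
    // zero, so counts.size() equals the number of distinct live versions.
    class VersionCounter {
      private final Map<String, Integer> counts = new HashMap<String, Integer>();

      synchronized void increment(String version) {
        if (version == null) {
          return;
        }
        Integer c = counts.get(version);
        counts.put(version, c == null ? 1 : c + 1);
      }

      synchronized void decrement(String version) {
        Integer c = counts.get(version);
        if (c == null) {
          return;
        }
        if (c > 1) {
          counts.put(version, c - 1);
        } else {
          counts.remove(version); // last datanode on this version is gone
        }
      }

      synchronized int distinct() {
        return counts.size();
      }
    }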

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java

@@ -359,6 +359,7 @@ class ClusterJspHelper {
       nn.httpAddress = httpAddress;
       getLiveNodeCount(getProperty(props, "LiveNodes").getValueAsText(), nn);
       getDeadNodeCount(getProperty(props, "DeadNodes").getValueAsText(), nn);
+      nn.softwareVersion = getProperty(props, "SoftwareVersion").getTextValue();
       return nn;
     }
     
@@ -596,6 +597,7 @@ class ClusterJspHelper {
         toXmlItemBlockWithLink(doc, nn.deadDatanodeCount + " (" +
           nn.deadDecomCount + ")", nn.httpAddress+"/dfsnodelist.jsp?whatNodes=DEAD"
           , "Dead Datanode (Decommissioned)");
+        toXmlItemBlock(doc, "Software Version", nn.softwareVersion);
         doc.endTag(); // node
       }
       doc.endTag(); // namenodes
@@ -624,6 +626,7 @@ class ClusterJspHelper {
     int deadDatanodeCount = 0;
     int deadDecomCount = 0;
     String httpAddress = null;
+    String softwareVersion = "";
   }
 
   /**

+ 17 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -6156,6 +6156,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       innerinfo.put("nonDfsUsedSpace", node.getNonDfsUsed());
       innerinfo.put("capacity", node.getCapacity());
       innerinfo.put("numBlocks", node.numBlocks());
+      innerinfo.put("version", node.getSoftwareVersion());
       info.put(node.getHostName(), innerinfo);
     }
     return JSON.toString(info);
@@ -6264,6 +6265,22 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     return dir;
   }
 
+  @Override  //NameNodeMXBean
+  public int getDistinctVersionCount() {
+    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
+      .size();
+  }
+
+  @Override  //NameNodeMXBean
+  public Map<String, Integer> getDistinctVersions() {
+    return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
+  }
+
+  @Override  //NameNodeMXBean
+  public String getSoftwareVersion() {
+    return VersionInfo.getVersion();
+  }
+
   /**
    * Verifies that the given identifier and password are valid and match.
    * @param identifier Token identifier.
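
With the extra innerinfo entry above, each datanode's record in the live-nodes JSON built by this method now carries its software version; a single record would look roughly like this (values illustrative, fields not shown in the hunk elided):

    "dn1.example.com" : { ..., "nonDfsUsedSpace" : 4096, "capacity" : 107374182400,
                          "numBlocks" : 12, "version" : "2.1.1-beta" }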

+ 24 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java

@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import java.util.Map;
+
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 
@@ -33,7 +35,13 @@ public interface NameNodeMXBean {
    * @return the version
    */
   public String getVersion();
-  
+
+  /**
+   * Get the version of software running on the Namenode
+   * @return a string representing the version
+   */
+  public String getSoftwareVersion();
+
   /**
    * Gets the used space by data nodes.
    * 
@@ -180,4 +188,19 @@ public interface NameNodeMXBean {
    * transaction ID and the most recent checkpoint's transaction ID
    */
   public String getJournalTransactionInfo();
+
+  /**
+   * Get the number of distinct versions of live datanodes
+   * 
+   * @return the number of distinct versions of live datanodes
+   */
+  public int getDistinctVersionCount();
+
+  /**
+   * Get the number of live datanodes for each distinct versions
+   * 
+   * @return the number of live datanodes for each distinct versions
+   */
+  public Map<String, Integer> getDistinctVersions();
+  
 }
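
The new attributes are readable through the NameNode's JMX interface, and in branch-2 JMX beans are also exposed as JSON over HTTP by the JMXJsonServlet. A hedged sketch of polling them, assuming the usual web UI port 50070 and the bean name Hadoop:service=NameNode,name=NameNodeInfo (adjust both for your deployment):

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URL;

    // Fetches the NameNodeInfo MBean as JSON from the NameNode's /jmx
    // servlet and prints it; the response should include the attributes
    // added here: SoftwareVersion, DistinctVersionCount, DistinctVersions.
    public class QueryNameNodeVersions {
      public static void main(String[] args) throws Exception {
        URL url = new URL("http://namenode.example.com:50070/jmx"
            + "?qry=Hadoop:service=NameNode,name=NameNodeInfo");
        BufferedReader in = new BufferedReader(
            new InputStreamReader(url.openStream(), "UTF-8"));
        String line;
        while ((line = in.readLine()) != null) {
          System.out.println(line);
        }
        in.close();
      }
    }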

+ 21 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java

@@ -32,6 +32,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 
 import javax.servlet.ServletContext;
 import javax.servlet.http.HttpServletRequest;
@@ -99,6 +100,20 @@ class NamenodeJspHelper {
     }
   }
 
+  static String getRollingUpgradeText(FSNamesystem fsn) {
+    DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
+    Map<String, Integer> list = dm.getDatanodesSoftwareVersions();
+    if(list.size() > 1) {
+      StringBuffer status = new StringBuffer("Rolling upgrades in progress. " +
+      "There are " + list.size() + " versions of datanodes currently live: ");
+      for(Map.Entry<String, Integer> ver: list.entrySet()) {
+        status.append(ver.getKey() + "(" + ver.getValue() + "), ");
+      }
+      return status.substring(0, status.length()-2);
+    }
+    return "";
+  }
+
   static String getInodeLimitText(FSNamesystem fsn) {
     if (fsn == null) {
       return "";
@@ -802,7 +817,9 @@ class NamenodeJspHelper {
           + "<td align=\"right\" class=\"pcbpused\">"
           + percentBpUsed
           + "<td align=\"right\" class=\"volfails\">"
-          + d.getVolumeFailures() + "\n");
+          + d.getVolumeFailures()
+          + "<td align=\"right\" class=\"version\">"
+          + d.getSoftwareVersion() + "\n");
     }
 
     void generateNodesList(ServletContext context, JspWriter out,
@@ -900,7 +917,9 @@ class NamenodeJspHelper {
                 + nodeHeaderStr("pcbpused")
                 + "> Block Pool<br>Used (%)" + " <th "
                 + nodeHeaderStr("volfails")
-                +"> Failed Volumes\n");
+                +"> Failed Volumes <th "
+                + nodeHeaderStr("versionString")
+                +"> Version\n");
 
             JspHelper.sortNodeList(live, sorterField, sorterOrder);
             for (int i = 0; i < live.size(); i++) {
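
getRollingUpgradeText above returns an empty string when every live datanode reports the same version; with more than one version live it produces a banner along these lines (the map is a HashMap, so the ordering of versions is unspecified):

    Rolling upgrades in progress. There are 2 versions of datanodes currently live: 2.1.0-beta(20), 2.1.1-beta(4)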

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp

@@ -65,6 +65,7 @@
 <h3>Cluster Summary</h3>
 <b> <%= NamenodeJspHelper.getSecurityModeText()%> </b>
 <b> <%= NamenodeJspHelper.getSafeModeText(fsn)%> </b>
+<b> <%= NamenodeJspHelper.getRollingUpgradeText(fsn)%> </b>
 <b> <%= NamenodeJspHelper.getInodeLimitText(fsn)%> </b>
 <%= NamenodeJspHelper.getCorruptFilesWarning(fsn)%>