
HDFS-3990. NN's health report has severe performance problems (daryn)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1407336 13f79535-47bb-0310-9956-ffa450edef68
Daryn Sharp, 12 years ago
parent
commit
a54e9a3155

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -20,6 +20,8 @@ Release 0.23.5 - UNRELEASED
 
   OPTIMIZATIONS
 
+    HDFS-3990. NN's health report has severe performance problems (daryn)
+
   BUG FIXES
 
     HDFS-3919. MiniDFSCluster:waitClusterUp can hang forever.

+ 14 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java

@@ -39,6 +39,7 @@ public class DatanodeID implements WritableComparable<DatanodeID> {
   public static final DatanodeID[] EMPTY_ARRAY = {}; 
 
   public String name;      /// hostname:portNumber
+  private String peerHostName; // hostname from the actual connection
   public String storageID; /// unique per cluster storageID
   protected int infoPort;     /// the port where the infoserver is running
   public int ipcPort;     /// the port where the ipc server is running
@@ -59,6 +60,7 @@ public class DatanodeID implements WritableComparable<DatanodeID> {
         from.getStorageID(),
         from.getInfoPort(),
         from.getIpcPort());
+    this.peerHostName = from.getPeerHostName();
   }
   
   /**
@@ -84,6 +86,10 @@ public class DatanodeID implements WritableComparable<DatanodeID> {
     this.infoPort = infoPort;
   }
   
+  public void setPeerHostName(String peerHostName) {
+    this.peerHostName = peerHostName;
+  }
+  
   public void setIpcPort(int ipcPort) {
     this.ipcPort = ipcPort;
   }
@@ -102,6 +108,13 @@ public class DatanodeID implements WritableComparable<DatanodeID> {
     return this.storageID;
   }
 
+  /**
+   * @return hostname from the actual connection 
+   */
+  public String getPeerHostName() {
+    return peerHostName;
+  }
+  
   /**
    * @return infoPort (the port at which the HTTP server bound to)
    */
@@ -168,6 +181,7 @@ public class DatanodeID implements WritableComparable<DatanodeID> {
    */
   public void updateRegInfo(DatanodeID nodeReg) {
     name = nodeReg.getName();
+    peerHostName = nodeReg.getPeerHostName();
     infoPort = nodeReg.getInfoPort();
     ipcPort = nodeReg.getIpcPort();
     // update any more fields added in future.
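
The new peerHostName field caches the hostname observed on the registration
connection, so later include/exclude checks can reuse it instead of resolving the
node's address again. A minimal sketch of the intended usage pattern, assuming a
hypothetical NameNode-side caller (only the DatanodeID accessors added above come
from this patch):

    // RegistrationSketch is illustrative only; it is not part of the patch.
    import java.net.InetAddress;
    import java.util.Set;
    import org.apache.hadoop.hdfs.protocol.DatanodeID;

    class RegistrationSketch {
      void onRegister(DatanodeID dn, InetAddress peer) {
        // Resolve once, from the socket peer address, at registration time.
        dn.setPeerHostName(peer.getHostName());
      }

      boolean inHostFile(DatanodeID dn, Set<String> hosts) {
        // Reuse the cached name on every later check; no DNS lookup here.
        String host = dn.getPeerHostName();
        return host != null && hosts.contains(host);
      }
    }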

+ 55 - 66
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -375,12 +375,12 @@ public class DatanodeManager {
     node.setNetworkLocation(networkLocation);
   }
 
-  private boolean inHostsList(DatanodeID node, String ipAddr) {
-     return checkInList(node, ipAddr, hostsReader.getHosts(), false);
+  private boolean inHostsList(DatanodeID node) {
+     return checkInList(node, hostsReader.getHosts(), false);
   }
   
-  private boolean inExcludedHostsList(DatanodeID node, String ipAddr) {
-    return checkInList(node, ipAddr, hostsReader.getExcludedHosts(), true);
+  private boolean inExcludedHostsList(DatanodeID node) {
+    return checkInList(node, hostsReader.getExcludedHosts(), true);
   }
 
   /**
@@ -418,7 +418,7 @@ public class DatanodeManager {
     
     for (Iterator<DatanodeDescriptor> it = nodeList.iterator(); it.hasNext();) {
       DatanodeDescriptor node = it.next();
-      if ((!inHostsList(node, null)) && (!inExcludedHostsList(node, null))
+      if ((!inHostsList(node)) && (!inExcludedHostsList(node))
           && node.isDecommissioned()) {
         // Include list is not empty, an existing datanode does not appear
         // in both include or exclude lists and it has been decommissioned.
@@ -442,48 +442,27 @@ public class DatanodeManager {
    * @return boolean, if in the list
    */
   private static boolean checkInList(final DatanodeID node,
-      final String ipAddress,
       final Set<String> hostsList,
       final boolean isExcludeList) {
-    final InetAddress iaddr;
-    if (ipAddress != null) {
-      try {
-        iaddr = InetAddress.getByName(ipAddress);
-      } catch (UnknownHostException e) {
-        LOG.warn("Unknown ip address: " + ipAddress, e);
-        return isExcludeList;
-      }
-    } else {
-      try {
-        iaddr = InetAddress.getByName(node.getHost());
-      } catch (UnknownHostException e) {
-        LOG.warn("Unknown host: " + node.getHost(), e);
-        return isExcludeList;
-      }
-    }
-
     // if include list is empty, host is in include list
     if ( (!isExcludeList) && (hostsList.isEmpty()) ){
       return true;
     }
-    return // compare ipaddress(:port)
-    (hostsList.contains(iaddr.getHostAddress().toString()))
-        || (hostsList.contains(iaddr.getHostAddress().toString() + ":"
-            + node.getPort()))
-        // compare hostname(:port)
-        || (hostsList.contains(iaddr.getHostName()))
-        || (hostsList.contains(iaddr.getHostName() + ":" + node.getPort()))
-        || ((node instanceof DatanodeInfo) && hostsList
-            .contains(((DatanodeInfo) node).getHostName()));
+    for (String name : getNodeNamesForHostFiltering(node)) {
+      if (hostsList.contains(name)) {
+        return true;
+      }
+    }
+    return false;
   }
 
   /**
    * Decommission the node if it is in exclude list.
    */
-  private void checkDecommissioning(DatanodeDescriptor nodeReg, String ipAddr) 
+  private void checkDecommissioning(DatanodeDescriptor nodeReg) 
     throws IOException {
     // If the registered node is in exclude list, then decommission it
-    if (inExcludedHostsList(nodeReg, ipAddr)) {
+    if (inExcludedHostsList(nodeReg)) {
       startDecommission(nodeReg);
     }
   }
@@ -550,27 +529,27 @@ public class DatanodeManager {
 
   public void registerDatanode(DatanodeRegistration nodeReg
       ) throws IOException {
-    String dnAddress = Server.getRemoteAddress();
-    if (dnAddress == null) {
-      // Mostly called inside an RPC.
-      // But if not, use address passed by the data-node.
-      dnAddress = nodeReg.getHost();
-    }      
-
+    String hostName = nodeReg.getHost();
+    InetAddress dnAddress = Server.getRemoteIp();
+    if (dnAddress != null) {
+      // Mostly called inside an RPC, update ip and peer hostname
+      String hostname = dnAddress.getHostName();
+      String ip = dnAddress.getHostAddress();
+      if (hostname.equals(ip)) {
+        LOG.warn("Unresolved datanode registration from " + ip);
+        throw new DisallowedDatanodeException(nodeReg);
+      }
+      // update node registration with the ip and hostname from the socket
+      nodeReg.setName(ip + ":" + nodeReg.getPort());
+      nodeReg.setPeerHostName(hostname);
+    }
+    
     // Checks if the node is not on the hosts list.  If it is not, then
     // it will be disallowed from registering. 
-    if (!inHostsList(nodeReg, dnAddress)) {
+    if (!inHostsList(nodeReg)) {
       throw new DisallowedDatanodeException(nodeReg);
     }
 
-    String hostName = nodeReg.getHost();
-      
-    // update the datanode's name with ip:port
-    DatanodeID dnReg = new DatanodeID(dnAddress + ":" + nodeReg.getPort(),
-                                      nodeReg.getStorageID(),
-                                      nodeReg.getInfoPort(),
-                                      nodeReg.getIpcPort());
-    nodeReg.updateRegInfo(dnReg);
     nodeReg.exportedKeys = blockManager.getBlockKeys();
       
     NameNode.stateChangeLog.info("BLOCK* NameSystem.registerDatanode: "
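
The registration path above rejects datanodes whose address cannot be reverse
resolved: InetAddress.getHostName() falls back to the textual IP when reverse DNS
fails, so comparing it with getHostAddress() detects an unresolved peer. A small
standalone illustration of just that check (the class and method names here are
invented for the example):

    import java.net.InetAddress;

    public class ResolvedCheck {
      // True when the address reverse-resolved to a real hostname.
      static boolean isResolved(InetAddress peer) {
        return !peer.getHostName().equals(peer.getHostAddress());
      }

      public static void main(String[] args) throws Exception {
        InetAddress addr = InetAddress.getByName("127.0.0.1");
        System.out.println(addr + " resolved? " + isResolved(addr));
      }
    }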
@@ -629,7 +608,7 @@ public class DatanodeManager {
         
       // also treat the registration message as a heartbeat
       heartbeatManager.register(nodeS);
-      checkDecommissioning(nodeS, dnAddress);
+      checkDecommissioning(nodeS);
       return;
     } 
 
@@ -649,7 +628,7 @@ public class DatanodeManager {
       = new DatanodeDescriptor(nodeReg, NetworkTopology.DEFAULT_RACK, hostName);
     resolveNetworkLocation(nodeDescr);
     addDatanode(nodeDescr);
-    checkDecommissioning(nodeDescr, dnAddress);
+    checkDecommissioning(nodeDescr);
     
     // also treat the registration message as a heartbeat
     // no need to update its timestamp
@@ -693,10 +672,10 @@ public class DatanodeManager {
   private void refreshDatanodes() throws IOException {
     for(DatanodeDescriptor node : datanodeMap.values()) {
       // Check if not include.
-      if (!inHostsList(node, null)) {
+      if (!inHostsList(node)) {
         node.setDisallowed(true); // case 2.
       } else {
-        if (inExcludedHostsList(node, null)) {
+        if (inExcludedHostsList(node)) {
          startDecommission(node); // case 3.
        } else {
          stopDecommission(node); // case 4.
@@ -822,18 +801,8 @@ public class DatanodeManager {
          nodes.add(dn);
        }
        //Remove any form of the this datanode in include/exclude lists.
-        try {
-          InetAddress inet = InetAddress.getByName(dn.getHost());
-          // compare hostname(:port)
-          mustList.remove(inet.getHostName());
-          mustList.remove(inet.getHostName()+":"+dn.getPort());
-          // compare ipaddress(:port)
-          mustList.remove(inet.getHostAddress().toString());
-          mustList.remove(inet.getHostAddress().toString()+ ":" +dn.getPort());
-        } catch ( UnknownHostException e ) {
-          mustList.remove(dn.getName());
-          mustList.remove(dn.getHost());
-          LOG.warn(e);
+        for (String name : getNodeNamesForHostFiltering(dn)) {
+          mustList.remove(name);
        }
      }
    }
@@ -850,6 +819,26 @@ public class DatanodeManager {
    return nodes;
  }
  
+  private static List<String> getNodeNamesForHostFiltering(DatanodeID node) {
+    String ip = node.getHost();
+    String peerHostName = node.getPeerHostName();
+    int xferPort = node.getPort();
+    
+    List<String> names = new ArrayList<String>(); 
+    names.add(ip);
+    names.add(ip + ":" + xferPort);
+    if (peerHostName != null) {
+      names.add(peerHostName);
+      names.add(peerHostName + ":" + xferPort);
+    }
+    if (node instanceof DatanodeInfo) {
+      String regHostName = ((DatanodeInfo) node).getHostName();
+      names.add(regHostName);
+      names.add(regHostName + ":" + xferPort);
+    }
+    return names;
+  }
+  
   private void setDatanodeDead(DatanodeDescriptor node) throws IOException {
     node.setLastUpdate(0);
   }
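
Taken together, checkInList() and getNodeNamesForHostFiltering() replace the old
per-call InetAddress.getByName() lookups with a simple membership test over names
the NameNode already has in hand (IP, IP:port, cached peer hostname, and the
registered hostname, each with and without the transfer port). A simplified,
standalone restatement of that logic with none of the Hadoop types (the class
below is invented for the example):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Set;

    final class HostFileFilter {
      private HostFileFilter() {}

      // Candidate names are built purely from strings already known to the caller.
      static List<String> candidateNames(String ip, String peerHostName, int xferPort) {
        List<String> names = new ArrayList<String>();
        names.add(ip);
        names.add(ip + ":" + xferPort);
        if (peerHostName != null) {  // only present for nodes that registered over RPC
          names.add(peerHostName);
          names.add(peerHostName + ":" + xferPort);
        }
        return names;
      }

      static boolean inList(Set<String> hostsList, boolean isExcludeList,
                            String ip, String peerHostName, int xferPort) {
        // An empty include list admits every node; an empty exclude list matches none.
        if (!isExcludeList && hostsList.isEmpty()) {
          return true;
        }
        for (String name : candidateNames(ip, peerHostName, xferPort)) {
          if (hostsList.contains(name)) {
            return true;
          }
        }
        return false;
      }
    }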

+ 59 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeRegistration.java

@@ -21,6 +21,7 @@ import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
 
 import java.net.InetSocketAddress;
+import java.security.Permission;
 
 import junit.framework.TestCase;
 
@@ -31,12 +32,70 @@ import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
 
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
 /**
  * This class tests that a file need not be closed before its
  * data can be read by another client.
  */
 public class TestDatanodeRegistration extends TestCase {
 
+  private static class MonitorDNS extends SecurityManager {
+    int lookups = 0;
+    @Override
+    public void checkPermission(Permission perm) {}    
+    @Override
+    public void checkConnect(String host, int port) {
+      if (port == -1) {
+        lookups++;
+      }
+    }
+  }
+
+  /**
+   * Ensure the datanode manager does not do host lookup after registration,
+   * especially for node reports.
+   * @throws Exception
+   */
+  public void testDNSLookups() throws Exception {
+    MonitorDNS sm = new MonitorDNS();
+    System.setSecurityManager(sm);
+    
+    MiniDFSCluster cluster = null;
+    try {
+      HdfsConfiguration conf = new HdfsConfiguration();
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(8).build();
+      cluster.waitActive();
+      
+      int initialLookups = sm.lookups;
+      assertTrue("dns security manager is active", initialLookups != 0);
+      
+      DatanodeManager dm =
+          cluster.getNamesystem().getBlockManager().getDatanodeManager();
+      
+      // make sure no lookups occur
+      dm.refreshNodes(conf);
+      assertEquals(initialLookups, sm.lookups);
+
+      dm.refreshNodes(conf);
+      assertEquals(initialLookups, sm.lookups);
+      
+      // ensure none of the reports trigger lookups
+      dm.getDatanodeListForReport(DatanodeReportType.ALL);
+      assertEquals(initialLookups, sm.lookups);
+      
+      dm.getDatanodeListForReport(DatanodeReportType.LIVE);
+      assertEquals(initialLookups, sm.lookups);
+      
+      dm.getDatanodeListForReport(DatanodeReportType.DEAD);
+      assertEquals(initialLookups, sm.lookups);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+      System.setSecurityManager(null);
+    }
+  }
+  
   /**
    * Regression test for HDFS-894 ensures that, when datanodes
    * are restarted, the new IPC port is registered with the
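
The new test relies on the fact that the JDK invokes SecurityManager.checkConnect(host, -1)
for every hostname resolution, so counting those calls exposes hidden DNS lookups.
A standalone illustration of the same trick outside the MiniDFSCluster setup
(SecurityManager was still the standard mechanism in the branch-0.23 era; the class
below is invented for the example):

    import java.net.InetAddress;
    import java.security.Permission;

    public class DnsLookupCounter extends SecurityManager {
      private int lookups = 0;

      @Override
      public void checkPermission(Permission perm) { /* allow everything */ }

      @Override
      public void checkConnect(String host, int port) {
        if (port == -1) {   // port == -1 means "resolve this host", not "connect"
          lookups++;
        }
      }

      public int getLookups() { return lookups; }

      public static void main(String[] args) throws Exception {
        DnsLookupCounter counter = new DnsLookupCounter();
        System.setSecurityManager(counter);
        try {
          InetAddress.getByName("localhost");  // triggers a resolution check
          System.out.println("lookups = " + counter.getLookups());
        } finally {
          System.setSecurityManager(null);
        }
      }
    }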