Quellcode durchsuchen

HADOOP-1155. Improve NetworkTopology's algorithm for finding nearby nodes. Contributed by Hairong Kuang.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@529389 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting vor 18 Jahren
Ursprung
Commit
41eaad84ed

+ 3 - 0
CHANGES.txt

@@ -210,6 +210,9 @@ Trunk (unreleased changes)
 63. HADOOP-1258.  Fix TestCheckpoint test case to wait for 
     MiniDFSCluster to be active.  (Nigel Daley via tomwhite)
 
+64. HADOOP-1155.  Improve NetworkTopology's algorithm for finding
+    nearby nodes, used by HDFS.  (Hairong Kuang via cutting)
+
 
 Release 0.12.3 - 2007-04-06
 

+ 10 - 11
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -626,7 +626,7 @@ class FSNamesystem implements FSConstants {
                          blocksMap.nodeIterator( blocks[i] ); it.hasNext(); ) {
                         machineSets[i][ numNodes++ ] = it.next();
                     }
-                    clusterMap.sortByDistance( client, machineSets[i] );
+                    clusterMap.pseudoSortByDistance( client, machineSets[i] );
                 }
             }
 
@@ -3120,13 +3120,13 @@ class FSNamesystem implements FSConstants {
       throws NotEnoughReplicasException {
         DatanodeDescriptor result;
         do {
-          DatanodeDescriptor[] selectedNodes = 
+          List<DatanodeDescriptor> selectedNodes = 
             chooseRandom(1, nodes, excludedNodes);
-          if(selectedNodes.length == 0 ) {
+          if(selectedNodes.size() == 0 ) {
             throw new NotEnoughReplicasException( 
             "Not able to place enough replicas" );
           }
-          result = (DatanodeDescriptor)(selectedNodes[0]);
+          result = (DatanodeDescriptor)(selectedNodes.get(0));
         } while( !isGoodTarget( result, blocksize, maxNodesPerRack, results));
         results.add(result);
         return result;
@@ -3143,13 +3143,13 @@ class FSNamesystem implements FSConstants {
       throws NotEnoughReplicasException {
         boolean toContinue = true;
         do {
-          DatanodeDescriptor[] selectedNodes = 
+          List<DatanodeDescriptor> selectedNodes = 
             chooseRandom(numOfReplicas, nodes, excludedNodes);
-          if(selectedNodes.length < numOfReplicas) {
+          if(selectedNodes.size() < numOfReplicas) {
             toContinue = false;
           }
-          for(int i=0; i<selectedNodes.length; i++) {
-            DatanodeDescriptor result = (DatanodeDescriptor)(selectedNodes[i]);
+          for(int i=0; i<selectedNodes.size(); i++) {
+            DatanodeDescriptor result = (DatanodeDescriptor)(selectedNodes.get(i));
             if( isGoodTarget( result, blocksize, maxNodesPerRack, results)) {
               numOfReplicas--;
               results.add(result);
@@ -3166,7 +3166,7 @@ class FSNamesystem implements FSConstants {
       /* Randomly choose <i>numOfNodes</i> nodes from <i>scope</i>.
        * @return the choosen nodes
        */
-      private DatanodeDescriptor[] chooseRandom(int numOfReplicas, 
+      private List<DatanodeDescriptor> chooseRandom(int numOfReplicas, 
           String nodes,
           List<DatanodeDescriptor> excludedNodes) {
         List<DatanodeDescriptor> results = 
@@ -3183,8 +3183,7 @@ class FSNamesystem implements FSConstants {
             numOfReplicas--;
           }
         }
-        return (DatanodeDescriptor[])results.toArray(
-            new DatanodeDescriptor[results.size()]);    
+        return results;    
       }
       
       /* judge if a node is a good target.

+ 49 - 20
src/java/org/apache/hadoop/net/NetworkTopology.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.net;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Random;
@@ -300,6 +301,7 @@ public class NetworkTopology {
     
     InnerNode clusterMap = new InnerNode( InnerNode.ROOT ); // the root
     private int numOfRacks = 0;  // rack counter
+    private HashMap<String, Node> rackMap = new HashMap<String, Node>();
     
     public NetworkTopology() {
     }
@@ -322,6 +324,7 @@ public class NetworkTopology {
         if( clusterMap.add( node) ) {
             if( rack == null ) {
                 numOfRacks++;
+                rackMap.put(node.getNetworkLocation(), node.getParent());
             }
         }
         LOG.debug("NetworkTopology became:\n" + this.toString());
@@ -339,6 +342,7 @@ public class NetworkTopology {
             InnerNode rack = (InnerNode)getNode(node.getNetworkLocation());
             if(rack == null) {
                 numOfRacks--;
+                rackMap.remove(node.getNetworkLocation());
             }
         }
         LOG.debug("NetworkTopology became:\n" + this.toString());
@@ -369,6 +373,12 @@ public class NetworkTopology {
      */
     public synchronized Node getNode( String loc ) {
         loc = NodeBase.normalize(loc);
+        // optimize searching rack node by looking up the rackMap
+        Node node = rackMap.get(loc);
+        if( node != null ) {
+          return node;
+        }
+        // otherwise slower search
         if(!NodeBase.ROOT.equals(loc))
             loc = loc.substring(1);
         return clusterMap.getLoc( loc );
@@ -440,11 +450,7 @@ public class NetworkTopology {
         return false;
       }
       
-        if( node1 == node2 || node1.equals(node2)) {
-            return true;
-        }
-        
-        return node1.getParent()==node2.getParent();
+      return node1.getParent()==node2.getParent();
     }
     
     final private static Random r = new Random();
@@ -546,24 +552,47 @@ public class NetworkTopology {
         return tree.toString();
     }
 
-    /* Set and used only inside sortByDistance. 
-     * This saves an allocation each time we sort.
+    /** Sort nodes array by their distances to <i>reader</i>
+     * It linearly scans the array, if a local node is found, swap it with
+     * the first element of the array.
+     * If a local rack node is found, swap it with the first element following
+     * the local node.
+     * It leaves the rest nodes untouched.
      */
-    private DatanodeDescriptor distFrom = null;
-    private final Comparator<DatanodeDescriptor> nodeDistanceComparator = 
-      new Comparator<DatanodeDescriptor>() {
-        public int compare(DatanodeDescriptor n1, DatanodeDescriptor n2) {
-          return getDistance(distFrom, n1) - getDistance(distFrom, n2);
+    public synchronized void pseudoSortByDistance( 
+                  DatanodeDescriptor reader, DatanodeDescriptor[] nodes ) {
+      if (reader == null ) return; // no need to sort
+      
+      DatanodeDescriptor tempNode;
+      int tempIndex = 0;
+      int localRackNode = -1;
+      //scan the array to find the local node & local rack node
+      for(int i=0; i<nodes.length; i++) {
+        if(tempIndex == 0 && reader == nodes[i]) { //local node
+          //swap the local node and the node at position 0
+          if( i != 0 ) {
+            tempNode = nodes[tempIndex];
+            nodes[tempIndex] = nodes[i];
+            nodes[i] = tempNode;
+          }
+          tempIndex=1;
+          if(localRackNode != -1 ) {
+            if(localRackNode == 0) {
+              localRackNode = i;
+            }
+            break;
+          }
+        } else if(localRackNode == -1 && isOnSameRack(reader, nodes[i])) { //local rack
+          localRackNode = i;
+          if(tempIndex != 0 ) break;
         }
-    };
+      }
       
-    /** Sorts nodes array by their distances to <i>reader</i>. */
-    public synchronized void sortByDistance( final DatanodeDescriptor reader,
-                                             DatanodeDescriptor[] nodes ) { 
-      if(reader != null && contains(reader)) {
-        distFrom = reader;
-        Arrays.sort( nodes, nodeDistanceComparator );
-        distFrom = null;
+      // swap the local rack node and the node at position tempIndex
+      if(localRackNode != -1 && localRackNode != tempIndex ) {
+        tempNode = nodes[tempIndex];
+        nodes[tempIndex] = nodes[localRackNode];
+        nodes[localRackNode] = tempNode;
       }
     }
 }

+ 41 - 1
src/test/org/apache/hadoop/net/TestNetworkTopology.java

@@ -25,7 +25,7 @@ public class TestNetworkTopology extends TestCase {
     }
   }
   
-  public void testContains() {
+  public void testContains() throws Exception {
     for(int i=0; i<dataNodes.length; i++) {
       assertTrue( cluster.contains(dataNodes[i]));
     }
@@ -53,6 +53,46 @@ public class TestNetworkTopology extends TestCase {
     assertEquals(cluster.getDistance(dataNodes[0], dataNodes[6]), 6);
   }
 
+  public void testPseudoSortByDistance() throws Exception {
+    DatanodeDescriptor[] testNodes = new DatanodeDescriptor[3];
+    
+    // array contains both local node & local rack node
+    testNodes[0] = dataNodes[1];
+    testNodes[1] = dataNodes[2];
+    testNodes[2] = dataNodes[0];
+    cluster.pseudoSortByDistance(dataNodes[0], testNodes );
+    assertTrue(testNodes[0] == dataNodes[0]);
+    assertTrue(testNodes[1] == dataNodes[1]);
+    assertTrue(testNodes[2] == dataNodes[2]);
+
+    // array contains local node
+    testNodes[0] = dataNodes[1];
+    testNodes[1] = dataNodes[3];
+    testNodes[2] = dataNodes[0];
+    cluster.pseudoSortByDistance(dataNodes[0], testNodes );
+    assertTrue(testNodes[0] == dataNodes[0]);
+    assertTrue(testNodes[1] == dataNodes[1]);
+    assertTrue(testNodes[2] == dataNodes[3]);
+
+    // array contains local rack node
+    testNodes[0] = dataNodes[5];
+    testNodes[1] = dataNodes[3];
+    testNodes[2] = dataNodes[1];
+    cluster.pseudoSortByDistance(dataNodes[0], testNodes );
+    assertTrue(testNodes[0] == dataNodes[1]);
+    assertTrue(testNodes[1] == dataNodes[3]);
+    assertTrue(testNodes[2] == dataNodes[5]);
+
+    // array contains neither local node & local rack node
+    testNodes[0] = dataNodes[5];
+    testNodes[1] = dataNodes[3];
+    testNodes[2] = dataNodes[2];
+    cluster.pseudoSortByDistance(dataNodes[0], testNodes );
+    assertTrue(testNodes[0] == dataNodes[5]);
+    assertTrue(testNodes[1] == dataNodes[3]);
+    assertTrue(testNodes[2] == dataNodes[2]);
+  }
+  
   public void testRemove() throws Exception {
     for(int i=0; i<dataNodes.length; i++) {
       cluster.remove( dataNodes[i] );