Browse Source

HDFS-6460. Ignore stale and decommissioned nodes in NetworkTopology#sortByDistance. Contributed by Yongjun Zhang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1602287 13f79535-47bb-0310-9956-ffa450edef68
Andrew Wang 11 years ago
parent
commit
35eeacc122

+ 6 - 5
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java

@@ -884,15 +884,16 @@ public class NetworkTopology {
    * @param seed Used to seed the pseudo-random generator that randomizes the
    * @param seed Used to seed the pseudo-random generator that randomizes the
    *          set of nodes at each network distance.
    *          set of nodes at each network distance.
    */
    */
-  public void sortByDistance(Node reader, Node[] nodes, long seed) {
+  public void sortByDistance(Node reader, Node[] nodes,
+      int activeLen, long seed) {
     /** Sort weights for the nodes array */
     /** Sort weights for the nodes array */
-    int[] weights = new int[nodes.length];
-    for (int i=0; i<nodes.length; i++) {
+    int[] weights = new int[activeLen];
+    for (int i=0; i<activeLen; i++) {
       weights[i] = getWeight(reader, nodes[i]);
       weights[i] = getWeight(reader, nodes[i]);
     }
     }
     // Add weight/node pairs to a TreeMap to sort
     // Add weight/node pairs to a TreeMap to sort
     TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
     TreeMap<Integer, List<Node>> tree = new TreeMap<Integer, List<Node>>();
-    for (int i=0; i<nodes.length; i++) {
+    for (int i=0; i<activeLen; i++) {
       int weight = weights[i];
       int weight = weights[i];
       Node node = nodes[i];
       Node node = nodes[i];
       List<Node> list = tree.get(weight);
       List<Node> list = tree.get(weight);
@@ -918,7 +919,7 @@ public class NetworkTopology {
         }
         }
       }
       }
     }
     }
-    Preconditions.checkState(idx == nodes.length,
+    Preconditions.checkState(idx == activeLen,
         "Sorted the wrong number of nodes!");
         "Sorted the wrong number of nodes!");
   }
   }
 }
 }

+ 3 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java

@@ -279,7 +279,8 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
    *          set of nodes at each network distance.
    *          set of nodes at each network distance.
    */
    */
   @Override
   @Override
-  public void sortByDistance( Node reader, Node[] nodes, long seed) {
+  public void sortByDistance( Node reader, Node[] nodes,
+      int activeLen, long seed) {
     // If reader is not a datanode (not in NetworkTopology tree), we need to
     // If reader is not a datanode (not in NetworkTopology tree), we need to
     // replace this reader with a sibling leaf node in tree.
     // replace this reader with a sibling leaf node in tree.
     if (reader != null && !this.contains(reader)) {
     if (reader != null && !this.contains(reader)) {
@@ -292,7 +293,7 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
         return;
         return;
       }
       }
     }
     }
-    super.sortByDistance(reader, nodes, seed);
+    super.sortByDistance(reader, nodes, nodes.length, seed);
   }
   }
 
 
   /** InnerNodeWithNodeGroup represents a switch/router of a data center, rack
   /** InnerNodeWithNodeGroup represents a switch/router of a data center, rack

+ 8 - 4
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java

@@ -104,7 +104,8 @@ public class TestNetworkTopologyWithNodeGroup {
     testNodes[1] = dataNodes[2];
     testNodes[1] = dataNodes[2];
     testNodes[2] = dataNodes[3];
     testNodes[2] = dataNodes[3];
     testNodes[3] = dataNodes[0];
     testNodes[3] = dataNodes[0];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[2] == dataNodes[2]);
     assertTrue(testNodes[2] == dataNodes[2]);
@@ -115,7 +116,8 @@ public class TestNetworkTopologyWithNodeGroup {
     testNodes[1] = dataNodes[4];
     testNodes[1] = dataNodes[4];
     testNodes[2] = dataNodes[1];
     testNodes[2] = dataNodes[1];
     testNodes[3] = dataNodes[0];
     testNodes[3] = dataNodes[0];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[1]);
 
 
@@ -124,7 +126,8 @@ public class TestNetworkTopologyWithNodeGroup {
     testNodes[1] = dataNodes[3];
     testNodes[1] = dataNodes[3];
     testNodes[2] = dataNodes[2];
     testNodes[2] = dataNodes[2];
     testNodes[3] = dataNodes[0];
     testNodes[3] = dataNodes[0];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[2]);
     assertTrue(testNodes[1] == dataNodes[2]);
 
 
@@ -133,7 +136,8 @@ public class TestNetworkTopologyWithNodeGroup {
     testNodes[1] = dataNodes[7];
     testNodes[1] = dataNodes[7];
     testNodes[2] = dataNodes[2];
     testNodes[2] = dataNodes[2];
     testNodes[3] = dataNodes[0];
     testNodes[3] = dataNodes[0];
-    cluster.sortByDistance(computeNode, testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(computeNode, testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[2]);
     assertTrue(testNodes[1] == dataNodes[2]);
   }
   }

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -173,6 +173,9 @@ Release 2.5.0 - UNRELEASED
 
 
     HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
     HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)
 
 
+    HDFS-6460. Ignore stale and decommissioned nodes in
+    NetworkTopology#sortByDistance. (Yongjun Zhang via wang)
+
   BUG FIXES 
   BUG FIXES 
 
 
     HDFS-6112. NFS Gateway docs are incorrect for allowed hosts configuration.
     HDFS-6112. NFS Gateway docs are incorrect for allowed hosts configuration.

+ 0 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlock.java

@@ -26,7 +26,6 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.Token;
 
 
-import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Lists;
 
 
 /**
 /**

+ 22 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -331,6 +331,18 @@ public class DatanodeManager {
     return heartbeatManager;
     return heartbeatManager;
   }
   }
 
 
+  private boolean isInactive(DatanodeInfo datanode) {
+    if (datanode.isDecommissioned()) {
+      return true;
+    }
+
+    if (avoidStaleDataNodesForRead) {
+      return datanode.isStale(staleInterval);
+    }
+      
+    return false;
+  }
+  
   /** Sort the located blocks by the distance to the target host. */
   /** Sort the located blocks by the distance to the target host. */
   public void sortLocatedBlocks(final String targethost,
   public void sortLocatedBlocks(final String targethost,
       final List<LocatedBlock> locatedblocks) {
       final List<LocatedBlock> locatedblocks) {
@@ -351,10 +363,17 @@ public class DatanodeManager {
         DFSUtil.DECOM_COMPARATOR;
         DFSUtil.DECOM_COMPARATOR;
         
         
     for (LocatedBlock b : locatedblocks) {
     for (LocatedBlock b : locatedblocks) {
-      networktopology.sortByDistance(client, b.getLocations(), b
-          .getBlock().getBlockId());
+      DatanodeInfo[] di = b.getLocations();
       // Move decommissioned/stale datanodes to the bottom
       // Move decommissioned/stale datanodes to the bottom
-      Arrays.sort(b.getLocations(), comparator);
+      Arrays.sort(di, comparator);
+      
+      int lastActiveIndex = di.length - 1;
+      while (lastActiveIndex > 0 && isInactive(di[lastActiveIndex])) {
+          --lastActiveIndex;
+      }
+      int activeLen = lastActiveIndex + 1;      
+      networktopology.sortByDistance(client, b.getLocations(), activeLen,
+          b.getBlock().getBlockId());
     }
     }
   }
   }
   
   

+ 8 - 4
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestHdfsNetworkTopologyWithNodeGroup.java

@@ -94,7 +94,8 @@ public class TestHdfsNetworkTopologyWithNodeGroup extends TestCase {
     testNodes[1] = dataNodes[2];
     testNodes[1] = dataNodes[2];
     testNodes[2] = dataNodes[3];
     testNodes[2] = dataNodes[3];
     testNodes[3] = dataNodes[0];
     testNodes[3] = dataNodes[0];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[2] == dataNodes[2]);
     assertTrue(testNodes[2] == dataNodes[2]);
@@ -105,7 +106,8 @@ public class TestHdfsNetworkTopologyWithNodeGroup extends TestCase {
     testNodes[1] = dataNodes[4];
     testNodes[1] = dataNodes[4];
     testNodes[2] = dataNodes[1];
     testNodes[2] = dataNodes[1];
     testNodes[3] = dataNodes[0];
     testNodes[3] = dataNodes[0];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[1]);
 
 
@@ -114,7 +116,8 @@ public class TestHdfsNetworkTopologyWithNodeGroup extends TestCase {
     testNodes[1] = dataNodes[3];
     testNodes[1] = dataNodes[3];
     testNodes[2] = dataNodes[2];
     testNodes[2] = dataNodes[2];
     testNodes[3] = dataNodes[0];
     testNodes[3] = dataNodes[0];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[2]);
     assertTrue(testNodes[1] == dataNodes[2]);
 
 
@@ -123,7 +126,8 @@ public class TestHdfsNetworkTopologyWithNodeGroup extends TestCase {
     testNodes[1] = dataNodes[7];
     testNodes[1] = dataNodes[7];
     testNodes[2] = dataNodes[2];
     testNodes[2] = dataNodes[2];
     testNodes[3] = dataNodes[0];
     testNodes[3] = dataNodes[0];
-    cluster.sortByDistance(computeNode, testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(computeNode, testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[2]);
     assertTrue(testNodes[1] == dataNodes[2]);
   }
   }

+ 36 - 8
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java

@@ -55,11 +55,18 @@ public class TestNetworkTopology {
         DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2"),
         DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2"),
         DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d2/r3"),
         DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d2/r3"),
         DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/d2/r3"),
         DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/d2/r3"),
-        DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3")
+        DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3"),
+        DFSTestUtil.getDatanodeDescriptor("9.9.9.9", "/d3/r1"),
+        DFSTestUtil.getDatanodeDescriptor("10.10.10.10", "/d3/r1"),
+        DFSTestUtil.getDatanodeDescriptor("11.11.11.11", "/d3/r1"),
+        DFSTestUtil.getDatanodeDescriptor("12.12.12.12", "/d3/r2"),
+        DFSTestUtil.getDatanodeDescriptor("13.13.13.13", "/d3/r2")        
     };
     };
     for (int i = 0; i < dataNodes.length; i++) {
     for (int i = 0; i < dataNodes.length; i++) {
       cluster.add(dataNodes[i]);
       cluster.add(dataNodes[i]);
     }
     }
+    dataNodes[9].setDecommissioned();
+    dataNodes[10].setDecommissioned();
   }
   }
   
   
   @Test
   @Test
@@ -100,7 +107,7 @@ public class TestNetworkTopology {
 
 
   @Test
   @Test
   public void testRacks() throws Exception {
   public void testRacks() throws Exception {
-    assertEquals(cluster.getNumOfRacks(), 3);
+    assertEquals(cluster.getNumOfRacks(), 5);
     assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1]));
     assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1]));
     assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2]));
     assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2]));
     assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3]));
     assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3]));
@@ -125,16 +132,33 @@ public class TestNetworkTopology {
     testNodes[0] = dataNodes[1];
     testNodes[0] = dataNodes[1];
     testNodes[1] = dataNodes[2];
     testNodes[1] = dataNodes[2];
     testNodes[2] = dataNodes[0];
     testNodes[2] = dataNodes[0];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[2] == dataNodes[2]);
     assertTrue(testNodes[2] == dataNodes[2]);
 
 
+    // array contains both local node & local rack node & decommissioned node
+    DatanodeDescriptor[] dtestNodes = new DatanodeDescriptor[5];
+    dtestNodes[0] = dataNodes[8];
+    dtestNodes[1] = dataNodes[12];
+    dtestNodes[2] = dataNodes[11];
+    dtestNodes[3] = dataNodes[9];
+    dtestNodes[4] = dataNodes[10];
+    cluster.sortByDistance(dataNodes[8], dtestNodes,
+        dtestNodes.length - 2, 0xDEADBEEF);
+    assertTrue(dtestNodes[0] == dataNodes[8]);
+    assertTrue(dtestNodes[1] == dataNodes[11]);
+    assertTrue(dtestNodes[2] == dataNodes[12]);
+    assertTrue(dtestNodes[3] == dataNodes[9]);
+    assertTrue(dtestNodes[4] == dataNodes[10]);
+
     // array contains local node
     // array contains local node
     testNodes[0] = dataNodes[1];
     testNodes[0] = dataNodes[1];
     testNodes[1] = dataNodes[3];
     testNodes[1] = dataNodes[3];
     testNodes[2] = dataNodes[0];
     testNodes[2] = dataNodes[0];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[2] == dataNodes[3]);
     assertTrue(testNodes[2] == dataNodes[3]);
@@ -143,7 +167,8 @@ public class TestNetworkTopology {
     testNodes[0] = dataNodes[5];
     testNodes[0] = dataNodes[5];
     testNodes[1] = dataNodes[3];
     testNodes[1] = dataNodes[3];
     testNodes[2] = dataNodes[1];
     testNodes[2] = dataNodes[1];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[1]);
     assertTrue(testNodes[0] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[3]);
     assertTrue(testNodes[1] == dataNodes[3]);
     assertTrue(testNodes[2] == dataNodes[5]);
     assertTrue(testNodes[2] == dataNodes[5]);
@@ -152,7 +177,8 @@ public class TestNetworkTopology {
     testNodes[0] = dataNodes[1];
     testNodes[0] = dataNodes[1];
     testNodes[1] = dataNodes[5];
     testNodes[1] = dataNodes[5];
     testNodes[2] = dataNodes[3];
     testNodes[2] = dataNodes[3];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEADBEEF);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEADBEEF);
     assertTrue(testNodes[0] == dataNodes[1]);
     assertTrue(testNodes[0] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[3]);
     assertTrue(testNodes[1] == dataNodes[3]);
     assertTrue(testNodes[2] == dataNodes[5]);
     assertTrue(testNodes[2] == dataNodes[5]);
@@ -161,7 +187,8 @@ public class TestNetworkTopology {
     testNodes[0] = dataNodes[1];
     testNodes[0] = dataNodes[1];
     testNodes[1] = dataNodes[5];
     testNodes[1] = dataNodes[5];
     testNodes[2] = dataNodes[3];
     testNodes[2] = dataNodes[3];
-    cluster.sortByDistance(dataNodes[0], testNodes, 0xDEAD);
+    cluster.sortByDistance(dataNodes[0], testNodes,
+        testNodes.length, 0xDEAD);
     // sortByDistance does not take the "data center" layer into consideration
     // sortByDistance does not take the "data center" layer into consideration
     // and it doesn't sort by getDistance, so 1, 5, 3 is also valid here
     // and it doesn't sort by getDistance, so 1, 5, 3 is also valid here
     assertTrue(testNodes[0] == dataNodes[1]);
     assertTrue(testNodes[0] == dataNodes[1]);
@@ -176,7 +203,8 @@ public class TestNetworkTopology {
       testNodes[0] = dataNodes[5];
       testNodes[0] = dataNodes[5];
       testNodes[1] = dataNodes[6];
       testNodes[1] = dataNodes[6];
       testNodes[2] = dataNodes[7];
       testNodes[2] = dataNodes[7];
-      cluster.sortByDistance(dataNodes[i], testNodes, 0xBEADED+i);
+      cluster.sortByDistance(dataNodes[i], testNodes,
+          testNodes.length, 0xBEADED+i);
       if (first == null) {
       if (first == null) {
         first = testNodes[0];
         first = testNodes[0];
       } else {
       } else {