浏览代码

HDFS-6701. Make seed optional in NetworkTopology#sortByDistance. Contributed by Ashwin Shankar.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1612625 13f79535-47bb-0310-9956-ffa450edef68
Andrew Wang 11 年之前
父节点
当前提交
c83c5b868e

+ 6 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java

@@ -883,8 +883,8 @@ public class NetworkTopology {
    * @param seed Used to seed the pseudo-random generator that randomizes the
    *          set of nodes at each network distance.
    */
-  public void sortByDistance(Node reader, Node[] nodes,
-      int activeLen, long seed) {
+  public void sortByDistance(Node reader, Node[] nodes, int activeLen,
+      long seed, boolean randomizeBlockLocationsPerBlock) {
     /** Sort weights for the nodes array */
     int[] weights = new int[activeLen];
     for (int i=0; i<activeLen; i++) {
@@ -906,8 +906,11 @@ public class NetworkTopology {
     // Seed is normally the block id
     // This means we use the same pseudo-random order for each block, for
     // potentially better page cache usage.
+    // Seed is not used if we want to randomize block location for every block
     Random rand = getRandom();
-    rand.setSeed(seed);
+    if (!randomizeBlockLocationsPerBlock) {
+      rand.setSeed(seed);
+    }
     int idx = 0;
     for (List<Node> list: tree.values()) {
       if (list != null) {

+ 4 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java

@@ -279,8 +279,8 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
    *          set of nodes at each network distance.
    */
   @Override
-  public void sortByDistance( Node reader, Node[] nodes,
-      int activeLen, long seed) {
+  public void sortByDistance(Node reader, Node[] nodes, int activeLen,
+      long seed, boolean randomizeBlockLocationsPerBlock) {
     // If reader is not a datanode (not in NetworkTopology tree), we need to
     // replace this reader with a sibling leaf node in tree.
     if (reader != null && !this.contains(reader)) {
@@ -293,7 +293,8 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology {
         return;
       }
     }
-    super.sortByDistance(reader, nodes, nodes.length, seed);
+    super.sortByDistance(reader, nodes, nodes.length, seed,
+        randomizeBlockLocationsPerBlock);
   }
 
   /** InnerNodeWithNodeGroup represents a switch/router of a data center, rack

+ 4 - 4
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java

@@ -105,7 +105,7 @@ public class TestNetworkTopologyWithNodeGroup {
     testNodes[2] = dataNodes[3];
     testNodes[3] = dataNodes[0];
     cluster.sortByDistance(dataNodes[0], testNodes,
-        testNodes.length, 0xDEADBEEF);
+        testNodes.length, 0xDEADBEEF, false);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[2] == dataNodes[2]);
@@ -117,7 +117,7 @@ public class TestNetworkTopologyWithNodeGroup {
     testNodes[2] = dataNodes[1];
     testNodes[3] = dataNodes[0];
     cluster.sortByDistance(dataNodes[0], testNodes,
-        testNodes.length, 0xDEADBEEF);
+        testNodes.length, 0xDEADBEEF, false);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
 
@@ -127,7 +127,7 @@ public class TestNetworkTopologyWithNodeGroup {
     testNodes[2] = dataNodes[2];
     testNodes[3] = dataNodes[0];
     cluster.sortByDistance(dataNodes[0], testNodes,
-        testNodes.length, 0xDEADBEEF);
+        testNodes.length, 0xDEADBEEF, false);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[2]);
 
@@ -137,7 +137,7 @@ public class TestNetworkTopologyWithNodeGroup {
     testNodes[2] = dataNodes[2];
     testNodes[3] = dataNodes[0];
     cluster.sortByDistance(computeNode, testNodes,
-        testNodes.length, 0xDEADBEEF);
+        testNodes.length, 0xDEADBEEF, false);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[2]);
   }

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -311,6 +311,9 @@ Release 2.6.0 - UNRELEASED
     datanodes and change datanode to write block replicas using the specified
     storage type. (szetszwo)
 
+    HDFS-6701. Make seed optional in NetworkTopology#sortByDistance.
+    (Ashwin Shankar via wang)
+
   OPTIMIZATIONS
 
     HDFS-6690. Deduplicate xattr names in memory. (wang)

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -214,6 +214,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String  DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_KEY = "dfs.namenode.min.supported.datanode.version";
   public static final String  DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_DEFAULT = "3.0.0-SNAPSHOT";
 
+  public static final String DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK = "dfs.namenode.randomize-block-locations-per-block";
+  public static final boolean DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK_DEFAULT = false;
+
   public static final String  DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum";
   public static final int     DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1;
 

+ 4 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -345,7 +345,8 @@ public class DatanodeManager {
   
   /** Sort the located blocks by the distance to the target host. */
   public void sortLocatedBlocks(final String targethost,
-      final List<LocatedBlock> locatedblocks) {
+      final List<LocatedBlock> locatedblocks,
+      boolean randomizeBlockLocationsPerBlock) {
     //sort the blocks
     // As it is possible for the separation of node manager and datanode, 
     // here we should get node but not datanode only .
@@ -372,8 +373,8 @@ public class DatanodeManager {
           --lastActiveIndex;
       }
       int activeLen = lastActiveIndex + 1;      
-      networktopology.sortByDistance(client, b.getLocations(), activeLen,
-          b.getBlock().getBlockId());
+      networktopology.sortByDistance(client, b.getLocations(), activeLen, b
+          .getBlock().getBlockId(), randomizeBlockLocationsPerBlock);
     }
   }
   

+ 14 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -83,6 +83,9 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROU
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK_DEFAULT;
+
 import static org.apache.hadoop.util.Time.now;
 
 import java.io.BufferedWriter;
@@ -528,6 +531,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
 
   private final FSImage fsImage;
 
+  private boolean randomizeBlockLocationsPerBlock;
+
   /**
    * Notify that loading of this FSDirectory is complete, and
    * it is imageLoaded for use
@@ -837,6 +842,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       alwaysUseDelegationTokensForTests = conf.getBoolean(
           DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY,
           DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT);
+      
+      this.randomizeBlockLocationsPerBlock = conf.getBoolean(
+          DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK,
+          DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK_DEFAULT);
 
       this.dtSecretManager = createDelegationTokenSecretManager(conf);
       this.dir = new FSDirectory(this, conf);
@@ -1700,17 +1709,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true,
         true);
     if (blocks != null) {
-      blockManager.getDatanodeManager().sortLocatedBlocks(
-          clientMachine, blocks.getLocatedBlocks());
-      
+      blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
+          blocks.getLocatedBlocks(), randomizeBlockLocationsPerBlock);
+
       // lastBlock is not part of getLocatedBlocks(), might need to sort it too
       LocatedBlock lastBlock = blocks.getLastLocatedBlock();
       if (lastBlock != null) {
         ArrayList<LocatedBlock> lastBlockList =
             Lists.newArrayListWithCapacity(1);
         lastBlockList.add(lastBlock);
-        blockManager.getDatanodeManager().sortLocatedBlocks(
-                              clientMachine, lastBlockList);
+        blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine,
+            lastBlockList, randomizeBlockLocationsPerBlock);
       }
     }
     return blocks;

+ 13 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -2039,4 +2039,17 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.randomize-block-locations-per-block</name>
+  <value>false</value>
+  <description>When fetching replica locations of a block, the replicas
+   are sorted based on network distance. This configuration parameter
+   determines whether the replicas at the same network distance are randomly
+   shuffled. By default, this is false, such that repeated requests for a block's
+   replicas always result in the same order. This potentially improves page cache
+   behavior. However, for some network topologies, it is desirable to shuffle this
+   order for better load balancing.
+  </description>
+</property>
+
 </configuration>

+ 36 - 9
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java

@@ -60,7 +60,14 @@ public class TestNetworkTopology {
         DFSTestUtil.getDatanodeDescriptor("10.10.10.10", "/d3/r1"),
         DFSTestUtil.getDatanodeDescriptor("11.11.11.11", "/d3/r1"),
         DFSTestUtil.getDatanodeDescriptor("12.12.12.12", "/d3/r2"),
-        DFSTestUtil.getDatanodeDescriptor("13.13.13.13", "/d3/r2")        
+        DFSTestUtil.getDatanodeDescriptor("13.13.13.13", "/d3/r2"),
+        DFSTestUtil.getDatanodeDescriptor("14.14.14.14", "/d4/r1"),
+        DFSTestUtil.getDatanodeDescriptor("15.15.15.15", "/d4/r1"),
+        DFSTestUtil.getDatanodeDescriptor("16.16.16.16", "/d4/r1"),
+        DFSTestUtil.getDatanodeDescriptor("17.17.17.17", "/d4/r1"),
+        DFSTestUtil.getDatanodeDescriptor("18.18.18.18", "/d4/r1"),
+        DFSTestUtil.getDatanodeDescriptor("19.19.19.19", "/d4/r1"),
+        DFSTestUtil.getDatanodeDescriptor("20.20.20.20", "/d4/r1"),        
     };
     for (int i = 0; i < dataNodes.length; i++) {
       cluster.add(dataNodes[i]);
@@ -107,7 +114,7 @@ public class TestNetworkTopology {
 
   @Test
   public void testRacks() throws Exception {
-    assertEquals(cluster.getNumOfRacks(), 5);
+    assertEquals(cluster.getNumOfRacks(), 6);
     assertTrue(cluster.isOnSameRack(dataNodes[0], dataNodes[1]));
     assertFalse(cluster.isOnSameRack(dataNodes[1], dataNodes[2]));
     assertTrue(cluster.isOnSameRack(dataNodes[2], dataNodes[3]));
@@ -133,7 +140,7 @@ public class TestNetworkTopology {
     testNodes[1] = dataNodes[2];
     testNodes[2] = dataNodes[0];
     cluster.sortByDistance(dataNodes[0], testNodes,
-        testNodes.length, 0xDEADBEEF);
+        testNodes.length, 0xDEADBEEF, false);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[2] == dataNodes[2]);
@@ -146,7 +153,7 @@ public class TestNetworkTopology {
     dtestNodes[3] = dataNodes[9];
     dtestNodes[4] = dataNodes[10];
     cluster.sortByDistance(dataNodes[8], dtestNodes,
-        dtestNodes.length - 2, 0xDEADBEEF);
+        dtestNodes.length - 2, 0xDEADBEEF, false);
     assertTrue(dtestNodes[0] == dataNodes[8]);
     assertTrue(dtestNodes[1] == dataNodes[11]);
     assertTrue(dtestNodes[2] == dataNodes[12]);
@@ -158,7 +165,7 @@ public class TestNetworkTopology {
     testNodes[1] = dataNodes[3];
     testNodes[2] = dataNodes[0];
     cluster.sortByDistance(dataNodes[0], testNodes,
-        testNodes.length, 0xDEADBEEF);
+        testNodes.length, 0xDEADBEEF, false);
     assertTrue(testNodes[0] == dataNodes[0]);
     assertTrue(testNodes[1] == dataNodes[1]);
     assertTrue(testNodes[2] == dataNodes[3]);
@@ -168,7 +175,7 @@ public class TestNetworkTopology {
     testNodes[1] = dataNodes[3];
     testNodes[2] = dataNodes[1];
     cluster.sortByDistance(dataNodes[0], testNodes,
-        testNodes.length, 0xDEADBEEF);
+        testNodes.length, 0xDEADBEEF, false);
     assertTrue(testNodes[0] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[3]);
     assertTrue(testNodes[2] == dataNodes[5]);
@@ -178,7 +185,7 @@ public class TestNetworkTopology {
     testNodes[1] = dataNodes[5];
     testNodes[2] = dataNodes[3];
     cluster.sortByDistance(dataNodes[0], testNodes,
-        testNodes.length, 0xDEADBEEF);
+        testNodes.length, 0xDEADBEEF, false);
     assertTrue(testNodes[0] == dataNodes[1]);
     assertTrue(testNodes[1] == dataNodes[3]);
     assertTrue(testNodes[2] == dataNodes[5]);
@@ -188,7 +195,7 @@ public class TestNetworkTopology {
     testNodes[1] = dataNodes[5];
     testNodes[2] = dataNodes[3];
     cluster.sortByDistance(dataNodes[0], testNodes,
-        testNodes.length, 0xDEAD);
+        testNodes.length, 0xDEAD, false);
     // sortByDistance does not take the "data center" layer into consideration
     // and it doesn't sort by getDistance, so 1, 5, 3 is also valid here
     assertTrue(testNodes[0] == dataNodes[1]);
@@ -204,7 +211,27 @@ public class TestNetworkTopology {
       testNodes[1] = dataNodes[6];
       testNodes[2] = dataNodes[7];
       cluster.sortByDistance(dataNodes[i], testNodes,
-          testNodes.length, 0xBEADED+i);
+          testNodes.length, 0xBEADED+i, false);
+      if (first == null) {
+        first = testNodes[0];
+      } else {
+        if (first != testNodes[0]) {
+          foundRandom = true;
+          break;
+        }
+      }
+    }
+    assertTrue("Expected to find a different first location", foundRandom);
+    // Array of rack local nodes with randomizeBlockLocationsPerBlock set to
+    // true
+    // Expect random order of block locations for same block
+    first = null;
+    for (int i = 1; i <= 4; i++) {
+      testNodes[0] = dataNodes[13];
+      testNodes[1] = dataNodes[14];
+      testNodes[2] = dataNodes[15];
+      cluster.sortByDistance(dataNodes[15 + i], testNodes, testNodes.length,
+          0xBEADED, true);
       if (first == null) {
         first = testNodes[0];
       } else {