há 16 anos atrás · a18729b75c
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -247,6 +247,8 @@ Trunk (unreleased changes)
 
				 
			
 
				     HADOOP-5603. Improve NameNode's block placement performance. (hairong)
			
 
				 
			
 
				+    HADOOP-5638. More improvement on block placement performance. (hairong)
			
 
				+
			
 
				   BUG FIXES
			
 
				     
			
 
				     HADOOP-5379. CBZip2InputStream to throw IOException on data crc error.
			
--- a/src/core/org/apache/hadoop/net/NetworkTopology.java
+++ b/src/core/org/apache/hadoop/net/NetworkTopology.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.net;
 
				 
			
 
				 import java.util.ArrayList;
			
 
				 import java.util.Collection;
			
 
				-import java.util.List;
			
 
				 import java.util.Random;
			
 
				 import java.util.concurrent.locks.ReadWriteLock;
			
 
				 import java.util.concurrent.locks.ReentrantReadWriteLock;
			
@@ -304,7 +303,7 @@ public class NetworkTopology {
 
				   }
			
 
				     
			
 
				   /** Add a leaf node
			
 
				-   * Update node counter & rack counter if neccessary
			
 
				+   * Update node counter & rack counter if necessary
			
 
				    * @param node
			
 
				    *          node to be added
			
 
				    * @exception IllegalArgumentException if add a node to a leave 
			
@@ -337,7 +336,7 @@ public class NetworkTopology {
 
				   }
			
 
				     
			
 
				   /** Remove a node
			
 
				-   * Update node counter & rack counter if neccessary
			
 
				+   * Update node counter & rack counter if necessary
			
 
				    * @param node
			
 
				    *          node to be removed
			
 
				    */ 
			
@@ -472,7 +471,7 @@ public class NetworkTopology {
 
				   /** Check if two nodes are on the same rack
			
 
				    * @param node1 one node
			
 
				    * @param node2 another node
			
 
				-   * @return true if node1 and node2 are pm the same rack; false otherwise
			
 
				+   * @return true if node1 and node2 are on the same rack; false otherwise
			
 
				    * @exception IllegalArgumentException when either node1 or node2 is null, or
			
 
				    * node1 or node2 do not belong to the cluster
			
 
				    */
			
@@ -493,8 +492,8 @@ public class NetworkTopology {
 
				   /** randomly choose one node from <i>scope</i>
			
 
				    * if scope starts with ~, choose one from the all nodes except for the
			
 
				    * ones in <i>scope</i>; otherwise, choose one from <i>scope</i>
			
 
				-   * @param scope range of nodes from which a node will be choosen
			
 
				-   * @return the choosen node
			
 
				+   * @param scope range of nodes from which a node will be chosen
			
 
				+   * @return the chosen node
			
 
				    */
			
 
				   public Node chooseRandom(String scope) {
			
 
				     netlock.readLock().lock();
			
@@ -546,7 +545,7 @@ public class NetworkTopology {
 
				    * @return number of available nodes
			
 
				    */
			
 
				   public int countNumOfAvailableNodes(String scope,
			
 
				-                                      List<Node> excludedNodes) {
			
 
				+                                      Collection<Node> excludedNodes) {
			
 
				     boolean isExcluded=false;
			
 
				     if (scope.startsWith("~")) {
			
 
				       isExcluded=true;
			
@@ -613,7 +612,7 @@ public class NetworkTopology {
 
				    * If a local rack node is found, swap it with the first element following
			
 
				    * the local node.
			
 
				    * If neither local node or local rack node is found, put a random replica
			
 
				-   * location at postion 0.
			
 
				+   * location at position 0.
			
 
				    * It leaves the rest nodes untouched.
			
 
				    */
			
 
				   public void pseudoSortByDistance( Node reader, Node[] nodes ) {
			
--- a/src/hdfs/org/apache/hadoop/hdfs/server/namenode/ReplicationTargetChooser.java
+++ b/src/hdfs/org/apache/hadoop/hdfs/server/namenode/ReplicationTargetChooser.java
@@ -32,7 +32,7 @@ import java.util.*;
 
				  * the 1st replica is placed on the local machine, 
			
 
				  * otherwise a random datanode. The 2nd replica is placed on a datanode
			
 
				  * that is on a different rack. The 3rd replica is placed on a datanode
			
 
				- * which is on the same rack as the first replca.
			
 
				+ * which is on the same rack as the first replica.
			
 
				  */
			
 
				 class ReplicationTargetChooser {
			
 
				   private final boolean considerLoad; 
			
@@ -59,17 +59,17 @@ class ReplicationTargetChooser {
 
				    * 
			
 
				    * @param numOfReplicas: number of replicas wanted.
			
 
				    * @param writer: the writer's machine, null if not in the cluster.
			
 
				-   * @param excludedNodes: datanodesthat should not be considered targets.
			
 
				+   * @param excludedNodes: datanodes that should not be considered targets.
			
 
				    * @param blocksize: size of the data to be written.
			
 
				    * @return array of DatanodeDescriptor instances chosen as targets
			
 
				    * and sorted as a pipeline.
			
 
				    */
			
 
				   DatanodeDescriptor[] chooseTarget(int numOfReplicas,
			
 
				                                     DatanodeDescriptor writer,
			
 
				-                                    List<Node> excludedNodes,
			
 
				+                                    HashMap<Node, Node> excludedNodes,
			
 
				                                     long blocksize) {
			
 
				     if (excludedNodes == null) {
			
 
				-      excludedNodes = new ArrayList<Node>();
			
 
				+      excludedNodes = new HashMap<Node, Node>();
			
 
				     }
			
 
				       
			
 
				     return chooseTarget(numOfReplicas, writer, 
			
@@ -83,8 +83,8 @@ class ReplicationTargetChooser {
 
				    * 
			
 
				    * @param numOfReplicas: additional number of replicas wanted.
			
 
				    * @param writer: the writer's machine, null if not in the cluster.
			
 
				-   * @param choosenNodes: datanodes that have been choosen as targets.
			
 
				-   * @param excludedNodes: datanodesthat should not be considered targets.
			
 
				+   * @param choosenNodes: datanodes that have been chosen as targets.
			
 
				+   * @param excludedNodes: datanodes that should not be considered targets.
			
 
				    * @param blocksize: size of the data to be written.
			
 
				    * @return array of DatanodeDescriptor instances chosen as target 
			
 
				    * and sorted as a pipeline.
			
@@ -92,14 +92,14 @@ class ReplicationTargetChooser {
 
				   DatanodeDescriptor[] chooseTarget(int numOfReplicas,
			
 
				                                     DatanodeDescriptor writer,
			
 
				                                     List<DatanodeDescriptor> choosenNodes,
			
 
				-                                    List<Node> excludedNodes,
			
 
				+                                    HashMap<Node, Node> excludedNodes,
			
 
				                                     long blocksize) {
			
 
				     if (numOfReplicas == 0 || clusterMap.getNumOfLeaves()==0) {
			
 
				       return new DatanodeDescriptor[0];
			
 
				     }
			
 
				       
			
 
				     if (excludedNodes == null) {
			
 
				-      excludedNodes = new ArrayList<Node>();
			
 
				+      excludedNodes = new HashMap<Node, Node>();
			
 
				     }
			
 
				       
			
 
				     int clusterSize = clusterMap.getNumOfLeaves();
			
@@ -114,7 +114,9 @@ class ReplicationTargetChooser {
 
				       
			
 
				     List<DatanodeDescriptor> results = 
			
 
				       new ArrayList<DatanodeDescriptor>(choosenNodes);
			
 
				-    excludedNodes.addAll(choosenNodes);
			
 
				+    for (Node node:choosenNodes) {
			
 
				+      excludedNodes.put(node, node);
			
 
				+    }
			
 
				       
			
 
				     if (!clusterMap.contains(writer)) {
			
 
				       writer=null;
			
@@ -133,7 +135,7 @@ class ReplicationTargetChooser {
 
				   /* choose <i>numOfReplicas</i> from all data nodes */
			
 
				   private DatanodeDescriptor chooseTarget(int numOfReplicas,
			
 
				                                           DatanodeDescriptor writer,
			
 
				-                                          List<Node> excludedNodes,
			
 
				+                                          HashMap<Node, Node> excludedNodes,
			
 
				                                           long blocksize,
			
 
				                                           int maxNodesPerRack,
			
 
				                                           List<DatanodeDescriptor> results) {
			
@@ -188,13 +190,13 @@ class ReplicationTargetChooser {
 
				   }
			
 
				     
			
 
				   /* choose <i>localMachine</i> as the target.
			
 
				-   * if <i>localMachine</i> is not availabe, 
			
 
				+   * if <i>localMachine</i> is not available, 
			
 
				    * choose a node on the same rack
			
 
				-   * @return the choosen node
			
 
				+   * @return the chosen node
			
 
				    */
			
 
				   private DatanodeDescriptor chooseLocalNode(
			
 
				                                              DatanodeDescriptor localMachine,
			
 
				-                                             List<Node> excludedNodes,
			
 
				+                                             HashMap<Node, Node> excludedNodes,
			
 
				                                              long blocksize,
			
 
				                                              int maxNodesPerRack,
			
 
				                                              List<DatanodeDescriptor> results)
			
@@ -205,8 +207,8 @@ class ReplicationTargetChooser {
 
				                           blocksize, maxNodesPerRack, results);
			
 
				       
			
 
				     // otherwise try local machine first
			
 
				-    if (!excludedNodes.contains(localMachine)) {
			
 
				-      excludedNodes.add(localMachine);
			
 
				+    Node oldNode = excludedNodes.put(localMachine, localMachine);
			
 
				+    if (oldNode == null) { // was not in the excluded list
			
 
				       if (isGoodTarget(localMachine, blocksize,
			
 
				                        maxNodesPerRack, false, results)) {
			
 
				         results.add(localMachine);
			
@@ -220,15 +222,15 @@ class ReplicationTargetChooser {
 
				   }
			
 
				     
			
 
				   /* choose one node from the rack that <i>localMachine</i> is on.
			
 
				-   * if no such node is availabe, choose one node from the rack where
			
 
				+   * if no such node is available, choose one node from the rack where
			
 
				    * a second replica is on.
			
 
				    * if still no such node is available, choose a random node 
			
 
				    * in the cluster.
			
 
				-   * @return the choosen node
			
 
				+   * @return the chosen node
			
 
				    */
			
 
				   private DatanodeDescriptor chooseLocalRack(
			
 
				                                              DatanodeDescriptor localMachine,
			
 
				-                                             List<Node> excludedNodes,
			
 
				+                                             HashMap<Node, Node> excludedNodes,
			
 
				                                              long blocksize,
			
 
				                                              int maxNodesPerRack,
			
 
				                                              List<DatanodeDescriptor> results)
			
@@ -275,13 +277,13 @@ class ReplicationTargetChooser {
 
				     
			
 
				   /* choose <i>numOfReplicas</i> nodes from the racks 
			
 
				    * that <i>localMachine</i> is NOT on.
			
 
				-   * if not enough nodes are availabe, choose the remaining ones 
			
 
				+   * if not enough nodes are available, choose the remaining ones 
			
 
				    * from the local rack
			
 
				    */
			
 
				     
			
 
				   private void chooseRemoteRack(int numOfReplicas,
			
 
				                                 DatanodeDescriptor localMachine,
			
 
				-                                List<Node> excludedNodes,
			
 
				+                                HashMap<Node, Node> excludedNodes,
			
 
				                                 long blocksize,
			
 
				                                 int maxReplicasPerRack,
			
 
				                                 List<DatanodeDescriptor> results)
			
@@ -299,24 +301,24 @@ class ReplicationTargetChooser {
 
				   }
			
 
				 
			
 
				   /* Randomly choose one target from <i>nodes</i>.
			
 
				-   * @return the choosen node
			
 
				+   * @return the chosen node
			
 
				    */
			
 
				   private DatanodeDescriptor chooseRandom(
			
 
				                                           String nodes,
			
 
				-                                          List<Node> excludedNodes,
			
 
				+                                          HashMap<Node, Node> excludedNodes,
			
 
				                                           long blocksize,
			
 
				                                           int maxNodesPerRack,
			
 
				                                           List<DatanodeDescriptor> results) 
			
 
				     throws NotEnoughReplicasException {
			
 
				     int numOfAvailableNodes =
			
 
				-      clusterMap.countNumOfAvailableNodes(nodes, excludedNodes);
			
 
				+      clusterMap.countNumOfAvailableNodes(nodes, excludedNodes.keySet());
			
 
				     while(numOfAvailableNodes > 0) {
			
 
				       DatanodeDescriptor choosenNode = 
			
 
				         (DatanodeDescriptor)(clusterMap.chooseRandom(nodes));
			
 
				-      if (!excludedNodes.contains(choosenNode)) {
			
 
				-        excludedNodes.add(choosenNode);
			
 
				-        numOfAvailableNodes--;
			
 
				 
			
 
				+      Node oldNode = excludedNodes.put(choosenNode, choosenNode);
			
 
				+      if (oldNode == null) { // choosendNode was not in the excluded list
			
 
				+        numOfAvailableNodes--;
			
 
				         if (isGoodTarget(choosenNode, blocksize, maxNodesPerRack, results)) {
			
 
				           results.add(choosenNode);
			
 
				           return choosenNode;
			
@@ -332,19 +334,19 @@ class ReplicationTargetChooser {
 
				    */
			
 
				   private void chooseRandom(int numOfReplicas,
			
 
				                             String nodes,
			
 
				-                            List<Node> excludedNodes,
			
 
				+                            HashMap<Node, Node> excludedNodes,
			
 
				                             long blocksize,
			
 
				                             int maxNodesPerRack,
			
 
				                             List<DatanodeDescriptor> results)
			
 
				     throws NotEnoughReplicasException {
			
 
				       
			
 
				     int numOfAvailableNodes =
			
 
				-      clusterMap.countNumOfAvailableNodes(nodes, excludedNodes);
			
 
				+      clusterMap.countNumOfAvailableNodes(nodes, excludedNodes.keySet());
			
 
				     while(numOfReplicas > 0 && numOfAvailableNodes > 0) {
			
 
				       DatanodeDescriptor choosenNode = 
			
 
				         (DatanodeDescriptor)(clusterMap.chooseRandom(nodes));
			
 
				-      if (!excludedNodes.contains(choosenNode)) {
			
 
				-        excludedNodes.add(choosenNode);
			
 
				+      Node oldNode = excludedNodes.put(choosenNode, choosenNode);
			
 
				+      if (oldNode == null) {
			
 
				         numOfAvailableNodes--;
			
 
				 
			
 
				         if (isGoodTarget(choosenNode, blocksize, maxNodesPerRack, results)) {
			
@@ -426,7 +428,7 @@ class ReplicationTargetChooser {
 
				     
			
 
				   /* Return a pipeline of nodes.
			
 
				    * The pipeline is formed finding a shortest path that 
			
 
				-   * starts from the writer and tranverses all <i>nodes</i>
			
 
				+   * starts from the writer and traverses all <i>nodes</i>
			
 
				    * This is basically a traveling salesman problem.
			
 
				    */
			
 
				   private DatanodeDescriptor[] getPipeline(
			
@@ -469,7 +471,7 @@ class ReplicationTargetChooser {
 
				    * 
			
 
				    * @param lBlk block with locations
			
 
				    * @param cluster 
			
 
				-   * @return 1 if the block must be relicated on additional rack,
			
 
				+   * @return 1 if the block must be replicated on additional rack,
			
 
				    * or 0 if the number of racks is sufficient.
			
 
				    */
			
 
				   public static int verifyBlockPlacement(LocatedBlock lBlk,
			
--- a/src/test/org/apache/hadoop/hdfs/server/namenode/TestReplicationPolicy.java
+++ b/src/test/org/apache/hadoop/hdfs/server/namenode/TestReplicationPolicy.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode;
 
				 
			
 
				 import java.io.IOException;
			
 
				 import java.util.ArrayList;
			
 
				+import java.util.HashMap;
			
 
				 import java.util.List;
			
 
				 
			
 
				 import org.apache.hadoop.conf.Configuration;
			
@@ -134,24 +135,24 @@ public class TestReplicationPolicy extends TestCase {
 
				    * @throws Exception
			
 
				    */
			
 
				   public void testChooseTarget2() throws Exception { 
			
 
				-    List<Node> excludedNodes;
			
 
				+    HashMap<Node, Node> excludedNodes;
			
 
				     DatanodeDescriptor[] targets;
			
 
				     
			
 
				-    excludedNodes = new ArrayList<Node>();
			
 
				-    excludedNodes.add(dataNodes[1]); 
			
 
				+    excludedNodes = new HashMap<Node, Node>();
			
 
				+    excludedNodes.put(dataNodes[1], dataNodes[1]); 
			
 
				     targets = replicator.chooseTarget(
			
 
				                                       0, dataNodes[0], excludedNodes, BLOCK_SIZE);
			
 
				     assertEquals(targets.length, 0);
			
 
				     
			
 
				     excludedNodes.clear();
			
 
				-    excludedNodes.add(dataNodes[1]); 
			
 
				+    excludedNodes.put(dataNodes[1], dataNodes[1]); 
			
 
				     targets = replicator.chooseTarget(
			
 
				                                       1, dataNodes[0], excludedNodes, BLOCK_SIZE);
			
 
				     assertEquals(targets.length, 1);
			
 
				     assertEquals(targets[0], dataNodes[0]);
			
 
				     
			
 
				     excludedNodes.clear();
			
 
				-    excludedNodes.add(dataNodes[1]); 
			
 
				+    excludedNodes.put(dataNodes[1], dataNodes[1]); 
			
 
				     targets = replicator.chooseTarget(
			
 
				                                       2, dataNodes[0], excludedNodes, BLOCK_SIZE);
			
 
				     assertEquals(targets.length, 2);
			
@@ -159,7 +160,7 @@ public class TestReplicationPolicy extends TestCase {
 
				     assertFalse(cluster.isOnSameRack(targets[0], targets[1]));
			
 
				     
			
 
				     excludedNodes.clear();
			
 
				-    excludedNodes.add(dataNodes[1]); 
			
 
				+    excludedNodes.put(dataNodes[1], dataNodes[1]); 
			
 
				     targets = replicator.chooseTarget(
			
 
				                                       3, dataNodes[0], excludedNodes, BLOCK_SIZE);
			
 
				     assertEquals(targets.length, 3);
			
@@ -168,7 +169,7 @@ public class TestReplicationPolicy extends TestCase {
 
				     assertTrue(cluster.isOnSameRack(targets[1], targets[2]));
			
 
				     
			
 
				     excludedNodes.clear();
			
 
				-    excludedNodes.add(dataNodes[1]); 
			
 
				+    excludedNodes.put(dataNodes[1], dataNodes[1]); 
			
 
				     targets = replicator.chooseTarget(
			
 
				                                       4, dataNodes[0], excludedNodes, BLOCK_SIZE);
			
 
				     assertEquals(targets.length, 4);