
HDFS-11577. Combine the old and the new chooseRandom for better performance. Contributed by Chen Liang.

Yiqun Lin 8 years ago
parent commit 6b09336438

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java

@@ -496,7 +496,7 @@ public class NetworkTopology {
     }
   }
 
-  private Node chooseRandom(final String scope, String excludedScope,
+  protected Node chooseRandom(final String scope, String excludedScope,
       final Collection<Node> excludedNodes) {
     if (excludedScope != null) {
       if (scope.startsWith(excludedScope)) {
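
For context, this visibility change (private to protected) is what lets the DFSNetworkTopology subclass below reuse the parent's fast, storage-type-agnostic pick as its first trial. A minimal sketch of that reuse, assuming hadoop-common on the classpath; the subclass and method names here are hypothetical, not part of this commit:

import java.util.Collection;

import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;

// Hypothetical subclass: now that chooseRandom is protected rather than
// private, a subclass can call the inherited method directly, taking the
// topology read lock around the call just as this commit's code does.
public class ExampleTopology extends NetworkTopology {
  Node firstTrial(String scope, String excludedScope,
      Collection<Node> excludedNodes) {
    netlock.readLock().lock();
    try {
      return chooseRandom(scope, excludedScope, excludedNodes);
    } finally {
      netlock.readLock().unlock();
    }
  }
}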

+ 68 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java

@@ -36,7 +36,6 @@ import java.util.Random;
  * remaining parts should be the same.
  *
  * Currently a placeholder to test storage type info.
- * TODO : add "chooseRandom with storageType info" function.
  */
 public class DFSNetworkTopology extends NetworkTopology {
 
@@ -56,6 +55,7 @@ public class DFSNetworkTopology extends NetworkTopology {
    *
    * @param scope range of nodes from which a node will be chosen
    * @param excludedNodes nodes to be excluded from
+   * @param type the storage type we search for
    * @return the chosen node
    */
   public Node chooseRandomWithStorageType(final String scope,
@@ -74,6 +74,69 @@ public class DFSNetworkTopology extends NetworkTopology {
     }
   }
 
+  /**
+   * Randomly choose one node from <i>scope</i> with the given storage type.
+   *
+   * If scope starts with ~, choose one from all nodes except those in
+   * <i>scope</i>; otherwise, choose one from <i>scope</i>.
+   * If excludedNodes is given, choose a node that's not in excludedNodes.
+   *
+   * This call makes up to two trials. It first gets a random node (with
+   * the old method) and checks whether that node satisfies the storage
+   * type requirement. If it does, the node is simply returned. Otherwise,
+   * a second call is made (with the new method), this time passing in
+   * the storage type.
+   *
+   * This is done for performance: in short, the old method is faster but
+   * may take several runs, while the new method is somewhat slower but
+   * always succeeds in one trial. See HDFS-11535 for more detail.
+   *
+   * @param scope range of nodes from which a node will be chosen
+   * @param excludedNodes nodes to be excluded from the choice
+   * @param type the storage type we search for
+   * @return the chosen node
+   */
+  public Node chooseRandomWithStorageTypeTwoTrial(final String scope,
+      final Collection<Node> excludedNodes, StorageType type) {
+    netlock.readLock().lock();
+    try {
+      String searchScope;
+      String excludedScope;
+      if (scope.startsWith("~")) {
+        searchScope = NodeBase.ROOT;
+        excludedScope = scope.substring(1);
+      } else {
+        searchScope = scope;
+        excludedScope = null;
+      }
+      // next do a two-trial search
+      // first trial, call the old method, inherited from NetworkTopology
+      Node n = chooseRandom(searchScope, excludedScope, excludedNodes);
+      if (n == null) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("No node to choose.");
+        }
+        // this means there is simply no node to choose from
+        return null;
+      }
+      Preconditions.checkArgument(n instanceof DatanodeDescriptor);
+      DatanodeDescriptor dnDescriptor = (DatanodeDescriptor)n;
+
+      if (dnDescriptor.hasStorageType(type)) {
+        // the first trial succeeded, just return
+        return dnDescriptor;
+      } else {
+        // otherwise, make the second trial by calling the new method
+        LOG.debug("First trial failed, node has no type {}, " +
+            "making second trial carrying this type", type);
+        return chooseRandomWithStorageType(searchScope, excludedScope,
+            excludedNodes, type);
+      }
+    } finally {
+      netlock.readLock().unlock();
+    }
+  }
+
   /**
    * Choose a random node based on given scope, excludedScope and excludedNodes
    * set. Although in general the topology has at most three layers, this class
@@ -99,13 +162,10 @@ public class DFSNetworkTopology extends NetworkTopology {
    * all it's ancestors' storage counters accordingly, this way the excluded
    * root is out of the picture.
    *
-   * TODO : this function has duplicate code as NetworkTopology, need to
-   * refactor in the future.
-   *
-   * @param scope
-   * @param excludedScope
-   * @param excludedNodes
-   * @return
+   * @param scope the scope where we look for a node
+   * @param excludedScope the scope where the node must NOT be from
+   * @param excludedNodes the returned node must not be in this set
+   * @return a node with the required storage type
    */
   @VisibleForTesting
   Node chooseRandomWithStorageType(final String scope,
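
Stepping back from the diff, the trade-off the Javadoc describes (a fast first trial that may miss, a slower second trial that cannot) can be distilled into a self-contained sketch. This is an illustration of the pattern only, not Hadoop code; TwoTrialChooser and its members are hypothetical names:

import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.function.Predicate;

// Illustration only, not part of this commit: the generic shape of the
// two-trial strategy. Trial one is a cheap unfiltered pick that may not
// satisfy the predicate; trial two filters first and so cannot miss.
public class TwoTrialChooser<T> {
  private final Random random = new Random();

  public T choose(List<T> candidates, Predicate<T> satisfies) {
    if (candidates.isEmpty()) {
      return null; // nothing to choose from at all
    }
    // First trial: fast random pick; succeeds whenever the pick happens
    // to satisfy the predicate (e.g. has the requested storage type).
    T first = candidates.get(random.nextInt(candidates.size()));
    if (satisfies.test(first)) {
      return first;
    }
    // Second trial: pay the cost of one filtering pass, after which a
    // pick from the filtered list is guaranteed to satisfy the predicate.
    List<T> filtered = new ArrayList<>();
    for (T c : candidates) {
      if (satisfies.test(c)) {
        filtered.add(c);
      }
    }
    return filtered.isEmpty()
        ? null
        : filtered.get(random.nextInt(filtered.size()));
  }
}

The second trial's filtering pass is the "somewhat slower" cost the Javadoc mentions, which is why it is only taken after the cheap pick has failed.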

+ 57 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java

@@ -508,4 +508,61 @@ public class TestDFSNetworkTopology {
     assertEquals(1,
         innerl2d3r3.getSubtreeStorageCount(StorageType.DISK));
   }
+
+  @Test
+  public void testChooseRandomWithStorageTypeTwoTrial() throws Exception {
+    Node n;
+    DatanodeDescriptor dd;
+    n = CLUSTER.chooseRandomWithStorageType("/l2/d3/r4", null, null,
+        StorageType.ARCHIVE);
+    HashSet<Node> excluded = new HashSet<>();
+    // exclude the host on r4 (since there is only one host, no randomness here)
+    excluded.add(n);
+
+    // search with the given scope being the desired scope
+    for (int i = 0; i < 10; i++) {
+      n = CLUSTER.chooseRandomWithStorageTypeTwoTrial(
+          "/l2/d3", null, StorageType.ARCHIVE);
+      assertTrue(n instanceof DatanodeDescriptor);
+      dd = (DatanodeDescriptor) n;
+      assertTrue(dd.getHostName().equals("host12") ||
+          dd.getHostName().equals("host13"));
+    }
+
+    for (int i = 0; i<10; i++) {
+      n = CLUSTER.chooseRandomWithStorageTypeTwoTrial(
+          "/l2/d3", excluded, StorageType.ARCHIVE);
+      assertTrue(n instanceof DatanodeDescriptor);
+      dd = (DatanodeDescriptor) n;
+      assertTrue(dd.getHostName().equals("host13"));
+    }
+
+    // search with the given scope being the excluded scope
+
+    // a total of 4 RAM_DISK nodes:
+    // /l1/d2/r3/host7, /l2/d3/r2/host10, /l2/d4/r1/host7 and /l2/d4/r1/host10
+    // so if we exclude /l2/d4, it should always be either host7 or host10
+    for (int i = 0; i < 10; i++) {
+      n = CLUSTER.chooseRandomWithStorageTypeTwoTrial(
+          "~/l2/d4", null, StorageType.RAM_DISK);
+      assertTrue(n instanceof DatanodeDescriptor);
+      dd = (DatanodeDescriptor) n;
+      assertTrue(dd.getHostName().equals("host7") ||
+          dd.getHostName().equals("host10"));
+    }
+
+    // similar to above, except that we also exclude host10 here, so it
+    // should always be host7
+    n = CLUSTER.chooseRandomWithStorageType("/l2/d3/r2", null, null,
+        StorageType.RAM_DISK);
+    // add host10 to exclude
+    excluded.add(n);
+    for (int i = 0; i<10; i++) {
+      n = CLUSTER.chooseRandomWithStorageTypeTwoTrial(
+          "~/l2/d4", excluded, StorageType.RAM_DISK);
+      assertTrue(n instanceof DatanodeDescriptor);
+      dd = (DatanodeDescriptor) n;
+      assertTrue(dd.getHostName().equals("host7"));
+    }
+  }
 }