Explorar el Código

HDFS-4937. ReplicationMonitor can infinite-loop in BlockPlacementPolicyDefault#chooseRandom(). Contributed by Kihwal Lee.
(cherry picked from commit 43539b5ff4ac0874a8a454dc93a2a782b0e0ea8f)

Kihwal Lee hace 9 años
padre
commit
c250b21c23

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -89,6 +89,9 @@ Release 2.7.2 - UNRELEASED
     HDFS-9317. Document fsck -blockId and -storagepolicy options in branch-2.7.
     (aajisaka)
 
+    HDFS-4937. ReplicationMonitor can infinite-loop in
+    BlockPlacementPolicyDefault#chooseRandom() (kihwal)
+
 Release 2.7.1 - 2015-07-06
 
   INCOMPATIBLE CHANGES

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java

@@ -622,6 +622,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
       
     int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes(
         scope, excludedNodes);
+    int refreshCounter = numOfAvailableNodes;
     StringBuilder builder = null;
     if (LOG.isDebugEnabled()) {
       builder = debugLoggingBuilder.get();
@@ -675,6 +676,17 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
         // If no candidate storage was found on this DN then set badTarget.
         badTarget = (i == storages.length);
       }
+      // Refresh the node count. If the live node count has shrunk but the
+      // change is not reflected in this loop, the loop may never terminate
+      // when the replica/rack constraints cannot be satisfied.
+      if (--refreshCounter == 0) {
+        refreshCounter = clusterMap.countNumOfAvailableNodes(scope,
+            excludedNodes);
+        // It has already gone through a sufficient number of nodes.
+        if (refreshCounter <= excludedNodes.size()) {
+          break;
+        }
+      }
     }
       
     if (numOfReplicas>0) {