瀏覽代碼

HDFS-16583. DatanodeAdminDefaultMonitor can get stuck in an infinite loop holding the write lock (#4332)

Co-authored-by: S O'Donnell <sodonnell@cloudera.com>
(cherry picked from commit 297f0f6d6a48468b56fd45c86412569cd927f749)
Stephen O'Donnell 3 年之前
父節點
當前提交
55ba3a7944

+ 4 - 12
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminBackoffMonitor.java

@@ -32,8 +32,6 @@ import java.util.Map;
 import java.util.List;
 import java.util.Iterator;
 import java.util.LinkedList;
-import java.util.ArrayDeque;
-import java.util.Queue;
 import java.util.stream.Collectors;
 
 /**
@@ -71,12 +69,6 @@ public class DatanodeAdminBackoffMonitor extends DatanodeAdminMonitorBase
   private HashMap<DatanodeDescriptor, HashMap<BlockInfo, Integer>>
       outOfServiceNodeBlocks = new HashMap<>();
 
-  /**
-   * Any nodes where decommission or maintenance has been cancelled are added
-   * to this queue for later processing.
-   */
-  private final Queue<DatanodeDescriptor> cancelledNodes = new ArrayDeque<>();
-
   /**
    * The numbe of blocks to process when moving blocks to pendingReplication
    * before releasing and reclaiming the namenode lock.
@@ -151,7 +143,7 @@ public class DatanodeAdminBackoffMonitor extends DatanodeAdminMonitorBase
   @Override
   public void stopTrackingNode(DatanodeDescriptor dn) {
     getPendingNodes().remove(dn);
-    cancelledNodes.add(dn);
+    getCancelledNodes().add(dn);
   }
 
   @Override
@@ -232,7 +224,7 @@ public class DatanodeAdminBackoffMonitor extends DatanodeAdminMonitorBase
           "in maintenance or transitioning state. {} nodes pending. {} " +
           "nodes waiting to be cancelled.",
           numBlocksChecked, outOfServiceNodeBlocks.size(), getPendingNodes().size(),
-          cancelledNodes.size());
+          getCancelledNodes().size());
     }
   }
 
@@ -259,8 +251,8 @@ public class DatanodeAdminBackoffMonitor extends DatanodeAdminMonitorBase
    * write lock to prevent the cancelledNodes list being modified externally.
    */
   private void processCancelledNodes() {
-    while(!cancelledNodes.isEmpty()) {
-      DatanodeDescriptor dn = cancelledNodes.poll();
+    while(!getCancelledNodes().isEmpty()) {
+      DatanodeDescriptor dn = getCancelledNodes().poll();
       outOfServiceNodeBlocks.remove(dn);
       pendingRep.remove(dn);
     }

+ 16 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminDefaultMonitor.java

@@ -124,7 +124,7 @@ public class DatanodeAdminDefaultMonitor extends DatanodeAdminMonitorBase
   @Override
   public void stopTrackingNode(DatanodeDescriptor dn) {
     getPendingNodes().remove(dn);
-    outOfServiceNodeBlocks.remove(dn);
+    getCancelledNodes().add(dn);
   }
 
   @Override
@@ -152,6 +152,7 @@ public class DatanodeAdminDefaultMonitor extends DatanodeAdminMonitorBase
     // Check decommission or maintenance progress.
     namesystem.writeLock();
     try {
+      processCancelledNodes();
       processPendingNodes();
       check();
     } catch (Exception e) {
@@ -180,6 +181,20 @@ public class DatanodeAdminDefaultMonitor extends DatanodeAdminMonitorBase
     }
   }
 
+  /**
+   * Process any nodes which have had their decommission or maintenance mode
+   * cancelled by an administrator.
+   *
+   * This method must be executed under the write lock to prevent the
+   * internal structures being modified concurrently.
+   */
+  private void processCancelledNodes() {
+    while(!getCancelledNodes().isEmpty()) {
+      DatanodeDescriptor dn = getCancelledNodes().poll();
+      outOfServiceNodeBlocks.remove(dn);
+    }
+  }
+
   private void check() {
     final Iterator<Map.Entry<DatanodeDescriptor, AbstractList<BlockInfo>>>
         it = new CyclicIteration<>(outOfServiceNodeBlocks,

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorBase.java

@@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.server.namenode.Namesystem;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.ArrayDeque;
 import java.util.Comparator;
 import java.util.List;
 import java.util.PriorityQueue;
@@ -53,6 +54,12 @@ public abstract class DatanodeAdminMonitorBase
   private final PriorityQueue<DatanodeDescriptor> pendingNodes = new PriorityQueue<>(
       PENDING_NODES_QUEUE_COMPARATOR);
 
+  /**
+   * Any nodes where decommission or maintenance has been cancelled are added
+   * to this queue for later processing.
+   */
+  private final Queue<DatanodeDescriptor> cancelledNodes = new ArrayDeque<>();
+
   /**
    * The maximum number of nodes to track in outOfServiceNodeBlocks.
    * A value of 0 means no limit.
@@ -163,6 +170,11 @@ public abstract class DatanodeAdminMonitorBase
     return pendingNodes;
   }
 
+  @Override
+  public Queue<DatanodeDescriptor> getCancelledNodes() {
+    return cancelledNodes;
+  }
+
   /**
    * If node "is dead while in Decommission In Progress", it cannot be decommissioned
    * until it becomes healthy again. If there are more pendingNodes than can be tracked

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminMonitorInterface.java

@@ -32,6 +32,7 @@ public interface DatanodeAdminMonitorInterface extends Runnable {
   int getTrackedNodeCount();
   int getNumNodesChecked();
   Queue<DatanodeDescriptor> getPendingNodes();
+  Queue<DatanodeDescriptor> getCancelledNodes();
 
   void setBlockManager(BlockManager bm);
   void setDatanodeAdminManager(DatanodeAdminManager dnm);