瀏覽代碼

HDFS-16676. DatanodeAdminManager$Monitor reports a node as invalid continuously (#4626)

Co-authored-by: Ashutosh Gupta <ashugpt@amazon.com>
Ashutosh Gupta 2 年之前
父節點
當前提交
5cc8c574d1

+ 7 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeAdminDefaultMonitor.java

@@ -201,6 +201,7 @@ public class DatanodeAdminDefaultMonitor extends DatanodeAdminMonitorBase
         iterkey).iterator();
     final List<DatanodeDescriptor> toRemove = new ArrayList<>();
     final List<DatanodeDescriptor> unhealthyDns = new ArrayList<>();
+    boolean isValidState = true;
 
     while (it.hasNext() && !exceededNumBlocksPerCheck() && namesystem
         .isRunning()) {
@@ -265,6 +266,7 @@ public class DatanodeAdminDefaultMonitor extends DatanodeAdminMonitorBase
               // to track maintenance expiration.
               dnAdmin.setInMaintenance(dn);
             } else {
+              isValidState  = false;
               Preconditions.checkState(false,
                   "Node %s is in an invalid state! "
                       + "Invalid state: %s %s blocks are on this dn.",
@@ -288,7 +290,11 @@ public class DatanodeAdminDefaultMonitor extends DatanodeAdminMonitorBase
         // an invalid state.
         LOG.warn("DatanodeAdminMonitor caught exception when processing node "
             + "{}.", dn, e);
-        getPendingNodes().add(dn);
+        if(isValidState){
+          getPendingNodes().add(dn);
+        } else {
+          LOG.warn("Ignoring the node {} which is in invalid state", dn);
+        }
         toRemove.add(dn);
         unhealthyDns.remove(dn);
       } finally {