瀏覽代碼

HDFS-15644. Failed volumes can cause DNs to stop block reporting. Contributed by Ahmed Hussein.

Wei-Chiu Chuang 4 年之前
父節點
當前提交
e7e165a927

+ 17 - 13
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java

@@ -1935,29 +1935,33 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
       Set<String> missingVolumesReported = new HashSet<>();
       for (ReplicaInfo b : volumeMap.replicas(bpid)) {
         String volStorageID = b.getVolume().getStorageID();
-        if (!builders.containsKey(volStorageID)) {
-          if (!missingVolumesReported.contains(volStorageID)) {
-            LOG.warn("Storage volume: " + volStorageID + " missing for the"
-                + " replica block: " + b + ". Probably being removed!");
-            missingVolumesReported.add(volStorageID);
-          }
-          continue;
-        }
         switch(b.getState()) {
           case FINALIZED:
           case RBW:
           case RWR:
-            builders.get(b.getVolume().getStorageID()).add(b);
             break;
           case RUR:
-            ReplicaUnderRecovery rur = (ReplicaUnderRecovery)b;
-            builders.get(rur.getVolume().getStorageID())
-                .add(rur.getOriginalReplica());
+            // use the original replica.
+            ReplicaUnderRecovery rur = (ReplicaUnderRecovery) b;
+            b = rur.getOriginalReplica();
             break;
           case TEMPORARY:
-            break;
+            continue;
           default:
             assert false : "Illegal ReplicaInfo state.";
+            continue;
+        }
+        BlockListAsLongs.Builder storageBuilder = builders.get(volStorageID);
+        // a storage in the process of failing will not be in the volumes list
+        // but will be in the replica map.
+        if (storageBuilder != null) {
+          storageBuilder.add(b);
+        } else {
+          if (!missingVolumesReported.contains(volStorageID)) {
+            LOG.warn("Storage volume: " + volStorageID + " missing for the"
+                + " replica block: " + b + ". Probably being removed!");
+            missingVolumesReported.add(volStorageID);
+          }
         }
       }
     }