Explorar o código

HDFS-14993. checkDiskError doesn't work during datanode startup. Contributed by Yang Yun.

Ayush Saxena hai 5 anos
pai
achega
87c198468b

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -1701,14 +1701,14 @@ public class DataNode extends ReconfigurableBase
     // the dataset, block scanners, etc.
     initStorage(nsInfo);
 
-    // Exclude failed disks before initializing the block pools to avoid startup
-    // failures.
-    checkDiskError();
     try {
       data.addBlockPool(nsInfo.getBlockPoolID(), getConf());
     } catch (AddBlockPoolException e) {
       handleAddBlockPoolError(e);
     }
+    // HDFS-14993: check for disk errors after adding the block pool info.
+    checkDiskError();
+
     blockScanner.enableBlockPoolId(bpos.getBlockPoolId());
     initDirectoryScanner(getConf());
     initDiskBalancer(data, getConf());

+ 31 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java

@@ -916,4 +916,35 @@ public class TestDataNodeVolumeFailure {
       }
     }
   }
+
+  /*
+   * Verify that a failed volume is detected during DataNode startup.
+   */
+  @Test(timeout = 120000)
+  public void testVolumeFailureDuringStartup() throws Exception {
+    LOG.debug("Data dir: is " +  dataDir.getPath());

+    // Simulate a volume failure: make its finalized dir read-only.
+    data_fail = cluster.getInstanceStorageDir(1, 0);
+    failedDir = MiniDFSCluster.getFinalizedDir(data_fail,
+        cluster.getNamesystem().getBlockPoolId());
+    failedDir.setReadOnly();

+    // Restart the DataNode at index 1 so that startup sees the bad volume.
+    cluster.restartDataNode(1);
+    final DataNode dn = cluster.getDataNodes().get(1);

+    // Wait until the dataset reports exactly one failed storage location.
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return dn.getFSDataset() !=null &&
+            dn.getFSDataset().getVolumeFailureSummary() != null &&
+            dn.getFSDataset().getVolumeFailureSummary().
+                getFailedStorageLocations()!= null &&
+            dn.getFSDataset().getVolumeFailureSummary().
+                getFailedStorageLocations().length == 1;
+      }
+    }, 10, 30 * 1000);
+  }
 }