
HDFS-7719. BlockPoolSliceStorage#removeVolumes fails to remove some in-memory state associated with volumes. (Lei (Eddy) Xu via Colin P. McCabe)

Colin Patrick Mccabe, 10 years ago
Commit
40a415799b
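
Context for the change (not part of the commit itself): before this patch, DataStorage#removeVolumes handed the set of top-level data directories straight to BlockPoolSliceStorage#removeVolumes, but the StorageDirectory roots tracked by BlockPoolSliceStorage are block-pool-level paths (what getBpRoot returns, under the volume's current/<bpid> subtree), so the contains() check never matched and the in-memory entries survived. The standalone sketch below uses only java.io.File with a made-up volume path and block pool id; it is not the real Hadoop classes, it just illustrates the path mismatch and the absolute-path comparison the patch switches to.

import java.io.File;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
 * Standalone illustration of the path mismatch fixed by HDFS-7719.
 * Paths, the block pool id, and the class name are assumptions for the demo.
 */
public class BpRootMismatchDemo {
  public static void main(String[] args) {
    // A data volume as configured in dfs.datanode.data.dir.
    File dataDir = new File("/data/dn1");
    // The block-pool-level root tracked by BlockPoolSliceStorage sits
    // below the volume, roughly <dataDir>/current/<bpid>.
    File bpRoot = new File(new File(dataDir, "current"), "BP-1234-127.0.0.1-42");

    // Old check: compare bp-level roots against the set of removed volume paths.
    Set<File> removedVolumes = new HashSet<>(Arrays.asList(dataDir));
    System.out.println("old contains() check matches: "
        + removedVolumes.contains(bpRoot));                   // false -> entry kept

    // New check: derive the bp root for the removed volume and compare
    // absolute paths, as the patched DataStorage/BlockPoolSliceStorage do.
    File bpRootForVolume = new File(new File(dataDir, "current"),
        "BP-1234-127.0.0.1-42").getAbsoluteFile();
    System.out.println("new absolute-path check matches: "
        + bpRoot.getAbsoluteFile().equals(bpRootForVolume));  // true -> removed
  }
}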

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -837,6 +837,10 @@ Release 2.7.0 - UNRELEASED
 
     HDFS-7734. Class cast exception in NameNode#main. (yliu via wang)
 
+    HDFS-7719. BlockPoolSliceStorage#removeVolumes fails to remove some
+    in-memory state associated with volumes. (Lei (Eddy) Xu via Colin P.
+    McCabe)
+
 Release 2.6.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 8 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceStorage.java

@@ -275,15 +275,19 @@ public class BlockPoolSliceStorage extends Storage {
   }
 
   /**
-   * Remove storage directories.
-   * @param storageDirs a set of storage directories to be removed.
+   * Remove block pool level storage directory.
+   * @param absPathToRemove the absolute path of the root for the block pool
+   *                        level storage to remove.
    */
-  void removeVolumes(Set<File> storageDirs) {
+  void remove(File absPathToRemove) {
+    Preconditions.checkArgument(absPathToRemove.isAbsolute());
+    LOG.info("Removing block level storage: " + absPathToRemove);
     for (Iterator<StorageDirectory> it = this.storageDirs.iterator();
          it.hasNext(); ) {
       StorageDirectory sd = it.next();
-      if (storageDirs.contains(sd.getRoot())) {
+      if (sd.getRoot().getAbsoluteFile().equals(absPathToRemove)) {
         it.remove();
+        break;
       }
     }
   }

+ 10 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java

@@ -37,7 +37,6 @@ import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
-import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
@@ -423,15 +422,21 @@ public class DataStorage extends Storage {
       dataDirs.add(sl.getFile());
     }
 
-    for (BlockPoolSliceStorage bpsStorage : this.bpStorageMap.values()) {
-      bpsStorage.removeVolumes(dataDirs);
-    }
-
     StringBuilder errorMsgBuilder = new StringBuilder();
     for (Iterator<StorageDirectory> it = this.storageDirs.iterator();
          it.hasNext(); ) {
       StorageDirectory sd = it.next();
       if (dataDirs.contains(sd.getRoot())) {
+        // Remove the block pool level storage first.
+        for (Map.Entry<String, BlockPoolSliceStorage> entry :
+            this.bpStorageMap.entrySet()) {
+          String bpid = entry.getKey();
+          BlockPoolSliceStorage bpsStorage = entry.getValue();
+          File bpRoot =
+              BlockPoolSliceStorage.getBpRoot(bpid, sd.getCurrentDir());
+          bpsStorage.remove(bpRoot.getAbsoluteFile());
+        }
+
         it.remove();
         try {
           sd.unlock();

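Note on the ordering above (my reading of the hunk, not text from the commit): the block-pool-level state is pruned before the volume's own StorageDirectory is removed and unlocked, so no stale entries are left behind if the same directory is added back later, which is what the new test below exercises. The toy model that follows uses plain collections and hypothetical names (TwoLevelRemovalSketch, volumeRoots, bpRoots) as a stand-in for DataStorage and BlockPoolSliceStorage; it only sketches the two-level pruning under those assumptions.

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/** Toy model of the two-level bookkeeping; all names here are hypothetical. */
public class TwoLevelRemovalSketch {
  // Volume-level storage roots (stand-in for DataStorage.storageDirs).
  private final List<File> volumeRoots = new ArrayList<>();
  // Per block pool, the bp-level roots (stand-in for bpStorageMap).
  private final Map<String, List<File>> bpRoots = new HashMap<>();

  void addVolume(File dataDir, Iterable<String> bpids) {
    volumeRoots.add(dataDir);
    for (String bpid : bpids) {
      bpRoots.computeIfAbsent(bpid, k -> new ArrayList<>())
          .add(new File(new File(dataDir, "current"), bpid));
    }
  }

  /** Mirrors the patched flow: prune bp-level state first, then the volume. */
  void removeVolume(File dataDir) {
    for (Iterator<File> it = volumeRoots.iterator(); it.hasNext();) {
      File root = it.next();
      if (root.equals(dataDir)) {
        // Remove the block pool level entries for this volume first.
        for (Map.Entry<String, List<File>> e : bpRoots.entrySet()) {
          File bpRoot = new File(new File(root, "current"), e.getKey())
              .getAbsoluteFile();
          e.getValue().removeIf(f -> f.getAbsoluteFile().equals(bpRoot));
        }
        it.remove();
      }
    }
  }

  public static void main(String[] args) {
    TwoLevelRemovalSketch s = new TwoLevelRemovalSketch();
    s.addVolume(new File("/data/dn1"), Arrays.asList("BP-1"));
    s.addVolume(new File("/data/dn2"), Arrays.asList("BP-1"));
    s.removeVolume(new File("/data/dn2"));
    // Both levels are now clean for /data/dn2, so re-adding it is safe,
    // which is the property the new test verifies end to end.
    s.addVolume(new File("/data/dn2"), Arrays.asList("BP-1"));
    System.out.println(s.bpRoots.get("BP-1"));
  }
}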
+ 34 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java

@@ -638,4 +638,38 @@ public class TestDataNodeHotSwapVolumes {
       throw new IOException(exceptions.get(0).getCause());
     }
   }
+
+  @Test(timeout=60000)
+  public void testAddBackRemovedVolume()
+      throws IOException, TimeoutException, InterruptedException,
+      ReconfigurationException {
+    startDFSCluster(1, 2);
+    // Create some data on every volume.
+    createFile(new Path("/test"), 32);
+
+    DataNode dn = cluster.getDataNodes().get(0);
+    Configuration conf = dn.getConf();
+    String oldDataDir = conf.get(DFS_DATANODE_DATA_DIR_KEY);
+    String keepDataDir = oldDataDir.split(",")[0];
+    String removeDataDir = oldDataDir.split(",")[1];
+
+    dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, keepDataDir);
+    for (int i = 0; i < cluster.getNumNameNodes(); i++) {
+      String bpid = cluster.getNamesystem(i).getBlockPoolId();
+      BlockPoolSliceStorage bpsStorage =
+          dn.getStorage().getBPStorage(bpid);
+      // Make sure that there is no block pool level storage under removeDataDir.
+      for (int j = 0; j < bpsStorage.getNumStorageDirs(); j++) {
+        Storage.StorageDirectory sd = bpsStorage.getStorageDir(j);
+        assertFalse(sd.getRoot().getAbsolutePath().startsWith(
+            new File(removeDataDir).getAbsolutePath()
+        ));
+      }
+      assertEquals(dn.getStorage().getBPStorage(bpid).getNumStorageDirs(), 1);
+    }
+
+    // Bring the removed directory back. It only succeeds if all metadata about
+    // this directory was removed in the previous step.
+    dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, oldDataDir);
+  }
 }