HDFS-8791. block ID-based DN storage layout can be very slow for datanode on ext4. Contributed by Chris Trezzo.

(cherry picked from commit b8dbffbef87c0ee0bee5e17649586fc9f0e1f55b)
Kihwal Lee 9 years ago
parent
commit
a823c30a9b

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -1946,6 +1946,9 @@ Release 2.7.3 - UNRELEASED
 
     HDFS-9395. Make HDFS audit logging consistant (Kuhu Shukla via kihwal)
 
+    HDFS-8791. block ID-based DN storage layout can be very slow for datanode
+    on ext4 (Chris Trezzo via kihwal)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNodeLayoutVersion.java

@@ -65,7 +65,10 @@ public class DataNodeLayoutVersion {
     FIRST_LAYOUT(-55, -53, "First datanode layout", false),
     BLOCKID_BASED_LAYOUT(-56,
         "The block ID of a finalized block uniquely determines its position " +
-            "in the directory structure");
+        "in the directory structure"),
+    BLOCKID_BASED_LAYOUT_32_by_32(-57,
+        "Identical to the block id based layout (-56) except it uses a smaller"
+        + " directory structure (32x32)");
    
     private final FeatureInfo info;
 

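Note on the numbering: datanode layout versions are negative and step downward as features land, so the new -57 layout is newer than the -56 one. A minimal sketch of reading the two constants, assuming it is placed in the same org.apache.hadoop.hdfs.server.datanode package; only the enum constants and accessors visible in this diff are used:

    package org.apache.hadoop.hdfs.server.datanode;

    // Sketch only (not part of the patch): prints the layout versions of the two
    // block ID-based layout features; the smaller (more negative) value is newer.
    public class LayoutVersionCheck {
      public static void main(String[] args) {
        int v256x256 = DataNodeLayoutVersion.Feature.BLOCKID_BASED_LAYOUT
            .getInfo().getLayoutVersion();                     // -56
        int v32x32 = DataNodeLayoutVersion.Feature.BLOCKID_BASED_LAYOUT_32_by_32
            .getInfo().getLayoutVersion();                     // -57
        System.out.println(v256x256 + " vs " + v32x32);
      }
    }
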
+ 7 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java

@@ -1102,10 +1102,13 @@ public class DataStorage extends Storage {
     LOG.info("Start linking block files from " + from + " to " + to);
     boolean upgradeToIdBasedLayout = false;
     // If we are upgrading from a version older than the one where we introduced
-    // block ID-based layout AND we're working with the finalized directory,
-    // we'll need to upgrade from the old flat layout to the block ID-based one
-    if (oldLV > DataNodeLayoutVersion.Feature.BLOCKID_BASED_LAYOUT.getInfo().
-        getLayoutVersion() && to.getName().equals(STORAGE_DIR_FINALIZED)) {
+    // block ID-based layout (32x32) AND we're working with the finalized
+    // directory, we'll need to upgrade from the old layout to the new one. The
+    // upgrade path from pre-blockid based layouts (>-56) and blockid based
+    // 256x256 layouts (-56) is fortunately the same.
+    if (oldLV > DataNodeLayoutVersion.Feature.BLOCKID_BASED_LAYOUT_32_by_32
+        .getInfo().getLayoutVersion()
+        && to.getName().equals(STORAGE_DIR_FINALIZED)) {
       upgradeToIdBasedLayout = true;
     }
 

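The comparison reads backwards at first glance because layout versions grow more negative over time: anything greater than -57 predates the 32x32 layout, which now also covers datanodes already sitting on the 256x256 layout (-56). A small sketch of how the patched condition classifies a few stored versions (the STORAGE_DIR_FINALIZED check is omitted and the class/variable names here are made up):

    // Sketch only: evaluates the upgrade trigger for a few stored layout versions.
    // -55 = FIRST_LAYOUT (pre block-ID), -56 = 256x256 block-ID layout,
    // -57 = the new 32x32 block-ID layout.
    public class UpgradeTriggerDemo {
      public static void main(String[] args) {
        final int layout32x32 = -57;  // BLOCKID_BASED_LAYOUT_32_by_32
        int[] storedVersions = { -55, -56, -57 };
        for (int oldLV : storedVersions) {
          // "Greater than -57" means "older than the 32x32 layout".
          System.out.println("oldLV=" + oldLV
              + " upgradeToIdBasedLayout=" + (oldLV > layout32x32));
        }
        // -55 and -56 both take the same hardlink-based re-layout path;
        // -57 is already on the new layout and is left alone.
      }
    }
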
+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeUtil.java

@@ -113,8 +113,8 @@ public class DatanodeUtil {
    * @return
    */
   public static File idToBlockDir(File root, long blockId) {
-    int d1 = (int)((blockId >> 16) & 0xff);
-    int d2 = (int)((blockId >> 8) & 0xff);
+    int d1 = (int) ((blockId >> 16) & 0x1F);
+    int d2 = (int) ((blockId >> 8) & 0x1F);
     String path = DataStorage.BLOCK_SUBDIR_PREFIX + d1 + SEP +
         DataStorage.BLOCK_SUBDIR_PREFIX + d2;
     return new File(root, path);

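Narrowing the masks from 0xff to 0x1F is the whole layout change: each of the two subdir levels now has 32 possible names instead of 256, so a block pool's finalized tree tops out at 32*32 = 1,024 leaf directories rather than 256*256 = 65,536, the structure HDFS-8791 reports as very slow for the datanode on ext4. A standalone sketch (the idToBlockDir arithmetic is copied inline; the block ID and class name are invented for illustration) showing where one block lands under each mask:

    // Sketch only: mirrors the DatanodeUtil.idToBlockDir arithmetic above to show
    // the subdirectory one example block ID maps to under each mask.
    public class LayoutMaskDemo {
      static String subdir(long blockId, long mask) {
        int d1 = (int) ((blockId >> 16) & mask);
        int d2 = (int) ((blockId >> 8) & mask);
        return "subdir" + d1 + "/subdir" + d2;
      }

      public static void main(String[] args) {
        long blockId = 0x40ABCDEFL;                 // arbitrary example block ID
        System.out.println(subdir(blockId, 0xff));  // old -56 layout: subdir171/subdir205
        System.out.println(subdir(blockId, 0x1F));  // new -57 layout: subdir11/subdir13
      }
    }
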
+ 27 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeLayoutUpgrade.java

@@ -28,10 +28,16 @@ public class TestDatanodeLayoutUpgrade {
   private static final String HADOOP_DATANODE_DIR_TXT =
       "hadoop-datanode-dir.txt";
   private static final String HADOOP24_DATANODE = "hadoop-24-datanode-dir.tgz";
+  private static final String HADOOP_56_DN_LAYOUT_TXT =
+      "hadoop-to-57-dn-layout-dir.txt";
+  private static final String HADOOP_56_DN_LAYOUT =
+      "hadoop-56-layout-datanode-dir.tgz";
 
+  /**
+   * Upgrade from LDir-based layout to 32x32 block ID-based layout (-57) --
+   * change described in HDFS-6482 and HDFS-8791
+   */
   @Test
-  // Upgrade from LDir-based layout to block ID-based layout -- change described
-  // in HDFS-6482
   public void testUpgradeToIdBasedLayout() throws IOException {
     TestDFSUpgradeFromImage upgrade = new TestDFSUpgradeFromImage();
     upgrade.unpackStorage(HADOOP24_DATANODE, HADOOP_DATANODE_DIR_TXT);
@@ -45,4 +51,23 @@ public class TestDatanodeLayoutUpgrade {
     upgrade.upgradeAndVerify(new MiniDFSCluster.Builder(conf).numDataNodes(1)
     .manageDataDfsDirs(false).manageNameDfsDirs(false), null);
   }
+
+  /**
+   * Test upgrade from block ID-based layout 256x256 (-56) to block ID-based
+   * layout 32x32 (-57)
+   */
+  @Test
+  public void testUpgradeFrom256To32Layout() throws IOException {
+    TestDFSUpgradeFromImage upgrade = new TestDFSUpgradeFromImage();
+    upgrade.unpackStorage(HADOOP_56_DN_LAYOUT, HADOOP_56_DN_LAYOUT_TXT);
+    Configuration conf = new Configuration(TestDFSUpgradeFromImage.upgradeConf);
+    conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY,
+        new File(System.getProperty("test.build.data"), "dfs" + File.separator
+            + "data").toURI().toString());
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
+        new File(System.getProperty("test.build.data"), "dfs" + File.separator
+            + "name").toURI().toString());
+    upgrade.upgradeAndVerify(new MiniDFSCluster.Builder(conf).numDataNodes(1)
+        .manageDataDfsDirs(false).manageNameDfsDirs(false), null);
+  }
 }

BIN
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-56-layout-datanode-dir.tgz


+ 24 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-to-57-dn-layout-dir.txt

@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Similar to hadoop-dfs-dir.txt, except this is used for a datanode layout
+# upgrade test to 32x32 (layout -57)
+# Uncomment the following line to produce checksum info for a new DFS image.
+#printChecksums
+/blocks/part1 	 286881285
+/blocks/part12922 	 1068680946
+/blocks/part972 	 2479788008
+/blocks/part973 	 1221039573
+overallCRC 	 1902127725