فهرست منبع

HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every block that is scanned. Contributed by Colin Patrick McCabe.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1576477 13f79535-47bb-0310-9956-ffa450edef68
Kihwal Lee 11 سال پیش
والد
کامیت
ba2316f0a4

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -8,6 +8,9 @@ Release 0.23.11 - UNRELEASED
     
   IMPROVEMENTS
 
+    HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every
+    block that is scanned (Colin Patrick McCabe)
+
   OPTIMIZATIONS
     
   BUG FIXES

+ 91 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java

@@ -33,6 +33,8 @@ import java.util.concurrent.Future;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -152,12 +154,73 @@ public class DirectoryScanner implements Runnable {
    * Tracks the files and other information related to a block on the disk
    * Missing file is indicated by setting the corresponding member
    * to null.
+   * 
+   * Because millions of these structures may be created, we try to save
+   * memory here.  So instead of storing full paths, we store path suffixes.
+   * The block file, if it exists, will have a path like this:
+   * <volume_base_path>/<block_path>
+   * So we don't need to store the volume path, since we already know what the
+   * volume is.
+   * 
+   * The metadata file, if it exists, will have a path like this:
+   * <volume_base_path>/<block_path>_<genstamp>.meta
+   * So if we have a block file, there isn't any need to store the block path
+   * again.
+   * 
+   * The accessor functions take care of these manipulations.
    */
   static class ScanInfo implements Comparable<ScanInfo> {
     private final long blockId;
-    private final File metaFile;
-    private final File blockFile;
+    
+    /**
+     * The block file path, relative to the volume's base directory.
+     * If there was no block file found, this may be null. If 'vol'
+     * is null, then this is the full path of the block file.
+     */
+    private final String blockSuffix;
+    
+    /**
+     * The suffix of the meta file path relative to the block file.
+     * If blockSuffix is null, then this will be the entire path relative
+     * to the volume base directory, or an absolute path if vol is also
+     * null.
+     */
+    private final String metaSuffix;
+
     private final FSVolumeInterface volume;
+    private final static Pattern CONDENSED_PATH_REGEX =
+        Pattern.compile("(?<!^)(\\\\|/){2,}");
+    
+    private final static String QUOTED_FILE_SEPARATOR = 
+        Matcher.quoteReplacement(File.separator);
+    
+    /**
+     * Get the most condensed version of the path.
+     *
+     * For example, the condensed version of /foo//bar is /foo/bar
+     * Unlike {@link File#getCanonicalPath()}, this will never perform I/O
+     * on the filesystem.
+     */
+    private static String getCondensedPath(String path) {
+      return CONDENSED_PATH_REGEX.matcher(path).
+          replaceAll(QUOTED_FILE_SEPARATOR);
+    }
+
+    /**
+     * Get a path suffix.
+     *
+     * @param f            The file to get the suffix for.
+     * @param prefix       The prefix we're stripping off.
+     *
+     * @return             A suffix such that prefix + suffix = path to f
+     */
+    private static String getSuffix(File f, String prefix) {
+      String fullPath = getCondensedPath(f.getAbsolutePath());
+      if (fullPath.startsWith(prefix)) {
+        return fullPath.substring(prefix.length());
+      }
+      throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
+    }
 
     ScanInfo(long blockId) {
       this(blockId, null, null, null);
@@ -165,17 +228,34 @@ public class DirectoryScanner implements Runnable {
 
     ScanInfo(long blockId, File blockFile, File metaFile, FSVolumeInterface vol) {
       this.blockId = blockId;
-      this.metaFile = metaFile;
-      this.blockFile = blockFile;
+      String condensedVolPath = vol == null ? null :
+        getCondensedPath(vol.getBasePath());
+      this.blockSuffix = blockFile == null ? null :
+        getSuffix(blockFile, condensedVolPath);
+      if (metaFile == null) {
+        this.metaSuffix = null;
+      } else if (blockFile == null) {
+        this.metaSuffix = getSuffix(metaFile, condensedVolPath);
+      } else {
+        this.metaSuffix = getSuffix(metaFile,
+            condensedVolPath + blockSuffix);
+      }
       this.volume = vol;
     }
 
-    File getMetaFile() {
-      return metaFile;
+    File getBlockFile() {
+      return (blockSuffix == null) ? null :
+        new File(volume.getBasePath(), blockSuffix);
     }
 
-    File getBlockFile() {
-      return blockFile;
+    File getMetaFile() {
+      if (metaSuffix == null) {
+        return null;
+      } else if (blockSuffix == null) {
+        return new File(volume.getBasePath(), metaSuffix);
+      } else {
+        return new File(volume.getBasePath(), blockSuffix + metaSuffix);
+      }
     }
 
     long getBlockId() {
@@ -214,8 +294,9 @@ public class DirectoryScanner implements Runnable {
     }
 
     public long getGenStamp() {
-      return metaFile != null ? Block.getGenerationStamp(metaFile.getName()) :
-        GenerationStamp.GRANDFATHER_GENERATION_STAMP;
+      return metaSuffix != null ? Block.getGenerationStamp(
+          getMetaFile().getName()) : 
+            GenerationStamp.GRANDFATHER_GENERATION_STAMP;
     }
   }
 

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java

@@ -594,6 +594,11 @@ class FSDataset implements FSDatasetInterface {
       }
       return (remaining > 0) ? remaining : 0;
     }
+
+    @Override
+    public String getBasePath() {
+      return currentDir.getParent();
+    }
       
     long getReserved(){
       return reserved;

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetInterface.java

@@ -86,6 +86,9 @@ public interface FSDatasetInterface extends FSDatasetMBean {
     /** @return the available storage space in bytes. */
     public long getAvailable() throws IOException;
 
+    /** @return the base path to the volume */
+    public String getBasePath();
+
     /** @return the directory for the block pool. */
     public File getDirectory(String bpid) throws IOException;
 

+ 92 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java

@@ -40,6 +40,8 @@ import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
 import org.apache.hadoop.hdfs.server.datanode.FSDatasetInterface.FSVolumeInterface;
 
+import org.junit.Test;
+
 /**
  * Tests {@link DirectoryScanner} handling of differences
  * between blocks on the disk and block in memory.
@@ -216,6 +218,7 @@ public class TestDirectoryScanner extends TestCase {
     assertEquals(mismatchBlocks, stats.mismatchBlocks);
   }
 
+  @Test
   public void testDirectoryScanner() throws Exception {
     // Run the test with and without parallel scanning
     for (int parallelism = 1; parallelism < 3; parallelism++) {
@@ -372,4 +375,93 @@ public class TestDirectoryScanner extends TestCase {
     assertNotNull(memBlock);
     assertEquals(genStamp, memBlock.getGenerationStamp());
   }
+  
+  private static class TestFsVolumeSpi implements FSVolumeInterface {
+    @Override
+    public String[] getBlockPoolList() {
+      return new String[0];
+    }
+    
+    @Override
+    public long getAvailable() throws IOException {
+      return 0;
+    }
+    
+    @Override
+    public String getBasePath() {
+      return "/base";
+    }
+    
+    @Override
+    public File getDirectory(String bpid) throws IOException {
+      return new File("/base/current/" + bpid);
+    }
+
+    @Override
+    public File getFinalizedDir(String bpid) throws IOException {
+      return new File("/base/current/" + bpid + "/finalized");
+    }
+  }
+
+  private final static TestFsVolumeSpi TEST_VOLUME = new TestFsVolumeSpi();
+  
+  private final static String BPID_1 = "BP-783049782-127.0.0.1-1370971773491";
+  
+  private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
+      
+  void testScanInfoObject(long blockId, File blockFile, File metaFile)
+      throws Exception {
+    assertEquals("/base/current/" + BPID_1 + "/finalized",
+        TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath());
+    DirectoryScanner.ScanInfo scanInfo =
+        new DirectoryScanner.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
+    assertEquals(blockId, scanInfo.getBlockId());
+    if (blockFile != null) {
+      assertEquals(blockFile.getAbsolutePath(),
+          scanInfo.getBlockFile().getAbsolutePath());
+    } else {
+      assertNull(scanInfo.getBlockFile());
+    }
+    if (metaFile != null) {
+      assertEquals(metaFile.getAbsolutePath(),
+          scanInfo.getMetaFile().getAbsolutePath());
+    } else {
+      assertNull(scanInfo.getMetaFile());
+    }
+    assertEquals(TEST_VOLUME, scanInfo.getVolume());
+  }
+  
+  void testScanInfoObject(long blockId) throws Exception {
+    DirectoryScanner.ScanInfo scanInfo =
+        new DirectoryScanner.ScanInfo(blockId);
+    assertEquals(blockId, scanInfo.getBlockId());
+    assertNull(scanInfo.getBlockFile());
+    assertNull(scanInfo.getMetaFile());
+  }
+
+  @Test(timeout=120000)
+  public void testScanInfo() throws Exception {
+    testScanInfoObject(123,
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+            "blk_123"),
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+            "blk_123__1001.meta"));
+    testScanInfoObject(464,
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+            "blk_123"),
+        null);
+    testScanInfoObject(523,
+        null,
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+            "blk_123__1009.meta"));
+    testScanInfoObject(789,
+        null,
+        null);
+    testScanInfoObject(456);
+    testScanInfoObject(123,
+        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
+            "blk_567"),
+        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
+            "blk_567__1004.meta"));
+  }
 }