Просмотр исходного кода

HDFS-3596. Improve FSEditLog pre-allocation in branch-1. Contributed by Colin Patrick McCabe.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1@1358932 13f79535-47bb-0310-9956-ffa450edef68
Matthew Foley 13 лет назад
Родитель
Коммит
4170f983d5

+ 3 - 0
CHANGES.txt

@@ -21,6 +21,9 @@ Release 1.2.0 - unreleased
 
     HDFS-3604. Add dfs.webhdfs.enabled to hdfs-default.xml. (eli)
 
+    HDFS-3596. Improve FSEditLog pre-allocation in branch-1.
+    (Colin Patrick McCabe via mattf)
+
   BUG FIXES
 
     HADOOP-8460. Document proper setting of HADOOP_PID_DIR and

+ 25 - 19
src/hdfs/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java

@@ -92,7 +92,7 @@ public class FSEditLog {
 
   private static int sizeFlushBuffer = 512*1024;
   /** Preallocation length in bytes for writing edit log. */
-  private static final int PREALLOCATION_LENGTH = 1024 * 1024;
+  static final int MIN_PREALLOCATION_LENGTH = 1024 * 1024;
   /** The limit of the length in bytes for each edit log transaction. */
   private static int TRANSACTION_LENGTH_LIMIT = Integer.MAX_VALUE;
 
@@ -136,12 +136,12 @@ public class FSEditLog {
    * An implementation of the abstract class {@link EditLogOutputStream},
    * which stores edits in a local file.
    */
-  static private class EditLogFileOutputStream extends EditLogOutputStream {
+  static class EditLogFileOutputStream extends EditLogOutputStream {
     /** Preallocation buffer, padded with OP_INVALID */
     private static final ByteBuffer PREALLOCATION_BUFFER
-        = ByteBuffer.allocateDirect(PREALLOCATION_LENGTH);
+        = ByteBuffer.allocateDirect(MIN_PREALLOCATION_LENGTH);
     static {
-      PREALLOCATION_BUFFER.position(0).limit(PREALLOCATION_LENGTH);
+      PREALLOCATION_BUFFER.position(0).limit(MIN_PREALLOCATION_LENGTH);
       for(int i = 0; i < PREALLOCATION_BUFFER.capacity(); i++) {
         PREALLOCATION_BUFFER.put(OP_INVALID);
       }
@@ -211,7 +211,7 @@ public class FSEditLog {
       bufCurrent.close();
       bufReady.close();
 
-      // remove the last INVALID marker from transaction log.
+      // remove any preallocated padding bytes from the transaction log.
       fc.truncate(fc.position());
       fp.close();
       
@@ -227,7 +227,6 @@ public class FSEditLog {
     @Override
     void setReadyToFlush() throws IOException {
       assert bufReady.size() == 0 : "previous data is not flushed yet";
-      write(OP_INVALID);           // insert end-of-file marker
       DataOutputBuffer tmp = bufReady;
       bufReady = bufCurrent;
       bufCurrent = tmp;
@@ -244,7 +243,6 @@ public class FSEditLog {
       bufReady.writeTo(fp);     // write data to file
       bufReady.reset();         // erase all data in the buffer
       fc.force(false);          // metadata updates not needed because of preallocation
-      fc.position(fc.position()-1); // skip back the end-of-file marker
     }
 
     /**
@@ -258,18 +256,26 @@ public class FSEditLog {
 
     // allocate a big chunk of data
     private void preallocate() throws IOException {
-      final long oldsize = fc.size();
-      if (fc.position() + 4096 >= oldsize) {
-        final ByteBuffer buffer = PREALLOCATION_BUFFER;
-        buffer.position(0).limit(PREALLOCATION_LENGTH);
-
-        int w = 0;
-        for(; (w += fc.write(buffer, oldsize + w)) < PREALLOCATION_LENGTH; );
-
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("PREALLOCATE: size is now " + fc.size() + " written " + w
-              + " bytes at offset " + oldsize + "; editlog=" + getName());
-        }
+      long size = fc.size();
+      int bufSize = bufReady.getLength(); // bytes waiting to be flushed
+      long need = bufSize - (size - fc.position()); // shortfall past current EOF
+      if (need <= 0) {
+        return; // the file already has room for everything in bufReady
+      }
+      long oldSize = size;
+      long total = 0;
+      long fillCapacity = PREALLOCATION_BUFFER.capacity();
+      while (need > 0) {
+        PREALLOCATION_BUFFER.position(0); // rewind: previous pass drained it
+        do { // a positional write may be partial; repeat until drained
+          size += fc.write(PREALLOCATION_BUFFER, size);
+        } while (PREALLOCATION_BUFFER.remaining() > 0);
+        need -= fillCapacity;
+        total += fillCapacity;
+      }
+      if(FSNamesystem.LOG.isDebugEnabled()) {
+        FSNamesystem.LOG.debug("Preallocated " + total + " bytes at the end of " +
+            "the edit log (offset " + oldSize + ")");
       }
     }
     

+ 31 - 0
src/test/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeDirType;
 import org.apache.hadoop.hdfs.server.namenode.FSImage.NameNodeFile;
+import org.apache.hadoop.io.BytesWritable;
 
 /**
  * This class tests the creation and validation of a checkpoint.
@@ -77,6 +78,36 @@ public class TestEditLog extends TestCase {
     }
   }
 
+  public void testEditLogPreallocation() throws IOException {
+    final File TEST_DIR =
+        new File(System.getProperty("test.build.data", "/tmp"));
+    final File TEST_EDITS = new File(TEST_DIR, "edit_log");
+    
+    FSEditLog.EditLogFileOutputStream elfos = null;
+    try {
+      elfos = new FSEditLog.EditLogFileOutputStream(TEST_EDITS);
+      byte b[] = new byte[1024]; // payload written before each flush
+      for (int i = 0; i < b.length; i++) {
+        b[i] = 0;
+      }
+      elfos.write(b);
+      elfos.setReadyToFlush();
+      elfos.flushAndSync();
+      assertEquals(FSEditLog.MIN_PREALLOCATION_LENGTH, // one chunk expected
+          elfos.getFile().length());
+      for (int i = 0; // write two more chunks' worth of data, flushing each time
+          i < 2 * FSEditLog.MIN_PREALLOCATION_LENGTH / b.length; i++) {
+        elfos.write(b);
+        elfos.setReadyToFlush();
+        elfos.flushAndSync();
+      }
+      assertEquals(3 * FSEditLog.MIN_PREALLOCATION_LENGTH, elfos.getFile().length());
+    } finally {
+      if (elfos != null) elfos.close();
+      if (TEST_EDITS.exists()) TEST_EDITS.delete(); // remove the scratch file
+    }
+  }
+    
   /**
    * Tests transaction logging in dfs.
    */