
HADOOP-922. Optimize small forward seeks in HDFS. Contributed by Dhruba.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@501616 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting, 18 years ago
parent commit c75a6afdbc

+ 4 - 0
CHANGES.txt

@@ -98,6 +98,10 @@ Trunk (unreleased changes)
 30. HADOOP-937.  Change the namenode to request re-registration of
     datanodes in more circumstances.  (Hairong Kuang via cutting)
 
+31. HADOOP-922.  Optimize small forward seeks in HDFS.  If data is
+    likely already in flight, skip ahead rather than re-opening the
+    block.  (Dhruba Borthakur via cutting)
+
 
 Release 0.10.1 - 2007-01-10
 

+ 20 - 2
src/java/org/apache/hadoop/dfs/DFSClient.java
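
In short, the hunk below replaces the seek() path's unconditional invalidate-and-reopen with a conditional skip. Restated as a sketch using the names the patch introduces (pos, blockEnd, TCP_WINDOW_SIZE), the decision rule amounts to:

    // Skip inline iff the target is a forward position within the current
    // block and at most one TCP window ahead of the current offset, i.e.
    // the intervening bytes may already sit in the socket receive buffer.
    boolean canSkipInline = pos <= targetPos
                            && targetPos <= blockEnd
                            && (targetPos - pos) <= TCP_WINDOW_SIZE;

When the rule holds, the client simply consumes the intervening bytes from the already-open block stream; otherwise it falls back to the old behavior of resetting blockEnd so that the next read re-opens the block.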

@@ -44,6 +44,7 @@ import java.util.*;
 class DFSClient implements FSConstants {
     public static final Log LOG = LogFactory.getLog("org.apache.hadoop.fs.DFSClient");
     static int MAX_BLOCK_ACQUIRE_FAILURES = 3;
+    private static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB
     private static final long DEFAULT_BLOCK_SIZE = 64 * 1024 * 1024;
     ClientProtocol namenode;
     String localName;
@@ -885,8 +886,25 @@ class DFSClient implements FSConstants {
             if (targetPos > filelen) {
                 throw new IOException("Cannot seek after EOF");
             }
-            pos = targetPos;
-            blockEnd = -1;
+            boolean done = false;
+            if (pos <= targetPos && targetPos <= blockEnd) {
+                //
+                // If this seek is to a forward position in the current
+                // block, and this piece of data might already be lying in
+                // the TCP buffer, then just eat up the intervening data.
+                //
+                int diff = (int)(targetPos - pos);
+                if (diff <= TCP_WINDOW_SIZE) {
+                  blockStream.skipBytes(diff);
+                  pos += diff;
+                  assert(pos == targetPos);
+                  done = true;
+                }
+            }
+            if (!done) {
+                pos = targetPos;
+                blockEnd = -1;
+            }
         }
 
         /**
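
An editorial aside on the hunk above, not part of the patch: DataInputStream.skipBytes is documented to possibly skip fewer bytes than requested, so the assert relies on the stream delivering the full distance. A defensive variant, sketched here with the patch's own field names, would loop until the gap is consumed and fall back to re-opening the block if the stream runs dry:

    int remaining = diff;
    while (remaining > 0) {
        int skipped = blockStream.skipBytes(remaining);
        if (skipped <= 0) {
            break;                  // stream ran dry; stop skipping
        }
        pos += skipped;
        remaining -= skipped;
    }
    done = (remaining == 0);        // if false, the !done branch re-opens the block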

+ 32 - 0
src/test/org/apache/hadoop/dfs/TestSeekBug.java

@@ -78,6 +78,37 @@ public class TestSeekBug extends TestCase {
     // all done
     stm.close();
   }
+
+  /*
+   * Read some data, skip a few bytes and read more. HADOOP-922.
+   */
+  private void smallReadSeek(FileSystem fileSys, Path name) throws IOException {
+    FSInputStream stmRaw = fileSys.openRaw(name);
+    byte[] expected = new byte[ONEMB];
+    Random rand = new Random(seed);
+    rand.nextBytes(expected);
+    
+    // Issue a simple read first.
+    byte[] actual = new byte[128];
+    stmRaw.seek(100000);
+    stmRaw.read(actual, 0, actual.length);
+    checkAndEraseData(actual, 100000, expected, "First Small Read Test");
+
+    // now do a small seek of 4 bytes, within the same block.
+    int newpos1 = 100000 + 128 + 4;
+    stmRaw.seek(newpos1);
+    stmRaw.read(actual, 0, actual.length);
+    checkAndEraseData(actual, newpos1, expected, "Small Seek Bug 1");
+
+    // seek another 256 bytes this time
+    int newpos2 = newpos1 + 256;
+    stmRaw.seek(newpos2);
+    stmRaw.read(actual, 0, actual.length);
+    checkAndEraseData(actual, newpos2, expected, "Small Seek Bug 2");
+
+    // all done
+    stmRaw.close();
+  }
   
   private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
     assertTrue(fileSys.exists(name));
@@ -96,6 +127,7 @@ public class TestSeekBug extends TestCase {
       Path file1 = new Path("seektest.dat");
       writeFile(fileSys, file1);
       seekReadFile(fileSys, file1);
+      smallReadSeek(fileSys, file1);
       cleanupFile(fileSys, file1);
     } finally {
       fileSys.close();
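
To exercise just this case against the patched client, the Ant build of this era lets a single test be selected by name, along the lines of ant test -Dtestcase=TestSeekBug (the exact target and property names are an assumption; check build.xml).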