Browse Source

Merge -r 648515:648516 from trunk to branch-0.17 to fix HADOOP-3242

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.17@648519 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy 17 years ago
parent
commit
c53edf538b

+ 4 - 0
CHANGES.txt

@@ -572,6 +572,10 @@ Release 0.17.0 - Unreleased
     change maxentToSameCluster to run the correct number of jobs.
     (Runping Qi via cdouglas)
 
+    HADOOP-3242. Fix the RecordReader of SequenceFileAsBinaryInputFormat to
+    begin reading at the start of its split rather than at the beginning of
+    the file. (cdouglas via acmurthy)
+
 Release 0.16.3 - 2008-04-16
 
   BUG FIXES

+ 4 - 0
src/java/org/apache/hadoop/mapred/SequenceFileAsBinaryInputFormat.java

@@ -66,7 +66,11 @@ public class SequenceFileAsBinaryInputFormat
       FileSystem fs = path.getFileSystem(conf);
       this.in = new SequenceFile.Reader(fs, path, conf);
       this.end = split.getStart() + split.getLength();
+      if (split.getStart() > in.getPosition())
+        in.sync(split.getStart());                  // sync to start
+      this.start = in.getPosition();
       vbytes = in.createValueBytes();
+      done = start >= end;
     }
 
     public BytesWritable createKey() {

+ 25 - 23
src/test/org/apache/hadoop/mapred/TestSequenceFileAsBinaryInputFormat.java

@@ -69,30 +69,32 @@ public class TestSequenceFileAsBinaryInputFormat extends TestCase {
     Text cmpkey = new Text();
     Text cmpval = new Text();
     DataInputBuffer buf = new DataInputBuffer();
-    RecordReader<BytesWritable,BytesWritable> reader =
-      bformat.getRecordReader(new FileSplit(file, 0,
-                              fs.getFileStatus(file).getLen(), 
-                              (String[])null), job, Reporter.NULL);
-    try {
-      while (reader.next(bkey, bval)) {
-        tkey.set(Integer.toString(r.nextInt(), 36));
-        tval.set(Long.toString(r.nextLong(), 36));
-        buf.reset(bkey.get(), bkey.getSize());
-        cmpkey.readFields(buf);
-        buf.reset(bval.get(), bval.getSize());
-        cmpval.readFields(buf);
-        assertTrue(
-            "Keys don't match: " + "*" + cmpkey.toString() + ":" +
-                                         tkey.toString() + "*",
-            cmpkey.toString().equals(tkey.toString()));
-        assertTrue(
-            "Vals don't match: " + "*" + cmpval.toString() + ":" +
-                                         tval.toString() + "*",
-            cmpval.toString().equals(tval.toString()));
-        ++count;
+    final int NUM_SPLITS = 3;
+    job.setInputPath(file);
+    for (InputSplit split : bformat.getSplits(job, NUM_SPLITS)) {
+      RecordReader<BytesWritable,BytesWritable> reader =
+        bformat.getRecordReader(split, job, Reporter.NULL);
+      try {
+        while (reader.next(bkey, bval)) {
+          tkey.set(Integer.toString(r.nextInt(), 36));
+          tval.set(Long.toString(r.nextLong(), 36));
+          buf.reset(bkey.get(), bkey.getSize());
+          cmpkey.readFields(buf);
+          buf.reset(bval.get(), bval.getSize());
+          cmpval.readFields(buf);
+          assertTrue(
+              "Keys don't match: " + "*" + cmpkey.toString() + ":" +
+                                           tkey.toString() + "*",
+              cmpkey.toString().equals(tkey.toString()));
+          assertTrue(
+              "Vals don't match: " + "*" + cmpval.toString() + ":" +
+                                           tval.toString() + "*",
+              cmpval.toString().equals(tval.toString()));
+          ++count;
+        }
+      } finally {
+        reader.close();
       }
-    } finally {
-      reader.close();
     }
     assertEquals("Some records not found", RECORDS, count);
   }