Browse Source

HADOOP-10855. Allow Text to be read with a known Length. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1612732 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon 11 years ago
parent
commit
04aad666d9

+ 2 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -42,6 +42,8 @@ Release 2.6.0 - UNRELEASED
     HADOOP-10755. Support negative caching of user-group mapping.
     (Lei Xu via wang)
 
+    HADOOP-10855. Allow Text to be read with a known Length. (todd)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 13 - 6
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java

@@ -288,9 +288,7 @@ public class Text extends BinaryComparable
   @Override
   public void readFields(DataInput in) throws IOException {
     int newLength = WritableUtils.readVInt(in);
-    setCapacity(newLength, false);
-    in.readFully(bytes, 0, newLength);
-    length = newLength;
+    readWithKnownLength(in, newLength);
   }
   
   public void readFields(DataInput in, int maxLength) throws IOException {
@@ -302,9 +300,7 @@ public class Text extends BinaryComparable
       throw new IOException("tried to deserialize " + newLength +
           " bytes of data, but maxLength = " + maxLength);
     }
-    setCapacity(newLength, false);
-    in.readFully(bytes, 0, newLength);
-    length = newLength;
+    readWithKnownLength(in, newLength);
   }
 
   /** Skips over one Text in the input. */
@@ -313,6 +309,17 @@ public class Text extends BinaryComparable
     WritableUtils.skipFully(in, length);
   }
 
+  /**
+   * Read exactly {@code len} bytes from {@code in} as this Text's contents.
+   * No length prefix is consumed here, so a caller can fill a Text from a
+   * stream whose serialization format frames the length in some other way.
+   */
+  public void readWithKnownLength(DataInput in, int len) throws IOException {
+    setCapacity(len, false);
+    in.readFully(bytes, 0, len);
+    length = len;
+  }
+
   /** serialize
    * write this object to out
    * length uses zero-compressed encoding

+ 22 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java

@@ -24,6 +24,7 @@ import java.nio.BufferUnderflowException;
 import java.nio.ByteBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.util.Random;
+import com.google.common.base.Charsets;
 import com.google.common.primitives.Bytes;
 
 /** Unit tests for LargeUTF8. */
@@ -363,6 +364,27 @@ public class TestText extends TestCase {
       fail("testReadWriteOperations error !!!");
     }        
   }
+
+  public void testReadWithKnownLength() throws IOException {
+    String line = "hello world";
+    byte[] inputBytes = line.getBytes(Charsets.UTF_8);
+    DataInputBuffer in = new DataInputBuffer();
+    Text text = new Text();
+
+    in.reset(inputBytes, inputBytes.length);
+    text.readWithKnownLength(in, 5);
+    assertEquals("hello", text.toString());
+
+    // Re-read 7 bytes into the same Text: contents must grow past the earlier 5
+    in.reset(inputBytes, inputBytes.length);
+    text.readWithKnownLength(in, 7);
+    assertEquals("hello w", text.toString());
+
+    // Re-read only 2 bytes: no bytes from the earlier, longer read may remain
+    in.reset(inputBytes, inputBytes.length);
+    text.readWithKnownLength(in, 2);
+    assertEquals("he", text.toString());
+  }
   
   /**
    * test {@code Text.bytesToCodePoint(bytes) }