소스 검색

HADOOP-8361. Avoid out-of-memory problems when deserializing strings. Contributed by Colin Patrick McCabe

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1336945 13f79535-47bb-0310-9956-ffa450edef68
Eli Collins 13 년 전
부모
커밋
032216a3a7

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -305,6 +305,9 @@ Release 2.0.0 - UNRELEASED
     HADOOP-8340. SNAPSHOT build versions should compare as less than their eventual
     final release. (todd)
 
+    HADOOP-8361. Avoid out-of-memory problems when deserializing strings.
+    (Colin Patrick McCabe via eli)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 8 - 8
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java

@@ -254,7 +254,7 @@ public class FileStatus implements Writable, Comparable {
   // Writable
   //////////////////////////////////////////////////
   public void write(DataOutput out) throws IOException {
-    Text.writeString(out, getPath().toString());
+    Text.writeString(out, getPath().toString(), Text.ONE_MEGABYTE);
     out.writeLong(getLen());
     out.writeBoolean(isDirectory());
     out.writeShort(getReplication());
@@ -262,16 +262,16 @@ public class FileStatus implements Writable, Comparable {
     out.writeLong(getModificationTime());
     out.writeLong(getAccessTime());
     getPermission().write(out);
-    Text.writeString(out, getOwner());
-    Text.writeString(out, getGroup());
+    Text.writeString(out, getOwner(), Text.ONE_MEGABYTE);
+    Text.writeString(out, getGroup(), Text.ONE_MEGABYTE);
     out.writeBoolean(isSymlink());
     if (isSymlink()) {
-      Text.writeString(out, getSymlink().toString());
+      Text.writeString(out, getSymlink().toString(), Text.ONE_MEGABYTE);
     }
   }
 
   public void readFields(DataInput in) throws IOException {
-    String strPath = Text.readString(in);
+    String strPath = Text.readString(in, Text.ONE_MEGABYTE);
     this.path = new Path(strPath);
     this.length = in.readLong();
     this.isdir = in.readBoolean();
@@ -280,10 +280,10 @@ public class FileStatus implements Writable, Comparable {
     modification_time = in.readLong();
     access_time = in.readLong();
     permission.readFields(in);
-    owner = Text.readString(in);
-    group = Text.readString(in);
+    owner = Text.readString(in, Text.ONE_MEGABYTE);
+    group = Text.readString(in, Text.ONE_MEGABYTE);
     if (in.readBoolean()) {
-      this.symlink = new Path(Text.readString(in));
+      this.symlink = new Path(Text.readString(in, Text.ONE_MEGABYTE));
     } else {
       this.symlink = null;
     }

+ 4 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java

@@ -84,8 +84,8 @@ public class PermissionStatus implements Writable {
 
   /** {@inheritDoc} */
   public void readFields(DataInput in) throws IOException {
-    username = Text.readString(in);
-    groupname = Text.readString(in);
+    username = Text.readString(in, Text.ONE_MEGABYTE);
+    groupname = Text.readString(in, Text.ONE_MEGABYTE);
     permission = FsPermission.read(in);
   }
 
@@ -110,8 +110,8 @@ public class PermissionStatus implements Writable {
                            String username, 
                            String groupname,
                            FsPermission permission) throws IOException {
-    Text.writeString(out, username);
-    Text.writeString(out, groupname);
+    Text.writeString(out, username, Text.ONE_MEGABYTE);
+    Text.writeString(out, groupname, Text.ONE_MEGABYTE);
     permission.write(out);
   }
 

+ 31 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java

@@ -53,6 +53,8 @@ import org.apache.hadoop.classification.InterfaceStability;
 public class Text extends BinaryComparable
     implements WritableComparable<BinaryComparable> {
   
+  static final int SHORT_STRING_MAX = 1024 * 1024;
+  
   private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
     new ThreadLocal<CharsetEncoder>() {
       protected CharsetEncoder initialValue() {
@@ -412,6 +414,8 @@ public class Text extends BinaryComparable
     return bytes;
   }
 
+  static final public int ONE_MEGABYTE = 1024 * 1024;
+
   /** Read a UTF8 encoded string from in
    */
   public static String readString(DataInput in) throws IOException {
@@ -420,7 +424,17 @@ public class Text extends BinaryComparable
     in.readFully(bytes, 0, length);
     return decode(bytes);
   }
-
+  
+  /** Read a UTF8 encoded string with a maximum size
+   */
+  public static String readString(DataInput in, int maxLength)
+      throws IOException {
+    int length = WritableUtils.readVIntInRange(in, 0, maxLength - 1);
+    byte [] bytes = new byte[length];
+    in.readFully(bytes, 0, length);
+    return decode(bytes);
+  }
+  
   /** Write a UTF8 encoded string to out
    */
   public static int writeString(DataOutput out, String s) throws IOException {
@@ -431,6 +445,22 @@ public class Text extends BinaryComparable
     return length;
   }
 
+  /** Write a UTF8 encoded string with a maximum size to out
+   */
+  public static int writeString(DataOutput out, String s, int maxLength)
+      throws IOException {
+    ByteBuffer bytes = encode(s);
+    int length = bytes.limit();
+    if (length >= maxLength) {
+      throw new IOException("string was too long to write!  Expected " +
+          "less than " + maxLength + " bytes, but got " +
+          length + " bytes.");
+    }
+    WritableUtils.writeVInt(out, length);
+    out.write(bytes.array(), 0, length);
+    return length;
+  }
+
   ////// states for validateUTF8
   
   private static final int LEAD_BYTE = 0;

+ 35 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.io;
 
 import junit.framework.TestCase;
 
+import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.util.Random;
@@ -107,7 +108,6 @@ public class TestText extends TestCase {
     }
   }
   
-  
   public void testIO() throws Exception {
     DataOutputBuffer out = new DataOutputBuffer();
     DataInputBuffer in = new DataInputBuffer();
@@ -136,6 +136,40 @@ public class TestText extends TestCase {
       assertTrue(before.equals(after2));
     }
   }
+  
+  public void doTestLimitedIO(String str, int strLen) throws IOException {
+    DataOutputBuffer out = new DataOutputBuffer();
+    DataInputBuffer in = new DataInputBuffer();
+
+    out.reset();
+    try {
+      Text.writeString(out, str, strLen);
+      fail("expected writeString to fail when told to write a string " +
+          "that was too long!  The string was '" + str + "'");
+    } catch (IOException e) {
+    }
+    Text.writeString(out, str, strLen + 1);
+
+    // test that it reads correctly
+    in.reset(out.getData(), out.getLength());
+    in.mark(strLen);
+    String after;
+    try {
+      after = Text.readString(in, strLen);
+      fail("expected readString to fail when told to read a string " +
+          "that was too long!  The string was '" + str + "'");
+    } catch (IOException e) {
+    }
+    in.reset();
+    after = Text.readString(in, strLen + 1);
+    assertTrue(str.equals(after));
+  }
+  
+  public void testLimitedIO() throws Exception {
+    doTestLimitedIO("abcd", 4);
+    doTestLimitedIO("", 0);
+    doTestLimitedIO("1", 1);
+  }
 
   public void testCompare() throws Exception {
     DataOutputBuffer out1 = new DataOutputBuffer();