Browse Source

HDFS-4282. TestEditLog.testFuzzSequences FAILED in all pre-commit test. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1418214 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon 12 years ago
parent
commit
6681523c87

+ 2 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java

@@ -1858,10 +1858,10 @@ public class SequenceFile {
        UTF8 className = new UTF8();

        className.readFields(in);
-        keyClassName = className.toString(); // key class name
+        keyClassName = className.toStringChecked(); // key class name

        className.readFields(in);
-        valClassName = className.toString(); // val class name
+        valClassName = className.toStringChecked(); // val class name
      } else {
        keyClassName = Text.readString(in);
        valClassName = Text.readString(in);

+ 31 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.io;
 import java.io.IOException;
 import java.io.DataInput;
 import java.io.DataOutput;
+import java.io.UTFDataFormatException;

 import org.apache.hadoop.util.StringUtils;

@@ -155,6 +156,21 @@ public class UTF8 implements WritableComparable<UTF8> {
    }
    return buffer.toString();
  }
+  
+  /**
+   * Convert to a string, checking for valid UTF8.
+   * @return the converted string
+   * @throws UTFDataFormatException if the underlying bytes contain invalid
+   * UTF8 data.
+   */
+  public String toStringChecked() throws IOException {
+    StringBuilder buffer = new StringBuilder(length);
+    synchronized (IBUF) {
+      IBUF.reset(bytes, length);
+      readChars(IBUF, buffer, length);
+    }
+    return buffer.toString();
+  }

  /** Returns true iff <code>o</code> is a UTF8 with the same contents.  */
  @Override
@@ -238,7 +254,7 @@ public class UTF8 implements WritableComparable<UTF8> {
  }

  private static void readChars(DataInput in, StringBuilder buffer, int nBytes)
-    throws IOException {
+    throws UTFDataFormatException, IOException {
    DataOutputBuffer obuf = OBUF_FACTORY.get();
    obuf.reset();
    obuf.write(in, nBytes);
@@ -250,15 +266,27 @@ public class UTF8 implements WritableComparable<UTF8> {
        // 0b0xxxxxxx: 1-byte sequence
        buffer.append((char)(b & 0x7F));
      } else if ((b & 0xE0) == 0xC0) {
+        if (i >= nBytes) {
+          throw new UTFDataFormatException("Truncated UTF8 at " +
+              StringUtils.byteToHexString(bytes, i - 1, 1));
+        }
        // 0b110xxxxx: 2-byte sequence
        buffer.append((char)(((b & 0x1F) << 6)
            | (bytes[i++] & 0x3F)));
      } else if ((b & 0xF0) == 0xE0) {
        // 0b1110xxxx: 3-byte sequence
+        if (i + 1 >= nBytes) {
+          throw new UTFDataFormatException("Truncated UTF8 at " +
+              StringUtils.byteToHexString(bytes, i - 1, 2));
+        }
        buffer.append((char)(((b & 0x0F) << 12)
            | ((bytes[i++] & 0x3F) << 6)
            |  (bytes[i++] & 0x3F)));
      } else if ((b & 0xF8) == 0xF0) {
+        if (i + 2 >= nBytes) {
+          throw new UTFDataFormatException("Truncated UTF8 at " +
+              StringUtils.byteToHexString(bytes, i - 1, 3));
+        }
        // 0b11110xxx: 4-byte sequence
        int codepoint =
            ((b & 0x07) << 18)
@@ -274,8 +302,8 @@ public class UTF8 implements WritableComparable<UTF8> {
        // Only show the next 6 bytes max in the error code - in case the
        // buffer is large, this will prevent an exceedingly large message.
        int endForError = Math.min(i + 5, nBytes);
-        throw new IOException("Invalid UTF8 at " +
-          StringUtils.byteToHexString(bytes, i - 1, endForError));
+        throw new UTFDataFormatException("Invalid UTF8 at " +
+            StringUtils.byteToHexString(bytes, i - 1, endForError));
      }
    }
  }

+ 23 - 4
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.io;

 import junit.framework.TestCase;
 import java.io.IOException;
+import java.io.UTFDataFormatException;
 import java.util.Random;

 import org.apache.hadoop.test.GenericTestUtils;
@@ -126,9 +127,9 @@ public class TestUTF8 extends TestCase {
    try {
      UTF8.fromBytes(invalid);
      fail("did not throw an exception");
-    } catch (IOException ioe) {
+    } catch (UTFDataFormatException utfde) {
      GenericTestUtils.assertExceptionContains(
-          "Invalid UTF8 at ffff01020304", ioe);
+          "Invalid UTF8 at ffff01020304", utfde);
    }
  }

@@ -142,9 +143,27 @@ public class TestUTF8 extends TestCase {
    try {
      UTF8.fromBytes(invalid);
      fail("did not throw an exception");
-    } catch (IOException ioe) {
+    } catch (UTFDataFormatException utfde) {
      GenericTestUtils.assertExceptionContains(
-          "Invalid UTF8 at f88880808004", ioe);
+          "Invalid UTF8 at f88880808004", utfde);
+    }
+  }
+  
+  /**
+   * Test that decoding invalid UTF8 due to truncation yields the correct
+   * exception type.
+   */
+  public void testInvalidUTF8Truncated() throws Exception {
+    // Truncated CAT FACE character -- this is a 4-byte sequence, but we
+    // only have the first three bytes.
+    byte[] truncated = new byte[] {
+        (byte)0xF0, (byte)0x9F, (byte)0x90 };
+    try {
+      UTF8.fromBytes(truncated);
+      fail("did not throw an exception");
+    } catch (UTFDataFormatException utfde) {
+      GenericTestUtils.assertExceptionContains(
+          "Truncated UTF8 at f09f90", utfde);
    }
  }
}

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -569,6 +569,9 @@ Release 2.0.3-alpha - Unreleased
    HDFS-4238. Standby namenode should not do purging of shared
    storage edits. (todd)

+    HDFS-4282. TestEditLog.testFuzzSequences FAILED in all pre-commit test
+    (todd)
+
  BREAKDOWN OF HDFS-3077 SUBTASKS

    HDFS-3077. Quorum-based protocol for reading and writing edit logs.

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java

@@ -197,7 +197,7 @@ public class FSImageSerialization {
  public static String readString(DataInputStream in) throws IOException {
    DeprecatedUTF8 ustr = TL_DATA.get().U_STR;
    ustr.readFields(in);
-    return ustr.toString();
+    return ustr.toStringChecked();
  }

  static String readString_EmptyAsNull(DataInputStream in) throws IOException {