18 năm trước cách đây · 336d719b8c
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -66,6 +66,9 @@ Trunk (unreleased changes)
 
				     agree with the Java versions.  (Milind Bhandarkar via
			
 
				     tomwhite)
			
 
				 
			
 
				+20. HADOOP-1096.  Rename InputArchive and OutputArchive and
			
 
				+    make them public. (Milind Bhandarkar via tomwhite)
			
 
				+
			
 
				 
			
 
				 Release 0.12.0 - 2007-03-02
			
 
				 
			
--- a/src/c++/librecordio/xmlarchive.cc
+++ b/src/c++/librecordio/xmlarchive.cc
@@ -85,12 +85,14 @@ static std::string toXMLString(std::string s)
 
				     } else if (ch == '&') {
			
 
				         r.append("&amp;");
			
 
				     } else if (ch == '%') {
			
 
				-        r.append("%25");
			
 
				+        r.append("%0025");
			
 
				     } else if (ch < 0x20) {
			
 
				         uint8_t* pb = (uint8_t*) &ch;
			
 
				         char ch1 = hexchars[*pb/16];
			
 
				         char ch2 = hexchars[*pb%16];
			
 
				         r.push_back('%');
			
 
				+        r.push_back('0');
			
 
				+        r.push_back('0');
			
 
				         r.push_back(ch1);
			
 
				         r.push_back(ch2);
			
 
				     } else {
			
@@ -105,10 +107,10 @@ static uint8_t h2b(char ch) {
 
				     return ch - '0';
			
 
				   }
			
 
				   if ((ch >= 'a') || (ch <= 'f')) {
			
 
				-    return ch - 'a';
			
 
				+    return ch - 'a' + 10;
			
 
				   }
			
 
				   if ((ch >= 'A') || (ch <= 'F')) {
			
 
				-    return ch - 'A';
			
 
				+    return ch - 'A' + 10;
			
 
				   }
			
 
				   return 0;
			
 
				 }
			
@@ -123,20 +125,18 @@ static std::string fromXMLString(std::string s)
 
				     uint8_t b = *pb;
			
 
				     if (b == '%') {
			
 
				       char *pc = (char*) (pb+1);
			
 
				-      if (*pc == '%') {
			
 
				-        r.push_back('%');
			
 
				-        pb += 1;
			
 
				-      } else {
			
 
				-        char ch1 = *pc++;
			
 
				-        char ch2 = *pc++;
			
 
				-        pb += 2;
			
 
				-        uint8_t cnv = h2b(ch1)*16 + h2b(ch2);
			
 
				-        pc = (char*) &cnv;
			
 
				-        r.push_back(*pc);
			
 
				-      }
			
 
				+      // ignore the first two characters, which are always '0'
			
 
				+      *pc++;
			
 
				+      *pc++;;
			
 
				+      char ch1 = *pc++;
			
 
				+      char ch2 = *pc++;
			
 
				+      pb += 4;
			
 
				+      uint8_t cnv = h2b(ch1)*16 + h2b(ch2);
			
 
				+      pc = (char*) &cnv;
			
 
				+      r.push_back(*pc);
			
 
				     } else {
			
 
				-        char *pc = (char*) pb;
			
 
				-        r.push_back(*pc);
			
 
				+      char *pc = (char*) pb;
			
 
				+      r.push_back(*pc);
			
 
				     }
			
 
				     pb++;
			
 
				   }
			
--- a/src/java/org/apache/hadoop/record/Record.java
+++ b/src/java/org/apache/hadoop/record/Record.java
@@ -31,34 +31,49 @@ import org.apache.hadoop.io.WritableComparable;
 
				 public abstract class Record implements WritableComparable, Cloneable {
			
 
				   
			
 
				   /**
			
 
				-   * Serialize a record into archive
			
 
				-   * @param archive Output Archive
			
 
				+   * Serialize a record with a tag (usually field name)
			
 
				+   * @param rout Record output destination
			
 
				    * @param tag record tag (Used only in tagged serialization e.g. XML)
			
 
				    */
			
 
				-  public abstract void serialize(OutputArchive archive, String tag)
			
 
				+  public abstract void serialize(RecordOutput rout, String tag)
			
 
				   throws IOException;
			
 
				   
			
 
				   /**
			
 
				-   * Deserialize a record from archive
			
 
				-   * @param archive Input Archive
			
 
				+   * Deserialize a record with a tag (usually field name)
			
 
				+   * @param rin Record input source
			
 
				    * @param tag Record tag (Used only in tagged serialization e.g. XML)
			
 
				    */
			
 
				-  public abstract void deserialize(InputArchive archive, String tag)
			
 
				+  public abstract void deserialize(RecordInput rin, String tag)
			
 
				   throws IOException;
			
 
				   
			
 
				   // inheric javadoc
			
 
				   public abstract int compareTo (final Object peer) throws ClassCastException;
			
 
				   
			
 
				+  /**
			
 
				+   * Serialize a record without a tag
			
 
				+   * @param rout Record output destination
			
 
				+   */
			
 
				+  public void serialize(RecordOutput rout) throws IOException {
			
 
				+    this.serialize(rout, "");
			
 
				+  }
			
 
				+  
			
 
				+  /**
			
 
				+   * Deserialize a record without a tag
			
 
				+   * @param rin Record input source
			
 
				+   */
			
 
				+  public void deserialize(RecordInput rin) throws IOException {
			
 
				+    this.deserialize(rin, "");
			
 
				+  }
			
 
				+  
			
 
				   // inherit javadoc
			
 
				   public void write(final DataOutput out) throws java.io.IOException {
			
 
				-    BinaryOutputArchive archive = new BinaryOutputArchive(out);
			
 
				-    this.serialize(archive, "");
			
 
				+    BinaryRecordOutput bout = new BinaryRecordOutput(out);
			
 
				+    this.serialize(bout);
			
 
				   }
			
 
				   
			
 
				   // inherit javadoc
			
 
				-  public void readFields(final DataInput in) throws java.io.IOException {
			
 
				-    BinaryInputArchive archive = new BinaryInputArchive(in);
			
 
				-    this.deserialize(archive, "");
			
 
				+  public void readFields(final DataInput din) throws java.io.IOException {
			
 
				+    BinaryRecordInput rin = new BinaryRecordInput(din);
			
 
				+    this.deserialize(rin);
			
 
				   }
			
 
				-  
			
 
				 }
			
--- a/src/java/org/apache/hadoop/record/Utils.java
+++ b/src/java/org/apache/hadoop/record/Utils.java
@@ -51,11 +51,15 @@ public class Utils {
 
				       } else if (ch == '&') {
			
 
				         sb.append("&amp;");
			
 
				       } else if (ch == '%') {
			
 
				-        sb.append("%25");
			
 
				-      } else if (ch < 0x20) {
			
 
				+        sb.append("%0025");
			
 
				+      } else if (ch < 0x20 ||
			
 
				+          (ch > 0xD7FF && ch < 0xE000) ||
			
 
				+          (ch > 0xFFFD)) {
			
 
				         sb.append("%");
			
 
				-        sb.append(hexchars[ch/16]);
			
 
				-        sb.append(hexchars[ch%16]);
			
 
				+        sb.append(hexchars[(ch & 0xF000) >> 12]);
			
 
				+        sb.append(hexchars[(ch & 0x0F00) >> 8]);
			
 
				+        sb.append(hexchars[(ch & 0x00F0) >> 4]);
			
 
				+        sb.append(hexchars[(ch & 0x000F)]);
			
 
				       } else {
			
 
				         sb.append(ch);
			
 
				       }
			
@@ -67,9 +71,9 @@ public class Utils {
 
				     if (ch >= '0' && ch <= '9') {
			
 
				       return ch - '0';
			
 
				     } else if (ch >= 'A' && ch <= 'F') {
			
 
				-      return ch - 'A';
			
 
				+      return ch - 'A' + 10;
			
 
				     } else if (ch >= 'a' && ch <= 'f') {
			
 
				-      return ch - 'a';
			
 
				+      return ch - 'a' + 10;
			
 
				     }
			
 
				     return 0;
			
 
				   }
			
@@ -84,15 +88,16 @@ public class Utils {
 
				     for (int idx = 0; idx < s.length();) {
			
 
				       char ch = s.charAt(idx++);
			
 
				       if (ch == '%') {
			
 
				-        char ch1 = s.charAt(idx++);
			
 
				-        char ch2 = s.charAt(idx++);
			
 
				-        char res = (char)(h2c(ch1)*16 + h2c(ch2));
			
 
				+        int ch1 = h2c(s.charAt(idx++)) << 12;
			
 
				+        int ch2 = h2c(s.charAt(idx++)) << 8;
			
 
				+        int ch3 = h2c(s.charAt(idx++)) << 4;
			
 
				+        int ch4 = h2c(s.charAt(idx++));
			
 
				+        char res = (char)(ch1 | ch2 | ch3 | ch4);
			
 
				         sb.append(res);
			
 
				       } else {
			
 
				         sb.append(ch);
			
 
				       }
			
 
				     }
			
 
				-    
			
 
				     return sb.toString();
			
 
				   }
			
 
				   
			
@@ -235,6 +240,159 @@ public class Utils {
 
				     return new Buffer(barr);
			
 
				   }
			
 
				   
			
 
				+  private static int utf8LenForCodePoint(final int cpt) throws IOException {
			
 
				+    if (cpt >=0 && cpt <= 0x7F) {
			
 
				+      return 1;
			
 
				+    }
			
 
				+    if (cpt >= 0x80 && cpt <= 0x07FF) {
			
 
				+      return 2;
			
 
				+    }
			
 
				+    if ((cpt >= 0x0800 && cpt < 0xD800) ||
			
 
				+        (cpt > 0xDFFF && cpt <= 0xFFFD)) {
			
 
				+      return 3;
			
 
				+    }
			
 
				+    if (cpt >= 0x10000 && cpt <= 0x10FFFF) {
			
 
				+      return 4;
			
 
				+    }
			
 
				+    throw new IOException("Illegal Unicode Codepoint "+
			
 
				+        Integer.toHexString(cpt)+" in string.");
			
 
				+  }
			
 
				+  
			
 
				+  private static final int B10 =    Integer.parseInt("10000000", 2);
			
 
				+  private static final int B110 =   Integer.parseInt("11000000", 2);
			
 
				+  private static final int B1110 =  Integer.parseInt("11100000", 2);
			
 
				+  private static final int B11110 = Integer.parseInt("11110000", 2);
			
 
				+  private static final int B11 =    Integer.parseInt("11000000", 2);
			
 
				+  private static final int B111 =   Integer.parseInt("11100000", 2);
			
 
				+  private static final int B1111 =  Integer.parseInt("11110000", 2);
			
 
				+  private static final int B11111 = Integer.parseInt("11111000", 2);
			
 
				+  
			
 
				+  private static int writeUtf8(int cpt, final byte[] bytes, final int offset)
			
 
				+  throws IOException {
			
 
				+    if (cpt >=0 && cpt <= 0x7F) {
			
 
				+      bytes[offset] = (byte) cpt;
			
 
				+      return 1;
			
 
				+    }
			
 
				+    if (cpt >= 0x80 && cpt <= 0x07FF) {
			
 
				+      bytes[offset+1] = (byte) (B10 | (cpt & 0x3F));
			
 
				+      cpt = cpt >> 6;
			
 
				+      bytes[offset] = (byte) (B110 | (cpt & 0x1F));
			
 
				+      return 2;
			
 
				+    }
			
 
				+    if ((cpt >= 0x0800 && cpt < 0xD800) ||
			
 
				+        (cpt > 0xDFFF && cpt <= 0xFFFD)) {
			
 
				+      bytes[offset+2] = (byte) (B10 | (cpt & 0x3F));
			
 
				+      cpt = cpt >> 6;
			
 
				+      bytes[offset+1] = (byte) (B10 | (cpt & 0x3F));
			
 
				+      cpt = cpt >> 6;
			
 
				+      bytes[offset] = (byte) (B1110 | (cpt & 0x0F));
			
 
				+      return 3;
			
 
				+    }
			
 
				+    if (cpt >= 0x10000 && cpt <= 0x10FFFF) {
			
 
				+      bytes[offset+3] = (byte) (B10 | (cpt & 0x3F));
			
 
				+      cpt = cpt >> 6;
			
 
				+      bytes[offset+2] = (byte) (B10 | (cpt & 0x3F));
			
 
				+      cpt = cpt >> 6;
			
 
				+      bytes[offset+1] = (byte) (B10 | (cpt & 0x3F));
			
 
				+      cpt = cpt >> 6;
			
 
				+      bytes[offset] = (byte) (B11110 | (cpt & 0x07));
			
 
				+      return 4;
			
 
				+    }
			
 
				+    throw new IOException("Illegal Unicode Codepoint "+
			
 
				+        Integer.toHexString(cpt)+" in string.");
			
 
				+  }
			
 
				+  
			
 
				+  static void toBinaryString(final DataOutput out, final String str)
			
 
				+  throws IOException {
			
 
				+    final int strlen = str.length();
			
 
				+    byte[] bytes = new byte[strlen*4]; // Codepoints expand to 4 bytes max
			
 
				+    int utf8Len = 0;
			
 
				+    int idx = 0;
			
 
				+    while(idx < strlen) {
			
 
				+      final int cpt = str.codePointAt(idx);
			
 
				+      idx += Character.isSupplementaryCodePoint(cpt) ? 2 : 1;
			
 
				+      utf8Len += writeUtf8(cpt, bytes, utf8Len);
			
 
				+    }
			
 
				+    writeVInt(out, utf8Len);
			
 
				+    out.write(bytes, 0, utf8Len);
			
 
				+  }
			
 
				+  
			
 
				+  static boolean isValidCodePoint(int cpt) {
			
 
				+    return !((cpt > 0x10FFFF) ||
			
 
				+        (cpt >= 0xD800 && cpt <= 0xDFFF) ||
			
 
				+        (cpt >= 0xFFFE && cpt <=0xFFFF));
			
 
				+  }
			
 
				+  
			
 
				+  private static int utf8ToCodePoint(int b1, int b2, int b3, int b4) {
			
 
				+    int cpt = 0;
			
 
				+    cpt = (((b1 & ~B11111) << 18) |
			
 
				+        ((b2 & ~B11) << 12) |
			
 
				+        ((b3 & ~B11) << 6) |
			
 
				+        (b4 & ~B11));
			
 
				+    return cpt;
			
 
				+  }
			
 
				+  
			
 
				+  private static int utf8ToCodePoint(int b1, int b2, int b3) {
			
 
				+    int cpt = 0;
			
 
				+    cpt = (((b1 & ~B1111) << 12) | ((b2 & ~B11) << 6) | (b3 & ~B11));
			
 
				+    return cpt;
			
 
				+  }
			
 
				+  
			
 
				+  private static int utf8ToCodePoint(int b1, int b2) {
			
 
				+    int cpt = 0;
			
 
				+    cpt = (((b1 & ~B111) << 6) | (b2 & ~B11));
			
 
				+    return cpt;
			
 
				+  }
			
 
				+  
			
 
				+  private static void checkB10(int b) throws IOException {
			
 
				+    if ((b & B11) != B10) {
			
 
				+      throw new IOException("Invalid UTF-8 representation.");
			
 
				+    }
			
 
				+  }
			
 
				+  
			
 
				+  static String fromBinaryString(final DataInput din) throws IOException {
			
 
				+    final int utf8Len = readVInt(din);
			
 
				+    final byte[] bytes = new byte[utf8Len];
			
 
				+    din.readFully(bytes);
			
 
				+    int len = 0;
			
 
				+    // For the most common case, i.e. ASCII, numChars = utf8Len
			
 
				+    StringBuilder sb = new StringBuilder(utf8Len);
			
 
				+    while(len < utf8Len) {
			
 
				+      int cpt = 0;
			
 
				+      final int b1 = bytes[len++] & 0xFF;
			
 
				+      if (b1 <= 0x7F) {
			
 
				+        cpt = b1;
			
 
				+      } else if ((b1 & B11111) == B11110) {
			
 
				+        int b2 = bytes[len++] & 0xFF;
			
 
				+        checkB10(b2);
			
 
				+        int b3 = bytes[len++] & 0xFF;
			
 
				+        checkB10(b3);
			
 
				+        int b4 = bytes[len++] & 0xFF;
			
 
				+        checkB10(b4);
			
 
				+        cpt = utf8ToCodePoint(b1, b2, b3, b4);
			
 
				+      } else if ((b1 & B1111) == B1110) {
			
 
				+        int b2 = bytes[len++] & 0xFF;
			
 
				+        checkB10(b2);
			
 
				+        int b3 = bytes[len++] & 0xFF;
			
 
				+        checkB10(b3);
			
 
				+        cpt = utf8ToCodePoint(b1, b2, b3);
			
 
				+      } else if ((b1 & B111) == B110) {
			
 
				+        int b2 = bytes[len++] & 0xFF;
			
 
				+        checkB10(b2);
			
 
				+        cpt = utf8ToCodePoint(b1, b2);
			
 
				+      } else {
			
 
				+        throw new IOException("Invalid UTF-8 byte "+Integer.toHexString(b1)+
			
 
				+            " at offset "+(len-1)+" in length of "+utf8Len);
			
 
				+      }
			
 
				+      if (!isValidCodePoint(cpt)) {
			
 
				+        throw new IOException("Illegal Unicode Codepoint "+
			
 
				+          Integer.toHexString(cpt)+" in stream.");
			
 
				+      }
			
 
				+      sb.appendCodePoint(cpt);
			
 
				+    }
			
 
				+    return sb.toString();
			
 
				+  }
			
 
				+  
			
 
				   /** Parse a float from a byte array. */
			
 
				   public static float readFloat(byte[] bytes, int start) {
			
 
				     return WritableComparator.readFloat(bytes, start);
			
--- a/src/java/org/apache/hadoop/record/compiler/JBuffer.java
+++ b/src/java/org/apache/hadoop/record/compiler/JBuffer.java
@@ -33,10 +33,6 @@ public class JBuffer extends JCompType {
 
				       super("org.apache.hadoop.record.Buffer", "Buffer", "org.apache.hadoop.record.Buffer");
			
 
				     }
			
 
				     
			
 
				-    void genWriteMethod(CodeBuffer cb, String fname, String tag) {
			
 
				-      cb.append("a_.writeBuffer("+fname+",\""+tag+"\");\n");
			
 
				-    }
			
 
				-    
			
 
				     void genCompareTo(CodeBuffer cb, String fname, String other) {
			
 
				       cb.append("ret = "+fname+".compareTo("+other+");\n");
			
 
				     }
			
--- a/src/java/org/apache/hadoop/record/compiler/JMap.java
+++ b/src/java/org/apache/hadoop/record/compiler/JMap.java
@@ -79,14 +79,14 @@ public class JMap extends JCompType {
 
				       }
			
 
				       cb.append("{\n");
			
 
				       incrLevel();
			
 
				-      cb.append("org.apache.hadoop.record.Index "+getId("midx")+" = a_.startMap(\""+tag+"\");\n");
			
 
				+      cb.append("org.apache.hadoop.record.Index "+getId("midx")+" = a.startMap(\""+tag+"\");\n");
			
 
				       cb.append(fname+"=new "+getType()+"();\n");
			
 
				       cb.append("for (; !"+getId("midx")+".done(); "+getId("midx")+".incr()) {\n");
			
 
				       key.genReadMethod(cb, getId("k"),getId("k"),true);
			
 
				       value.genReadMethod(cb, getId("v"),getId("v"),true);
			
 
				       cb.append(fname+".put("+getId("k")+","+getId("v")+");\n");
			
 
				       cb.append("}\n");
			
 
				-      cb.append("a_.endMap(\""+tag+"\");\n");
			
 
				+      cb.append("a.endMap(\""+tag+"\");\n");
			
 
				       decrLevel();
			
 
				       cb.append("}\n");
			
 
				     }
			
@@ -100,7 +100,7 @@ public class JMap extends JCompType {
 
				           key.getWrapperType()+","+value.getWrapperType()+">> ";
			
 
				       cb.append("{\n");
			
 
				       incrLevel();
			
 
				-      cb.append("a_.startMap("+fname+",\""+tag+"\");\n");
			
 
				+      cb.append("a.startMap("+fname+",\""+tag+"\");\n");
			
 
				       cb.append(setType+getId("es")+" = "+fname+".entrySet();\n");
			
 
				       cb.append("for("+iterType+getId("midx")+" = "+getId("es")+".iterator(); "+getId("midx")+".hasNext(); ) {\n");
			
 
				       cb.append(entryType+getId("me")+" = "+getId("midx")+".next();\n");
			
@@ -109,7 +109,7 @@ public class JMap extends JCompType {
 
				       key.genWriteMethod(cb, getId("k"),getId("k"));
			
 
				       value.genWriteMethod(cb, getId("v"),getId("v"));
			
 
				       cb.append("}\n");
			
 
				-      cb.append("a_.endMap("+fname+",\""+tag+"\");\n");
			
 
				+      cb.append("a.endMap("+fname+",\""+tag+"\");\n");
			
 
				       cb.append("}\n");
			
 
				       decrLevel();
			
 
				     }
			
--- a/src/java/org/apache/hadoop/record/compiler/JRecord.java
+++ b/src/java/org/apache/hadoop/record/compiler/JRecord.java
@@ -55,11 +55,11 @@ public class JRecord extends JCompType {
 
				         cb.append(fullName+" "+fname+";\n");
			
 
				       }
			
 
				       cb.append(fname+"= new "+fullName+"();\n");
			
 
				-      cb.append("a_.readRecord("+fname+",\""+tag+"\");\n");
			
 
				+      cb.append(fname+".deserialize(a,\""+tag+"\");\n");
			
 
				     }
			
 
				     
			
 
				     void genWriteMethod(CodeBuffer cb, String fname, String tag) {
			
 
				-      cb.append("a_.writeRecord("+fname+",\""+tag+"\");\n");
			
 
				+      cb.append(fname+".serialize(a,\""+tag+"\");\n");
			
 
				     }
			
 
				     
			
 
				     void genSlurpBytes(CodeBuffer cb, String b, String s, String l) {
			
@@ -136,38 +136,38 @@ public class JRecord extends JCompType {
 
				         type.genGetSet(cb, name);
			
 
				       }
			
 
				       cb.append("public void serialize("+
			
 
				-          "final org.apache.hadoop.record.OutputArchive a_, final String tag)\n"+
			
 
				+          "final org.apache.hadoop.record.RecordOutput a, final String tag)\n"+
			
 
				           "throws java.io.IOException {\n");
			
 
				-      cb.append("a_.startRecord(this,tag);\n");
			
 
				+      cb.append("a.startRecord(this,tag);\n");
			
 
				       for (Iterator<JField<JavaType>> i = fields.iterator(); i.hasNext();) {
			
 
				         JField<JavaType> jf = i.next();
			
 
				         String name = jf.getName();
			
 
				         JavaType type = jf.getType();
			
 
				         type.genWriteMethod(cb, name, name);
			
 
				       }
			
 
				-      cb.append("a_.endRecord(this,tag);\n");
			
 
				+      cb.append("a.endRecord(this,tag);\n");
			
 
				       cb.append("}\n");
			
 
				       
			
 
				       cb.append("public void deserialize("+
			
 
				-          "final org.apache.hadoop.record.InputArchive a_, final String tag)\n"+
			
 
				+          "final org.apache.hadoop.record.RecordInput a, final String tag)\n"+
			
 
				           "throws java.io.IOException {\n");
			
 
				-      cb.append("a_.startRecord(tag);\n");
			
 
				+      cb.append("a.startRecord(tag);\n");
			
 
				       for (Iterator<JField<JavaType>> i = fields.iterator(); i.hasNext();) {
			
 
				         JField<JavaType> jf = i.next();
			
 
				         String name = jf.getName();
			
 
				         JavaType type = jf.getType();
			
 
				         type.genReadMethod(cb, name, name, false);
			
 
				       }
			
 
				-      cb.append("a_.endRecord(tag);\n");
			
 
				+      cb.append("a.endRecord(tag);\n");
			
 
				       cb.append("}\n");
			
 
				       
			
 
				       cb.append("public String toString() {\n");
			
 
				       cb.append("try {\n");
			
 
				       cb.append("java.io.ByteArrayOutputStream s =\n");
			
 
				       cb.append("  new java.io.ByteArrayOutputStream();\n");
			
 
				-      cb.append("org.apache.hadoop.record.CsvOutputArchive a_ = \n");
			
 
				-      cb.append("  new org.apache.hadoop.record.CsvOutputArchive(s);\n");
			
 
				-      cb.append("this.serialize(a_,\"\");\n");
			
 
				+      cb.append("org.apache.hadoop.record.CsvRecordOutput a = \n");
			
 
				+      cb.append("  new org.apache.hadoop.record.CsvRecordOutput(s);\n");
			
 
				+      cb.append("this.serialize(a);\n");
			
 
				       cb.append("return new String(s.toByteArray(), \"UTF-8\");\n");
			
 
				       cb.append("} catch (Throwable ex) {\n");
			
 
				       cb.append("throw new RuntimeException(ex);\n");
			
--- a/src/java/org/apache/hadoop/record/compiler/JType.java
+++ b/src/java/org/apache/hadoop/record/compiler/JType.java
@@ -80,14 +80,14 @@ abstract public class JType {
 
				     }
			
 
				     
			
 
				     void genWriteMethod(CodeBuffer cb, String fname, String tag) {
			
 
				-      cb.append("a_.write"+methodSuffix+"("+fname+",\""+tag+"\");\n");
			
 
				+      cb.append("a.write"+methodSuffix+"("+fname+",\""+tag+"\");\n");
			
 
				     }
			
 
				     
			
 
				     void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) {
			
 
				       if (decl) {
			
 
				         cb.append(name+" "+fname+";\n");
			
 
				       }
			
 
				-      cb.append(fname+"=a_.read"+methodSuffix+"(\""+tag+"\");\n");
			
 
				+      cb.append(fname+"=a.read"+methodSuffix+"(\""+tag+"\");\n");
			
 
				     }
			
 
				     
			
 
				     void genCompareTo(CodeBuffer cb, String fname, String other) {
			
--- a/src/java/org/apache/hadoop/record/compiler/JVector.java
+++ b/src/java/org/apache/hadoop/record/compiler/JVector.java
@@ -75,13 +75,13 @@ public class JVector extends JCompType {
 
				       }
			
 
				       cb.append("{\n");
			
 
				       incrLevel();
			
 
				-      cb.append("org.apache.hadoop.record.Index "+getId("vidx")+" = a_.startVector(\""+tag+"\");\n");
			
 
				+      cb.append("org.apache.hadoop.record.Index "+getId("vidx")+" = a.startVector(\""+tag+"\");\n");
			
 
				       cb.append(fname+"=new "+getType()+"();\n");
			
 
				       cb.append("for (; !"+getId("vidx")+".done(); "+getId("vidx")+".incr()) {\n");
			
 
				       element.genReadMethod(cb, getId("e"), getId("e"), true);
			
 
				       cb.append(fname+".add("+getId("e")+");\n");
			
 
				       cb.append("}\n");
			
 
				-      cb.append("a_.endVector(\""+tag+"\");\n");
			
 
				+      cb.append("a.endVector(\""+tag+"\");\n");
			
 
				       decrLevel();
			
 
				       cb.append("}\n");
			
 
				     }
			
@@ -89,13 +89,13 @@ public class JVector extends JCompType {
 
				     void genWriteMethod(CodeBuffer cb, String fname, String tag) {
			
 
				       cb.append("{\n");
			
 
				       incrLevel();
			
 
				-      cb.append("a_.startVector("+fname+",\""+tag+"\");\n");
			
 
				+      cb.append("a.startVector("+fname+",\""+tag+"\");\n");
			
 
				       cb.append("int "+getId("len")+" = "+fname+".size();\n");
			
 
				       cb.append("for(int "+getId("vidx")+" = 0; "+getId("vidx")+"<"+getId("len")+"; "+getId("vidx")+"++) {\n");
			
 
				       cb.append(element.getType()+" "+getId("e")+" = "+fname+".get("+getId("vidx")+");\n");
			
 
				       element.genWriteMethod(cb, getId("e"), getId("e"));
			
 
				       cb.append("}\n");
			
 
				-      cb.append("a_.endVector("+fname+",\""+tag+"\");\n");
			
 
				+      cb.append("a.endVector("+fname+",\""+tag+"\");\n");
			
 
				       cb.append("}\n");
			
 
				       decrLevel();
			
 
				     }
			
--- a/src/test/org/apache/hadoop/record/FromCpp.java
+++ b/src/test/org/apache/hadoop/record/FromCpp.java
@@ -18,7 +18,6 @@
 
				 
			
 
				 package org.apache.hadoop.record;
			
 
				 
			
 
				-import java.io.ByteArrayOutputStream;
			
 
				 import java.io.File;
			
 
				 import java.io.FileInputStream;
			
 
				 import java.io.IOException;
			
@@ -58,9 +57,9 @@ public class FromCpp extends TestCase {
 
				             r1.setVectorVal(new ArrayList<String>());
			
 
				             r1.setMapVal(new TreeMap<String,String>());
			
 
				             FileInputStream istream = new FileInputStream(tmpfile);
			
 
				-            RecordReader in = new RecordReader(istream, "binary");
			
 
				+            BinaryRecordInput in = new BinaryRecordInput(istream);
			
 
				             RecRecord1 r2 = new RecRecord1();
			
 
				-            in.read(r2);
			
 
				+            r2.deserialize(in, "");
			
 
				             istream.close();
			
 
				             assertTrue(r1.equals(r2));
			
 
				         } catch (IOException ex) {
			
@@ -84,9 +83,9 @@ public class FromCpp extends TestCase {
 
				             r1.setVectorVal(new ArrayList<String>());
			
 
				             r1.setMapVal(new TreeMap<String,String>());
			
 
				             FileInputStream istream = new FileInputStream(tmpfile);
			
 
				-            RecordReader in = new RecordReader(istream, "csv");
			
 
				+            CsvRecordInput in = new CsvRecordInput(istream);
			
 
				             RecRecord1 r2 = new RecRecord1();
			
 
				-            in.read(r2);
			
 
				+            r2.deserialize(in, "");
			
 
				             istream.close();
			
 
				             assertTrue(r1.equals(r2));
			
 
				         } catch (IOException ex) {
			
@@ -110,9 +109,9 @@ public class FromCpp extends TestCase {
 
				             r1.setVectorVal(new ArrayList<String>());
			
 
				             r1.setMapVal(new TreeMap<String,String>());
			
 
				             FileInputStream istream = new FileInputStream(tmpfile);
			
 
				-            RecordReader in = new RecordReader(istream, "xml");
			
 
				+            XmlRecordInput in = new XmlRecordInput(istream);
			
 
				             RecRecord1 r2 = new RecRecord1();
			
 
				-            in.read(r2);
			
 
				+            r2.deserialize(in, "");
			
 
				             istream.close();
			
 
				             assertTrue(r1.equals(r2));
			
 
				         } catch (IOException ex) {
			
--- a/src/test/org/apache/hadoop/record/TestRecordIO.java
+++ b/src/test/org/apache/hadoop/record/TestRecordIO.java
@@ -47,7 +47,7 @@ public class TestRecordIO extends TestCase {
 
				         try {
			
 
				             tmpfile = File.createTempFile("hadooprec", ".dat");
			
 
				             FileOutputStream ostream = new FileOutputStream(tmpfile);
			
 
				-            RecordWriter out = new RecordWriter(ostream, "binary");
			
 
				+            BinaryRecordOutput out = new BinaryRecordOutput(ostream);
			
 
				             RecRecord1 r1 = new RecRecord1();
			
 
				             r1.setBoolVal(true);
			
 
				             r1.setByteVal((byte)0x66);
			
@@ -62,12 +62,12 @@ public class TestRecordIO extends TestCase {
 
				             RecRecord0 r0 = new RecRecord0();
			
 
				             r0.setStringVal("other random text");
			
 
				             r1.setRecordVal(r0);
			
 
				-            out.write(r1);
			
 
				+            r1.serialize(out, "");
			
 
				             ostream.close();
			
 
				             FileInputStream istream = new FileInputStream(tmpfile);
			
 
				-            RecordReader in = new RecordReader(istream, "binary");
			
 
				+            BinaryRecordInput in = new BinaryRecordInput(istream);
			
 
				             RecRecord1 r2 = new RecRecord1();
			
 
				-            in.read(r2);
			
 
				+            r2.deserialize(in, "");
			
 
				             istream.close();
			
 
				             tmpfile.delete();
			
 
				             assertTrue("Serialized and deserialized records do not match.", r1.equals(r2));
			
@@ -81,7 +81,7 @@ public class TestRecordIO extends TestCase {
 
				         try {
			
 
				             tmpfile = File.createTempFile("hadooprec", ".txt");
			
 
				             FileOutputStream ostream = new FileOutputStream(tmpfile);
			
 
				-            RecordWriter out = new RecordWriter(ostream, "csv");
			
 
				+            CsvRecordOutput out = new CsvRecordOutput(ostream);
			
 
				             RecRecord1 r1 = new RecRecord1();
			
 
				             r1.setBoolVal(true);
			
 
				             r1.setByteVal((byte)0x66);
			
@@ -96,12 +96,12 @@ public class TestRecordIO extends TestCase {
 
				             RecRecord0 r0 = new RecRecord0();
			
 
				             r0.setStringVal("other random text");
			
 
				             r1.setRecordVal(r0);
			
 
				-            out.write(r1);
			
 
				+            r1.serialize(out, "");
			
 
				             ostream.close();
			
 
				             FileInputStream istream = new FileInputStream(tmpfile);
			
 
				-            RecordReader in = new RecordReader(istream, "csv");
			
 
				+            CsvRecordInput in = new CsvRecordInput(istream);
			
 
				             RecRecord1 r2 = new RecRecord1();
			
 
				-            in.read(r2);
			
 
				+            r2.deserialize(in, "");
			
 
				             istream.close();
			
 
				             tmpfile.delete();
			
 
				             assertTrue("Serialized and deserialized records do not match.", r1.equals(r2));
			
@@ -140,7 +140,7 @@ public class TestRecordIO extends TestCase {
 
				         try {
			
 
				             tmpfile = File.createTempFile("hadooprec", ".xml");
			
 
				             FileOutputStream ostream = new FileOutputStream(tmpfile);
			
 
				-            RecordWriter out = new RecordWriter(ostream, "xml");
			
 
				+            XmlRecordOutput out = new XmlRecordOutput(ostream);
			
 
				             RecRecord1 r1 = new RecRecord1();
			
 
				             r1.setBoolVal(true);
			
 
				             r1.setByteVal((byte)0x66);
			
@@ -148,19 +148,19 @@ public class TestRecordIO extends TestCase {
 
				             r1.setDoubleVal(1.5234);
			
 
				             r1.setIntVal(4567);
			
 
				             r1.setLongVal(0x5a5a5a5a5a5aL);
			
 
				-            r1.setStringVal("ran\002dom &lt; %text<&more");
			
 
				+            r1.setStringVal("ran\002dom &lt; %text<&more\uffff");
			
 
				             r1.setBufferVal(new Buffer());
			
 
				             r1.setVectorVal(new ArrayList<String>());
			
 
				             r1.setMapVal(new TreeMap<String,String>());
			
 
				             RecRecord0 r0 = new RecRecord0();
			
 
				             r0.setStringVal("other %rando\007m &amp; >&more text");
			
 
				             r1.setRecordVal(r0);
			
 
				-            out.write(r1);
			
 
				+            r1.serialize(out, "");
			
 
				             ostream.close();
			
 
				             FileInputStream istream = new FileInputStream(tmpfile);
			
 
				-            RecordReader in = new RecordReader(istream, "xml");
			
 
				+            XmlRecordInput in = new XmlRecordInput(istream);
			
 
				             RecRecord1 r2 = new RecRecord1();
			
 
				-            in.read(r2);
			
 
				+            r2.deserialize(in, "");
			
 
				             istream.close();
			
 
				             tmpfile.delete();
			
 
				             assertTrue("Serialized and deserialized records do not match.", r1.equals(r2));
			
--- a/src/test/org/apache/hadoop/record/ToCpp.java
+++ b/src/test/org/apache/hadoop/record/ToCpp.java
@@ -20,8 +20,6 @@ package org.apache.hadoop.record;
 
				 
			
 
				 import java.io.IOException;
			
 
				 import junit.framework.*;
			
 
				-import org.apache.hadoop.record.RecordWriter;
			
 
				-import java.io.ByteArrayOutputStream;
			
 
				 import java.io.File;
			
 
				 import java.io.FileOutputStream;
			
 
				 import java.util.ArrayList;
			
@@ -46,9 +44,9 @@ public class ToCpp extends TestCase {
 
				     public void testBinary() {
			
 
				         File tmpfile;
			
 
				         try {
			
 
				-            tmpfile = new File("/temp/hadooptemp.dat");
			
 
				+            tmpfile = new File("/tmp/hadooptemp.dat");
			
 
				             FileOutputStream ostream = new FileOutputStream(tmpfile);
			
 
				-            RecordWriter out = new RecordWriter(ostream, "binary");
			
 
				+            BinaryRecordOutput out = new BinaryRecordOutput(ostream);
			
 
				             RecRecord1 r1 = new RecRecord1();
			
 
				             r1.setBoolVal(true);
			
 
				             r1.setByteVal((byte)0x66);
			
@@ -60,7 +58,7 @@ public class ToCpp extends TestCase {
 
				             r1.setBufferVal(new Buffer());
			
 
				             r1.setVectorVal(new ArrayList<String>());
			
 
				             r1.setMapVal(new TreeMap<String,String>());
			
 
				-            out.write(r1);
			
 
				+            r1.serialize(out, "");
			
 
				             ostream.close();
			
 
				         } catch (IOException ex) {
			
 
				             ex.printStackTrace();
			
@@ -70,9 +68,9 @@ public class ToCpp extends TestCase {
 
				     public void testCsv() {
			
 
				         File tmpfile;
			
 
				         try {
			
 
				-            tmpfile = new File("/temp/hadooptemp.txt");
			
 
				+            tmpfile = new File("/tmp/hadooptemp.txt");
			
 
				             FileOutputStream ostream = new FileOutputStream(tmpfile);
			
 
				-            RecordWriter out = new RecordWriter(ostream, "csv");
			
 
				+            CsvRecordOutput out = new CsvRecordOutput(ostream);
			
 
				             RecRecord1 r1 = new RecRecord1();
			
 
				             r1.setBoolVal(true);
			
 
				             r1.setByteVal((byte)0x66);
			
@@ -84,7 +82,7 @@ public class ToCpp extends TestCase {
 
				             r1.setBufferVal(new Buffer());
			
 
				             r1.setVectorVal(new ArrayList<String>());
			
 
				             r1.setMapVal(new TreeMap<String,String>());
			
 
				-            out.write(r1);
			
 
				+            r1.serialize(out, "");
			
 
				             ostream.close();
			
 
				         } catch (IOException ex) {
			
 
				             ex.printStackTrace();
			
@@ -94,9 +92,9 @@ public class ToCpp extends TestCase {
 
				     public void testXml() {
			
 
				         File tmpfile;
			
 
				         try {
			
 
				-            tmpfile = new File("/temp/hadooptemp.xml");
			
 
				+            tmpfile = new File("/tmp/hadooptemp.xml");
			
 
				             FileOutputStream ostream = new FileOutputStream(tmpfile);
			
 
				-            RecordWriter out = new RecordWriter(ostream, "xml");
			
 
				+            XmlRecordOutput out = new XmlRecordOutput(ostream);
			
 
				             RecRecord1 r1 = new RecRecord1();
			
 
				             r1.setBoolVal(true);
			
 
				             r1.setByteVal((byte)0x66);
			
@@ -108,7 +106,7 @@ public class ToCpp extends TestCase {
 
				             r1.setBufferVal(new Buffer());
			
 
				             r1.setVectorVal(new ArrayList<String>());
			
 
				             r1.setMapVal(new TreeMap<String,String>());
			
 
				-            out.write(r1);
			
 
				+            r1.serialize(out, "");
			
 
				             ostream.close();
			
 
				         } catch (IOException ex) {
			
 
				             ex.printStackTrace();