
HADOOP-1096. Rename InputArchive and OutputArchive and make them public. Contributed by Milind Bhandarkar.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@519263 13f79535-47bb-0310-9956-ffa450edef68
Thomas White, 18 years ago
parent
commit 336d719b8c

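For client code, the rename replaces the InputArchive/OutputArchive hierarchy with the now-public RecordInput/RecordOutput interfaces and their concrete codecs (BinaryRecordOutput, CsvRecordOutput, XmlRecordOutput and the matching inputs). A minimal usage sketch of the new names, assuming the generated test record classes RecRecord1/RecRecord0 used by the tests in this commit are on the classpath (the class name RenameSketch is illustrative):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.TreeMap;
import org.apache.hadoop.record.BinaryRecordInput;
import org.apache.hadoop.record.BinaryRecordOutput;
import org.apache.hadoop.record.Buffer;

public class RenameSketch {
  public static void main(String[] args) throws Exception {
    // Populate a generated record the way TestRecordIO below does.
    RecRecord1 r1 = new RecRecord1();
    r1.setBoolVal(true);
    r1.setByteVal((byte) 0x66);
    r1.setDoubleVal(1.5234);
    r1.setIntVal(4567);
    r1.setLongVal(0x5a5a5a5a5a5aL);
    r1.setStringVal("random text");
    r1.setBufferVal(new Buffer());
    r1.setVectorVal(new ArrayList<String>());
    r1.setMapVal(new TreeMap<String, String>());
    RecRecord0 r0 = new RecRecord0();
    r0.setStringVal("other random text");
    r1.setRecordVal(r0);

    // Previously BinaryOutputArchive; now the public BinaryRecordOutput.
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    BinaryRecordOutput out = new BinaryRecordOutput(bos);
    r1.serialize(out, "");   // the tag is only used by tagged formats such as XML

    // Previously BinaryInputArchive; now the public BinaryRecordInput.
    BinaryRecordInput in =
        new BinaryRecordInput(new ByteArrayInputStream(bos.toByteArray()));
    RecRecord1 r2 = new RecRecord1();
    r2.deserialize(in, "");
    System.out.println(r1.equals(r2));   // true
  }
}
```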
+ 3 - 0
CHANGES.txt

@@ -66,6 +66,9 @@ Trunk (unreleased changes)
     agree with the Java versions.  (Milind Bhandarkar via
     tomwhite)
 
+20. HADOOP-1096.  Rename InputArchive and OutputArchive and
+    make them public. (Milind Bhandarkar via tomwhite)
+
 
 Release 0.12.0 - 2007-03-02
 

+ 16 - 16
src/c++/librecordio/xmlarchive.cc

@@ -85,12 +85,14 @@ static std::string toXMLString(std::string s)
     } else if (ch == '&') {
         r.append("&");
     } else if (ch == '%') {
-        r.append("%25");
+        r.append("%0025");
     } else if (ch < 0x20) {
         uint8_t* pb = (uint8_t*) &ch;
         char ch1 = hexchars[*pb/16];
         char ch2 = hexchars[*pb%16];
         r.push_back('%');
+        r.push_back('0');
+        r.push_back('0');
         r.push_back(ch1);
         r.push_back(ch2);
     } else {
@@ -105,10 +107,10 @@ static uint8_t h2b(char ch) {
     return ch - '0';
   }
   if ((ch >= 'a') || (ch <= 'f')) {
-    return ch - 'a';
+    return ch - 'a' + 10;
   }
   if ((ch >= 'A') || (ch <= 'F')) {
-    return ch - 'A';
+    return ch - 'A' + 10;
   }
   return 0;
 }
@@ -123,20 +125,18 @@ static std::string fromXMLString(std::string s)
     uint8_t b = *pb;
     if (b == '%') {
       char *pc = (char*) (pb+1);
-      if (*pc == '%') {
-        r.push_back('%');
-        pb += 1;
-      } else {
-        char ch1 = *pc++;
-        char ch2 = *pc++;
-        pb += 2;
-        uint8_t cnv = h2b(ch1)*16 + h2b(ch2);
-        pc = (char*) &cnv;
-        r.push_back(*pc);
-      }
+      // ignore the first two characters, which are always '0'
+      *pc++;
+      *pc++;
+      char ch1 = *pc++;
+      char ch2 = *pc++;
+      pb += 4;
+      uint8_t cnv = h2b(ch1)*16 + h2b(ch2);
+      pc = (char*) &cnv;
+      r.push_back(*pc);
     } else {
-        char *pc = (char*) pb;
-        r.push_back(*pc);
+      char *pc = (char*) pb;
+      r.push_back(*pc);
     }
     pb++;
   }

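Two fixes ride along with the rename in xmlarchive.cc: h2b now adds 10 when converting 'a'-'f'/'A'-'F' to a nibble, and the percent-escape is widened from two hex digits to four ('%' becomes "%0025", control characters become '%' followed by "00" and two hex digits), matching the Java-side change in Utils.java below. A minimal Java sketch of the four-digit round trip, mirroring the logic of toXMLString/fromXMLString (the standalone helpers here are illustrative, not the actual methods):

```java
public class EscapeSketch {
  private static final char[] HEX = "0123456789ABCDEF".toCharArray();

  // Escape one char the way the patched toXMLString does: '%' plus 4 hex digits.
  static String escape(char ch) {
    return "%" + HEX[(ch & 0xF000) >> 12] + HEX[(ch & 0x0F00) >> 8]
               + HEX[(ch & 0x00F0) >> 4] + HEX[ch & 0x000F];
  }

  // Reverse it the way the patched fromXMLString does: read 4 hex digits.
  static char unescape(String s) {
    int v = 0;
    for (int i = 1; i <= 4; i++) {
      v = (v << 4) | Character.digit(s.charAt(i), 16);
    }
    return (char) v;
  }

  public static void main(String[] args) {
    System.out.println(escape('\u0002'));         // %0002
    System.out.println(escape('%'));              // %0025
    System.out.println((int) unescape("%0002"));  // 2
  }
}
```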
+ 27 - 12
src/java/org/apache/hadoop/record/Record.java

@@ -31,34 +31,49 @@ import org.apache.hadoop.io.WritableComparable;
 public abstract class Record implements WritableComparable, Cloneable {
   
   /**
-   * Serialize a record into archive
-   * @param archive Output Archive
+   * Serialize a record with a tag (usually field name)
+   * @param rout Record output destination
    * @param tag record tag (Used only in tagged serialization e.g. XML)
    */
-  public abstract void serialize(OutputArchive archive, String tag)
+  public abstract void serialize(RecordOutput rout, String tag)
   throws IOException;
   
   /**
-   * Deserialize a record from archive
-   * @param archive Input Archive
+   * Deserialize a record with a tag (usually field name)
+   * @param rin Record input source
    * @param tag Record tag (Used only in tagged serialization e.g. XML)
    */
-  public abstract void deserialize(InputArchive archive, String tag)
+  public abstract void deserialize(RecordInput rin, String tag)
   throws IOException;
   
   // inherit javadoc
   public abstract int compareTo (final Object peer) throws ClassCastException;
   
+  /**
+   * Serialize a record without a tag
+   * @param rout Record output destination
+   */
+  public void serialize(RecordOutput rout) throws IOException {
+    this.serialize(rout, "");
+  }
+  
+  /**
+   * Deserialize a record without a tag
+   * @param rin Record input source
+   */
+  public void deserialize(RecordInput rin) throws IOException {
+    this.deserialize(rin, "");
+  }
+  
   // inherit javadoc
   public void write(final DataOutput out) throws java.io.IOException {
-    BinaryOutputArchive archive = new BinaryOutputArchive(out);
-    this.serialize(archive, "");
+    BinaryRecordOutput bout = new BinaryRecordOutput(out);
+    this.serialize(bout);
   }
   
   // inherit javadoc
-  public void readFields(final DataInput in) throws java.io.IOException {
-    BinaryInputArchive archive = new BinaryInputArchive(in);
-    this.deserialize(archive, "");
+  public void readFields(final DataInput din) throws java.io.IOException {
+    BinaryRecordInput rin = new BinaryRecordInput(din);
+    this.deserialize(rin);
   }
-  
 }

+ 168 - 10
src/java/org/apache/hadoop/record/Utils.java

@@ -51,11 +51,15 @@ public class Utils {
       } else if (ch == '&') {
         sb.append("&amp;");
       } else if (ch == '%') {
-        sb.append("%25");
-      } else if (ch < 0x20) {
+        sb.append("%0025");
+      } else if (ch < 0x20 ||
+          (ch > 0xD7FF && ch < 0xE000) ||
+          (ch > 0xFFFD)) {
         sb.append("%");
-        sb.append(hexchars[ch/16]);
-        sb.append(hexchars[ch%16]);
+        sb.append(hexchars[(ch & 0xF000) >> 12]);
+        sb.append(hexchars[(ch & 0x0F00) >> 8]);
+        sb.append(hexchars[(ch & 0x00F0) >> 4]);
+        sb.append(hexchars[(ch & 0x000F)]);
       } else {
         sb.append(ch);
       }
@@ -67,9 +71,9 @@ public class Utils {
     if (ch >= '0' && ch <= '9') {
       return ch - '0';
     } else if (ch >= 'A' && ch <= 'F') {
-      return ch - 'A';
+      return ch - 'A' + 10;
     } else if (ch >= 'a' && ch <= 'f') {
-      return ch - 'a';
+      return ch - 'a' + 10;
     }
     return 0;
   }
@@ -84,15 +88,16 @@ public class Utils {
     for (int idx = 0; idx < s.length();) {
       char ch = s.charAt(idx++);
       if (ch == '%') {
-        char ch1 = s.charAt(idx++);
-        char ch2 = s.charAt(idx++);
-        char res = (char)(h2c(ch1)*16 + h2c(ch2));
+        int ch1 = h2c(s.charAt(idx++)) << 12;
+        int ch2 = h2c(s.charAt(idx++)) << 8;
+        int ch3 = h2c(s.charAt(idx++)) << 4;
+        int ch4 = h2c(s.charAt(idx++));
+        char res = (char)(ch1 | ch2 | ch3 | ch4);
         sb.append(res);
       } else {
         sb.append(ch);
       }
     }
-    
     return sb.toString();
   }
   
@@ -235,6 +240,159 @@ public class Utils {
     return new Buffer(barr);
   }
   
+  private static int utf8LenForCodePoint(final int cpt) throws IOException {
+    if (cpt >=0 && cpt <= 0x7F) {
+      return 1;
+    }
+    if (cpt >= 0x80 && cpt <= 0x07FF) {
+      return 2;
+    }
+    if ((cpt >= 0x0800 && cpt < 0xD800) ||
+        (cpt > 0xDFFF && cpt <= 0xFFFD)) {
+      return 3;
+    }
+    if (cpt >= 0x10000 && cpt <= 0x10FFFF) {
+      return 4;
+    }
+    throw new IOException("Illegal Unicode Codepoint "+
+        Integer.toHexString(cpt)+" in string.");
+  }
+  
+  private static final int B10 =    Integer.parseInt("10000000", 2);
+  private static final int B110 =   Integer.parseInt("11000000", 2);
+  private static final int B1110 =  Integer.parseInt("11100000", 2);
+  private static final int B11110 = Integer.parseInt("11110000", 2);
+  private static final int B11 =    Integer.parseInt("11000000", 2);
+  private static final int B111 =   Integer.parseInt("11100000", 2);
+  private static final int B1111 =  Integer.parseInt("11110000", 2);
+  private static final int B11111 = Integer.parseInt("11111000", 2);
+  
+  private static int writeUtf8(int cpt, final byte[] bytes, final int offset)
+  throws IOException {
+    if (cpt >=0 && cpt <= 0x7F) {
+      bytes[offset] = (byte) cpt;
+      return 1;
+    }
+    if (cpt >= 0x80 && cpt <= 0x07FF) {
+      bytes[offset+1] = (byte) (B10 | (cpt & 0x3F));
+      cpt = cpt >> 6;
+      bytes[offset] = (byte) (B110 | (cpt & 0x1F));
+      return 2;
+    }
+    if ((cpt >= 0x0800 && cpt < 0xD800) ||
+        (cpt > 0xDFFF && cpt <= 0xFFFD)) {
+      bytes[offset+2] = (byte) (B10 | (cpt & 0x3F));
+      cpt = cpt >> 6;
+      bytes[offset+1] = (byte) (B10 | (cpt & 0x3F));
+      cpt = cpt >> 6;
+      bytes[offset] = (byte) (B1110 | (cpt & 0x0F));
+      return 3;
+    }
+    if (cpt >= 0x10000 && cpt <= 0x10FFFF) {
+      bytes[offset+3] = (byte) (B10 | (cpt & 0x3F));
+      cpt = cpt >> 6;
+      bytes[offset+2] = (byte) (B10 | (cpt & 0x3F));
+      cpt = cpt >> 6;
+      bytes[offset+1] = (byte) (B10 | (cpt & 0x3F));
+      cpt = cpt >> 6;
+      bytes[offset] = (byte) (B11110 | (cpt & 0x07));
+      return 4;
+    }
+    throw new IOException("Illegal Unicode Codepoint "+
+        Integer.toHexString(cpt)+" in string.");
+  }
+  
+  static void toBinaryString(final DataOutput out, final String str)
+  throws IOException {
+    final int strlen = str.length();
+    byte[] bytes = new byte[strlen*4]; // Codepoints expand to 4 bytes max
+    int utf8Len = 0;
+    int idx = 0;
+    while(idx < strlen) {
+      final int cpt = str.codePointAt(idx);
+      idx += Character.isSupplementaryCodePoint(cpt) ? 2 : 1;
+      utf8Len += writeUtf8(cpt, bytes, utf8Len);
+    }
+    writeVInt(out, utf8Len);
+    out.write(bytes, 0, utf8Len);
+  }
+  
+  static boolean isValidCodePoint(int cpt) {
+    return !((cpt > 0x10FFFF) ||
+        (cpt >= 0xD800 && cpt <= 0xDFFF) ||
+        (cpt >= 0xFFFE && cpt <=0xFFFF));
+  }
+  
+  private static int utf8ToCodePoint(int b1, int b2, int b3, int b4) {
+    int cpt = 0;
+    cpt = (((b1 & ~B11111) << 18) |
+        ((b2 & ~B11) << 12) |
+        ((b3 & ~B11) << 6) |
+        (b4 & ~B11));
+    return cpt;
+  }
+  
+  private static int utf8ToCodePoint(int b1, int b2, int b3) {
+    int cpt = 0;
+    cpt = (((b1 & ~B1111) << 12) | ((b2 & ~B11) << 6) | (b3 & ~B11));
+    return cpt;
+  }
+  
+  private static int utf8ToCodePoint(int b1, int b2) {
+    int cpt = 0;
+    cpt = (((b1 & ~B111) << 6) | (b2 & ~B11));
+    return cpt;
+  }
+  
+  private static void checkB10(int b) throws IOException {
+    if ((b & B11) != B10) {
+      throw new IOException("Invalid UTF-8 representation.");
+    }
+  }
+  
+  static String fromBinaryString(final DataInput din) throws IOException {
+    final int utf8Len = readVInt(din);
+    final byte[] bytes = new byte[utf8Len];
+    din.readFully(bytes);
+    int len = 0;
+    // For the most common case, i.e. ASCII, numChars = utf8Len
+    StringBuilder sb = new StringBuilder(utf8Len);
+    while(len < utf8Len) {
+      int cpt = 0;
+      final int b1 = bytes[len++] & 0xFF;
+      if (b1 <= 0x7F) {
+        cpt = b1;
+      } else if ((b1 & B11111) == B11110) {
+        int b2 = bytes[len++] & 0xFF;
+        checkB10(b2);
+        int b3 = bytes[len++] & 0xFF;
+        checkB10(b3);
+        int b4 = bytes[len++] & 0xFF;
+        checkB10(b4);
+        cpt = utf8ToCodePoint(b1, b2, b3, b4);
+      } else if ((b1 & B1111) == B1110) {
+        int b2 = bytes[len++] & 0xFF;
+        checkB10(b2);
+        int b3 = bytes[len++] & 0xFF;
+        checkB10(b3);
+        cpt = utf8ToCodePoint(b1, b2, b3);
+      } else if ((b1 & B111) == B110) {
+        int b2 = bytes[len++] & 0xFF;
+        checkB10(b2);
+        cpt = utf8ToCodePoint(b1, b2);
+      } else {
+        throw new IOException("Invalid UTF-8 byte "+Integer.toHexString(b1)+
+            " at offset "+(len-1)+" in length of "+utf8Len);
+      }
+      if (!isValidCodePoint(cpt)) {
+        throw new IOException("Illegal Unicode Codepoint "+
+          Integer.toHexString(cpt)+" in stream.");
+      }
+      sb.appendCodePoint(cpt);
+    }
+    return sb.toString();
+  }
+  
   /** Parse a float from a byte array. */
   public static float readFloat(byte[] bytes, int start) {
     return WritableComparator.readFloat(bytes, start);

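The new Utils methods implement UTF-8 encoding and decoding by code point: toBinaryString writes a VInt byte length followed by the encoded bytes, and fromBinaryString validates continuation bytes and rejects surrogates and non-characters. A small sketch of the two-byte case using the same bit patterns as writeUtf8 and utf8ToCodePoint (standalone illustrative helpers, not the package-private methods themselves):

```java
public class Utf8Sketch {
  // Encode a code point in the 2-byte range (U+0080..U+07FF), as writeUtf8 does.
  static byte[] encode2(int cpt) {
    byte b2 = (byte) (0x80 | (cpt & 0x3F));        // B10  | low 6 bits
    byte b1 = (byte) (0xC0 | ((cpt >> 6) & 0x1F)); // B110 | next 5 bits
    return new byte[] { b1, b2 };
  }

  // Decode it back, as utf8ToCodePoint(b1, b2) does.
  static int decode2(int b1, int b2) {
    return ((b1 & 0x1F) << 6) | (b2 & 0x3F);
  }

  public static void main(String[] args) {
    int cpt = 0x00E9;                   // 'é'
    byte[] enc = encode2(cpt);
    System.out.printf("%02X %02X%n", enc[0] & 0xFF, enc[1] & 0xFF);     // C3 A9
    System.out.println(decode2(enc[0] & 0xFF, enc[1] & 0xFF) == cpt);   // true
  }
}
```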
+ 0 - 4
src/java/org/apache/hadoop/record/compiler/JBuffer.java

@@ -33,10 +33,6 @@ public class JBuffer extends JCompType {
       super("org.apache.hadoop.record.Buffer", "Buffer", "org.apache.hadoop.record.Buffer");
     }
     
-    void genWriteMethod(CodeBuffer cb, String fname, String tag) {
-      cb.append("a_.writeBuffer("+fname+",\""+tag+"\");\n");
-    }
-    
     void genCompareTo(CodeBuffer cb, String fname, String other) {
       cb.append("ret = "+fname+".compareTo("+other+");\n");
     }

+ 4 - 4
src/java/org/apache/hadoop/record/compiler/JMap.java

@@ -79,14 +79,14 @@ public class JMap extends JCompType {
       }
       cb.append("{\n");
       incrLevel();
-      cb.append("org.apache.hadoop.record.Index "+getId("midx")+" = a_.startMap(\""+tag+"\");\n");
+      cb.append("org.apache.hadoop.record.Index "+getId("midx")+" = a.startMap(\""+tag+"\");\n");
       cb.append(fname+"=new "+getType()+"();\n");
       cb.append("for (; !"+getId("midx")+".done(); "+getId("midx")+".incr()) {\n");
       key.genReadMethod(cb, getId("k"),getId("k"),true);
       value.genReadMethod(cb, getId("v"),getId("v"),true);
       cb.append(fname+".put("+getId("k")+","+getId("v")+");\n");
       cb.append("}\n");
-      cb.append("a_.endMap(\""+tag+"\");\n");
+      cb.append("a.endMap(\""+tag+"\");\n");
       decrLevel();
       cb.append("}\n");
     }
@@ -100,7 +100,7 @@ public class JMap extends JCompType {
           key.getWrapperType()+","+value.getWrapperType()+">> ";
       cb.append("{\n");
       incrLevel();
-      cb.append("a_.startMap("+fname+",\""+tag+"\");\n");
+      cb.append("a.startMap("+fname+",\""+tag+"\");\n");
       cb.append(setType+getId("es")+" = "+fname+".entrySet();\n");
       cb.append("for("+iterType+getId("midx")+" = "+getId("es")+".iterator(); "+getId("midx")+".hasNext(); ) {\n");
       cb.append(entryType+getId("me")+" = "+getId("midx")+".next();\n");
@@ -109,7 +109,7 @@ public class JMap extends JCompType {
       key.genWriteMethod(cb, getId("k"),getId("k"));
       value.genWriteMethod(cb, getId("v"),getId("v"));
       cb.append("}\n");
-      cb.append("a_.endMap("+fname+",\""+tag+"\");\n");
+      cb.append("a.endMap("+fname+",\""+tag+"\");\n");
       cb.append("}\n");
       decrLevel();
     }

+ 11 - 11
src/java/org/apache/hadoop/record/compiler/JRecord.java

@@ -55,11 +55,11 @@ public class JRecord extends JCompType {
         cb.append(fullName+" "+fname+";\n");
       }
       cb.append(fname+"= new "+fullName+"();\n");
-      cb.append("a_.readRecord("+fname+",\""+tag+"\");\n");
+      cb.append(fname+".deserialize(a,\""+tag+"\");\n");
     }
     
     void genWriteMethod(CodeBuffer cb, String fname, String tag) {
-      cb.append("a_.writeRecord("+fname+",\""+tag+"\");\n");
+      cb.append(fname+".serialize(a,\""+tag+"\");\n");
     }
     
     void genSlurpBytes(CodeBuffer cb, String b, String s, String l) {
@@ -136,38 +136,38 @@ public class JRecord extends JCompType {
         type.genGetSet(cb, name);
       }
       cb.append("public void serialize("+
-          "final org.apache.hadoop.record.OutputArchive a_, final String tag)\n"+
+          "final org.apache.hadoop.record.RecordOutput a, final String tag)\n"+
           "throws java.io.IOException {\n");
-      cb.append("a_.startRecord(this,tag);\n");
+      cb.append("a.startRecord(this,tag);\n");
       for (Iterator<JField<JavaType>> i = fields.iterator(); i.hasNext();) {
         JField<JavaType> jf = i.next();
         String name = jf.getName();
         JavaType type = jf.getType();
         type.genWriteMethod(cb, name, name);
       }
-      cb.append("a_.endRecord(this,tag);\n");
+      cb.append("a.endRecord(this,tag);\n");
       cb.append("}\n");
       
       cb.append("public void deserialize("+
-          "final org.apache.hadoop.record.InputArchive a_, final String tag)\n"+
+          "final org.apache.hadoop.record.RecordInput a, final String tag)\n"+
           "throws java.io.IOException {\n");
-      cb.append("a_.startRecord(tag);\n");
+      cb.append("a.startRecord(tag);\n");
       for (Iterator<JField<JavaType>> i = fields.iterator(); i.hasNext();) {
         JField<JavaType> jf = i.next();
         String name = jf.getName();
         JavaType type = jf.getType();
         type.genReadMethod(cb, name, name, false);
       }
-      cb.append("a_.endRecord(tag);\n");
+      cb.append("a.endRecord(tag);\n");
       cb.append("}\n");
       
       cb.append("public String toString() {\n");
       cb.append("try {\n");
       cb.append("java.io.ByteArrayOutputStream s =\n");
       cb.append("  new java.io.ByteArrayOutputStream();\n");
-      cb.append("org.apache.hadoop.record.CsvOutputArchive a_ = \n");
-      cb.append("  new org.apache.hadoop.record.CsvOutputArchive(s);\n");
-      cb.append("this.serialize(a_,\"\");\n");
+      cb.append("org.apache.hadoop.record.CsvRecordOutput a = \n");
+      cb.append("  new org.apache.hadoop.record.CsvRecordOutput(s);\n");
+      cb.append("this.serialize(a);\n");
       cb.append("return new String(s.toByteArray(), \"UTF-8\");\n");
       cb.append("} catch (Throwable ex) {\n");
       cb.append("throw new RuntimeException(ex);\n");

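In the generated code, the archive parameter is renamed from a_ to a, and nested record fields now call serialize/deserialize on themselves instead of going through a_.writeRecord/a_.readRecord. Reassembling the cb.append(...) strings above gives roughly this shape for the emitted methods (field names and the writeInt suffix are illustrative, not actual compiler output):

```java
public void serialize(final org.apache.hadoop.record.RecordOutput a, final String tag)
    throws java.io.IOException {
  a.startRecord(this, tag);
  a.writeInt(intField, "intField");        // primitive field via JType.genWriteMethod
  recordField.serialize(a, "recordField"); // nested record now serializes itself
  a.endRecord(this, tag);
}

public String toString() {
  try {
    java.io.ByteArrayOutputStream s = new java.io.ByteArrayOutputStream();
    org.apache.hadoop.record.CsvRecordOutput a =
        new org.apache.hadoop.record.CsvRecordOutput(s);
    this.serialize(a);                     // untagged overload added in Record.java
    return new String(s.toByteArray(), "UTF-8");
  } catch (Throwable ex) {
    throw new RuntimeException(ex);
  }
}
```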
+ 2 - 2
src/java/org/apache/hadoop/record/compiler/JType.java

@@ -80,14 +80,14 @@ abstract public class JType {
     }
     
     void genWriteMethod(CodeBuffer cb, String fname, String tag) {
-      cb.append("a_.write"+methodSuffix+"("+fname+",\""+tag+"\");\n");
+      cb.append("a.write"+methodSuffix+"("+fname+",\""+tag+"\");\n");
     }
     
     void genReadMethod(CodeBuffer cb, String fname, String tag, boolean decl) {
       if (decl) {
         cb.append(name+" "+fname+";\n");
       }
-      cb.append(fname+"=a_.read"+methodSuffix+"(\""+tag+"\");\n");
+      cb.append(fname+"=a.read"+methodSuffix+"(\""+tag+"\");\n");
     }
     
     void genCompareTo(CodeBuffer cb, String fname, String other) {

+ 4 - 4
src/java/org/apache/hadoop/record/compiler/JVector.java

@@ -75,13 +75,13 @@ public class JVector extends JCompType {
       }
       cb.append("{\n");
       incrLevel();
-      cb.append("org.apache.hadoop.record.Index "+getId("vidx")+" = a_.startVector(\""+tag+"\");\n");
+      cb.append("org.apache.hadoop.record.Index "+getId("vidx")+" = a.startVector(\""+tag+"\");\n");
       cb.append(fname+"=new "+getType()+"();\n");
       cb.append("for (; !"+getId("vidx")+".done(); "+getId("vidx")+".incr()) {\n");
       element.genReadMethod(cb, getId("e"), getId("e"), true);
       cb.append(fname+".add("+getId("e")+");\n");
       cb.append("}\n");
-      cb.append("a_.endVector(\""+tag+"\");\n");
+      cb.append("a.endVector(\""+tag+"\");\n");
       decrLevel();
       cb.append("}\n");
     }
@@ -89,13 +89,13 @@ public class JVector extends JCompType {
     void genWriteMethod(CodeBuffer cb, String fname, String tag) {
       cb.append("{\n");
       incrLevel();
-      cb.append("a_.startVector("+fname+",\""+tag+"\");\n");
+      cb.append("a.startVector("+fname+",\""+tag+"\");\n");
       cb.append("int "+getId("len")+" = "+fname+".size();\n");
       cb.append("for(int "+getId("vidx")+" = 0; "+getId("vidx")+"<"+getId("len")+"; "+getId("vidx")+"++) {\n");
       cb.append(element.getType()+" "+getId("e")+" = "+fname+".get("+getId("vidx")+");\n");
       element.genWriteMethod(cb, getId("e"), getId("e"));
       cb.append("}\n");
-      cb.append("a_.endVector("+fname+",\""+tag+"\");\n");
+      cb.append("a.endVector("+fname+",\""+tag+"\");\n");
       cb.append("}\n");
       decrLevel();
     }

+ 6 - 7
src/test/org/apache/hadoop/record/FromCpp.java

@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.record;
 
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
@@ -58,9 +57,9 @@ public class FromCpp extends TestCase {
             r1.setVectorVal(new ArrayList<String>());
             r1.setMapVal(new TreeMap<String,String>());
             FileInputStream istream = new FileInputStream(tmpfile);
-            RecordReader in = new RecordReader(istream, "binary");
+            BinaryRecordInput in = new BinaryRecordInput(istream);
             RecRecord1 r2 = new RecRecord1();
-            in.read(r2);
+            r2.deserialize(in, "");
             istream.close();
             assertTrue(r1.equals(r2));
         } catch (IOException ex) {
@@ -84,9 +83,9 @@ public class FromCpp extends TestCase {
             r1.setVectorVal(new ArrayList<String>());
             r1.setMapVal(new TreeMap<String,String>());
             FileInputStream istream = new FileInputStream(tmpfile);
-            RecordReader in = new RecordReader(istream, "csv");
+            CsvRecordInput in = new CsvRecordInput(istream);
             RecRecord1 r2 = new RecRecord1();
-            in.read(r2);
+            r2.deserialize(in, "");
             istream.close();
             assertTrue(r1.equals(r2));
         } catch (IOException ex) {
@@ -110,9 +109,9 @@ public class FromCpp extends TestCase {
             r1.setVectorVal(new ArrayList<String>());
             r1.setMapVal(new TreeMap<String,String>());
             FileInputStream istream = new FileInputStream(tmpfile);
-            RecordReader in = new RecordReader(istream, "xml");
+            XmlRecordInput in = new XmlRecordInput(istream);
             RecRecord1 r2 = new RecRecord1();
-            in.read(r2);
+            r2.deserialize(in, "");
             istream.close();
             assertTrue(r1.equals(r2));
         } catch (IOException ex) {

+ 13 - 13
src/test/org/apache/hadoop/record/TestRecordIO.java

@@ -47,7 +47,7 @@ public class TestRecordIO extends TestCase {
         try {
             tmpfile = File.createTempFile("hadooprec", ".dat");
             FileOutputStream ostream = new FileOutputStream(tmpfile);
-            RecordWriter out = new RecordWriter(ostream, "binary");
+            BinaryRecordOutput out = new BinaryRecordOutput(ostream);
             RecRecord1 r1 = new RecRecord1();
             r1.setBoolVal(true);
             r1.setByteVal((byte)0x66);
@@ -62,12 +62,12 @@ public class TestRecordIO extends TestCase {
             RecRecord0 r0 = new RecRecord0();
             r0.setStringVal("other random text");
             r1.setRecordVal(r0);
-            out.write(r1);
+            r1.serialize(out, "");
             ostream.close();
             FileInputStream istream = new FileInputStream(tmpfile);
-            RecordReader in = new RecordReader(istream, "binary");
+            BinaryRecordInput in = new BinaryRecordInput(istream);
             RecRecord1 r2 = new RecRecord1();
-            in.read(r2);
+            r2.deserialize(in, "");
             istream.close();
             tmpfile.delete();
             assertTrue("Serialized and deserialized records do not match.", r1.equals(r2));
@@ -81,7 +81,7 @@ public class TestRecordIO extends TestCase {
         try {
             tmpfile = File.createTempFile("hadooprec", ".txt");
             FileOutputStream ostream = new FileOutputStream(tmpfile);
-            RecordWriter out = new RecordWriter(ostream, "csv");
+            CsvRecordOutput out = new CsvRecordOutput(ostream);
             RecRecord1 r1 = new RecRecord1();
             r1.setBoolVal(true);
             r1.setByteVal((byte)0x66);
@@ -96,12 +96,12 @@ public class TestRecordIO extends TestCase {
             RecRecord0 r0 = new RecRecord0();
             r0.setStringVal("other random text");
             r1.setRecordVal(r0);
-            out.write(r1);
+            r1.serialize(out, "");
             ostream.close();
             FileInputStream istream = new FileInputStream(tmpfile);
-            RecordReader in = new RecordReader(istream, "csv");
+            CsvRecordInput in = new CsvRecordInput(istream);
             RecRecord1 r2 = new RecRecord1();
-            in.read(r2);
+            r2.deserialize(in, "");
             istream.close();
             tmpfile.delete();
             assertTrue("Serialized and deserialized records do not match.", r1.equals(r2));
@@ -140,7 +140,7 @@ public class TestRecordIO extends TestCase {
         try {
             tmpfile = File.createTempFile("hadooprec", ".xml");
             FileOutputStream ostream = new FileOutputStream(tmpfile);
-            RecordWriter out = new RecordWriter(ostream, "xml");
+            XmlRecordOutput out = new XmlRecordOutput(ostream);
             RecRecord1 r1 = new RecRecord1();
             r1.setBoolVal(true);
             r1.setByteVal((byte)0x66);
@@ -148,19 +148,19 @@ public class TestRecordIO extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("ran\002dom &lt; %text<&more");
+            r1.setStringVal("ran\002dom &lt; %text<&more\uffff");
             r1.setBufferVal(new Buffer());
             r1.setVectorVal(new ArrayList<String>());
             r1.setMapVal(new TreeMap<String,String>());
             RecRecord0 r0 = new RecRecord0();
             r0.setStringVal("other %rando\007m &amp; >&more text");
             r1.setRecordVal(r0);
-            out.write(r1);
+            r1.serialize(out, "");
             ostream.close();
             FileInputStream istream = new FileInputStream(tmpfile);
-            RecordReader in = new RecordReader(istream, "xml");
+            XmlRecordInput in = new XmlRecordInput(istream);
             RecRecord1 r2 = new RecRecord1();
-            in.read(r2);
+            r2.deserialize(in, "");
             istream.close();
             tmpfile.delete();
             assertTrue("Serialized and deserialized records do not match.", r1.equals(r2));

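The test classes switch from the format-string based RecordWriter/RecordReader to the concrete record outputs/inputs, and the XML test string gains a \uffff character to exercise the wider escaping. A sketch of the updated XML round trip, modelled on testXml above and assuming the generated RecRecord1/RecRecord0 classes:

```java
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.TreeMap;
import org.apache.hadoop.record.Buffer;
import org.apache.hadoop.record.XmlRecordInput;
import org.apache.hadoop.record.XmlRecordOutput;

public class XmlRoundTripSketch {
  public static void main(String[] args) throws Exception {
    RecRecord1 r1 = new RecRecord1();
    r1.setBoolVal(true);
    r1.setByteVal((byte) 0x66);
    r1.setDoubleVal(1.5234);
    r1.setIntVal(4567);
    r1.setLongVal(0x5a5a5a5a5a5aL);
    // Control chars, '%', and \uffff are written as '%' plus four hex digits
    // per the Utils.toXMLString change above.
    r1.setStringVal("ran\002dom &lt; %text<&more\uffff");
    r1.setBufferVal(new Buffer());
    r1.setVectorVal(new ArrayList<String>());
    r1.setMapVal(new TreeMap<String, String>());
    RecRecord0 r0 = new RecRecord0();
    r0.setStringVal("other %rando\007m &amp; >&more text");
    r1.setRecordVal(r0);

    File tmpfile = File.createTempFile("hadooprec", ".xml");
    FileOutputStream ostream = new FileOutputStream(tmpfile);
    XmlRecordOutput out = new XmlRecordOutput(ostream);
    r1.serialize(out, "");
    ostream.close();

    FileInputStream istream = new FileInputStream(tmpfile);
    XmlRecordInput in = new XmlRecordInput(istream);
    RecRecord1 r2 = new RecRecord1();
    r2.deserialize(in, "");
    istream.close();
    tmpfile.delete();
    System.out.println(r1.equals(r2));   // true
  }
}
```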
+ 9 - 11
src/test/org/apache/hadoop/record/ToCpp.java

@@ -20,8 +20,6 @@ package org.apache.hadoop.record;
 
 import java.io.IOException;
 import junit.framework.*;
-import org.apache.hadoop.record.RecordWriter;
-import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.util.ArrayList;
@@ -46,9 +44,9 @@ public class ToCpp extends TestCase {
     public void testBinary() {
         File tmpfile;
         try {
-            tmpfile = new File("/temp/hadooptemp.dat");
+            tmpfile = new File("/tmp/hadooptemp.dat");
             FileOutputStream ostream = new FileOutputStream(tmpfile);
-            RecordWriter out = new RecordWriter(ostream, "binary");
+            BinaryRecordOutput out = new BinaryRecordOutput(ostream);
             RecRecord1 r1 = new RecRecord1();
             r1.setBoolVal(true);
             r1.setByteVal((byte)0x66);
@@ -60,7 +58,7 @@ public class ToCpp extends TestCase {
             r1.setBufferVal(new Buffer());
             r1.setVectorVal(new ArrayList<String>());
             r1.setMapVal(new TreeMap<String,String>());
-            out.write(r1);
+            r1.serialize(out, "");
             ostream.close();
         } catch (IOException ex) {
             ex.printStackTrace();
@@ -70,9 +68,9 @@ public class ToCpp extends TestCase {
     public void testCsv() {
         File tmpfile;
         try {
-            tmpfile = new File("/temp/hadooptemp.txt");
+            tmpfile = new File("/tmp/hadooptemp.txt");
             FileOutputStream ostream = new FileOutputStream(tmpfile);
-            RecordWriter out = new RecordWriter(ostream, "csv");
+            CsvRecordOutput out = new CsvRecordOutput(ostream);
             RecRecord1 r1 = new RecRecord1();
             r1.setBoolVal(true);
             r1.setByteVal((byte)0x66);
@@ -84,7 +82,7 @@ public class ToCpp extends TestCase {
             r1.setBufferVal(new Buffer());
             r1.setVectorVal(new ArrayList<String>());
             r1.setMapVal(new TreeMap<String,String>());
-            out.write(r1);
+            r1.serialize(out, "");
             ostream.close();
         } catch (IOException ex) {
             ex.printStackTrace();
@@ -94,9 +92,9 @@ public class ToCpp extends TestCase {
     public void testXml() {
         File tmpfile;
         try {
-            tmpfile = new File("/temp/hadooptemp.xml");
+            tmpfile = new File("/tmp/hadooptemp.xml");
             FileOutputStream ostream = new FileOutputStream(tmpfile);
-            RecordWriter out = new RecordWriter(ostream, "xml");
+            XmlRecordOutput out = new XmlRecordOutput(ostream);
             RecRecord1 r1 = new RecRecord1();
             r1.setBoolVal(true);
             r1.setByteVal((byte)0x66);
@@ -108,7 +106,7 @@ public class ToCpp extends TestCase {
             r1.setBufferVal(new Buffer());
             r1.setVectorVal(new ArrayList<String>());
             r1.setMapVal(new TreeMap<String,String>());
-            out.write(r1);
+            r1.serialize(out, "");
             ostream.close();
         } catch (IOException ex) {
             ex.printStackTrace();