Browse Source

HADOOP-421. Replace uses of String in recordio with Text. Contributed by Milind.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@433322 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 years ago
parent
commit
b69e4b0b7e
26 changed files with 191 additions and 130 deletions
  1. 4 0
      CHANGES.txt
  2. 6 6
      src/c++/librecordio/Makefile
  3. 1 1
      src/c++/librecordio/archive.hh
  4. 1 1
      src/c++/librecordio/recordio.hh
  5. 34 28
      src/c++/librecordio/xmlarchive.cc
  6. 5 5
      src/java/org/apache/hadoop/record/BinaryInputArchive.java
  7. 3 4
      src/java/org/apache/hadoop/record/BinaryOutputArchive.java
  8. 2 2
      src/java/org/apache/hadoop/record/CsvInputArchive.java
  9. 2 1
      src/java/org/apache/hadoop/record/CsvOutputArchive.java
  10. 2 3
      src/java/org/apache/hadoop/record/InputArchive.java
  11. 2 1
      src/java/org/apache/hadoop/record/OutputArchive.java
  12. 56 16
      src/java/org/apache/hadoop/record/Utils.java
  13. 2 2
      src/java/org/apache/hadoop/record/XmlInputArchive.java
  14. 2 1
      src/java/org/apache/hadoop/record/XmlOutputArchive.java
  15. 8 7
      src/java/org/apache/hadoop/record/compiler/JRecord.java
  16. 2 2
      src/java/org/apache/hadoop/record/compiler/JString.java
  17. 1 1
      src/java/org/apache/hadoop/record/package.html
  18. 4 5
      src/test/org/apache/hadoop/record/test/FromCpp.java
  19. 3 1
      src/test/org/apache/hadoop/record/test/RecBuffer.java
  20. 3 1
      src/test/org/apache/hadoop/record/test/RecInt.java
  21. 6 4
      src/test/org/apache/hadoop/record/test/RecRecord0.java
  22. 15 13
      src/test/org/apache/hadoop/record/test/RecRecord1.java
  23. 7 5
      src/test/org/apache/hadoop/record/test/RecString.java
  24. 9 9
      src/test/org/apache/hadoop/record/test/TestMapRed.java
  25. 7 6
      src/test/org/apache/hadoop/record/test/TestRecordIO.java
  26. 4 5
      src/test/org/apache/hadoop/record/test/ToCpp.java

+ 4 - 0
CHANGES.txt

@@ -54,6 +54,10 @@ Trunk (unreleased changes)
 12. HADOOP-176.  Fix a bug in IntWritable.Comparator.
 12. HADOOP-176.  Fix a bug in IntWritable.Comparator.
     (Dick King via cutting)
     (Dick King via cutting)
 
 
+13. HADOOP-421.  Replace uses of String in recordio package with Text
+    class, for improved handling of UTF-8 data.
+    (Milind Bhandarkar via cutting)
+
 
 
 Release 0.5.0 - 2006-08-04
 Release 0.5.0 - 2006-08-04
 
 

+ 6 - 6
src/c++/librecordio/Makefile

@@ -19,22 +19,22 @@ all: librecordio.a test
 librecordio.a: recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o exception.o
 librecordio.a: recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o exception.o
 	ar cru librecordio.a recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o exception.o
 	ar cru librecordio.a recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o exception.o
 
 
-recordio.o: recordio.cc
+recordio.o: recordio.cc recordio.hh archive.hh
 	g++ -g3 -O0 -c -I${XERCESCROOT}/include -o recordio.o recordio.cc
 	g++ -g3 -O0 -c -I${XERCESCROOT}/include -o recordio.o recordio.cc
 	
 	
-filestream.o: filestream.cc
+filestream.o: filestream.cc recordio.hh filestream.hh
 	g++ -g3 -O0 -c -o filestream.o filestream.cc
 	g++ -g3 -O0 -c -o filestream.o filestream.cc
 	
 	
-binarchive.o: binarchive.cc
+binarchive.o: binarchive.cc recordio.hh binarchive.hh archive.hh
 	g++ -g3 -O0 -c -o binarchive.o binarchive.cc
 	g++ -g3 -O0 -c -o binarchive.o binarchive.cc
 
 
-csvarchive.o: csvarchive.cc
+csvarchive.o: csvarchive.cc recordio.hh csvarchive.hh archive.hh
 	g++ -g3 -O0 -c -o csvarchive.o csvarchive.cc
 	g++ -g3 -O0 -c -o csvarchive.o csvarchive.cc
 
 
-xmlarchive.o: xmlarchive.cc
+xmlarchive.o: xmlarchive.cc recordio.hh xmlarchive.hh archive.hh
 	g++ -g3 -O0 -c -I${XERCESCROOT}/include -o xmlarchive.o xmlarchive.cc
 	g++ -g3 -O0 -c -I${XERCESCROOT}/include -o xmlarchive.o xmlarchive.cc
 		
 		
-exception.o: exception.cc
+exception.o: exception.cc exception.hh
 	g++ -g3 -O0 -c -o exception.o exception.cc
 	g++ -g3 -O0 -c -o exception.o exception.cc
 	
 	
 recordio.cc: recordio.hh archive.hh exception.hh
 recordio.cc: recordio.hh archive.hh exception.hh

+ 1 - 1
src/c++/librecordio/archive.hh

@@ -88,7 +88,7 @@ public:
   virtual void endVector(size_t len, const char* tag) = 0;
   virtual void endVector(size_t len, const char* tag) = 0;
   virtual void startMap(size_t len, const char* tag) = 0;
   virtual void startMap(size_t len, const char* tag) = 0;
   virtual void endMap(size_t len, const char* tag) = 0;
   virtual void endMap(size_t len, const char* tag) = 0;
-  virtual void serialize(hadoop::Record& s, const char* tag) {
+  virtual void serialize(const hadoop::Record& s, const char* tag) {
     s.serialize(*this, tag);
     s.serialize(*this, tag);
   }
   }
   template <typename T>
   template <typename T>

+ 1 - 1
src/c++/librecordio/recordio.hh

@@ -44,7 +44,7 @@ class OArchive;
 class Record {
 class Record {
 public:
 public:
   virtual bool validate() const = 0;
   virtual bool validate() const = 0;
-  virtual void serialize(OArchive& archive, const char* tag) = 0;
+  virtual void serialize(OArchive& archive, const char* tag) const = 0;
   virtual void deserialize(IArchive& archive, const char* tag) = 0;
   virtual void deserialize(IArchive& archive, const char* tag) = 0;
   virtual const std::string& type() const = 0;
   virtual const std::string& type() const = 0;
   virtual const std::string& signature() const = 0;
   virtual const std::string& signature() const = 0;

+ 34 - 28
src/c++/librecordio/xmlarchive.cc

@@ -67,7 +67,8 @@ void hadoop::MySAXHandler::characters(const XMLCh* const buf, const unsigned int
   }
   }
 }
 }
 
 
-static char hexchars[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+static char hexchars[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+                          'A', 'B', 'C', 'D', 'E', 'F' };
 
 
 static std::string toXMLString(std::string s)
 static std::string toXMLString(std::string s)
 {
 {
@@ -77,33 +78,35 @@ static std::string toXMLString(std::string s)
   const char* data = s.data();
   const char* data = s.data();
   for (i=0; i<len; i++, data++) {
   for (i=0; i<len; i++, data++) {
     char ch = *data;
     char ch = *data;
-    if (((ch >= 'A') && (ch <='Z')) ||
-        ((ch >= 'a') && (ch <='z')) ||
-        ((ch >= '0') && (ch <='9'))) {
-        r.push_back(ch);
-    } else if (ch == ' ') {
-      r.push_back('+');
+    if (ch == '<') {
+        r.append("&lt;");
+    } else if (ch == '&') {
+        r.append("&amp;");
+    } else if (ch == '%') {
+        r.append("%25");
+    } else if (ch < 0x20) {
+        uint8_t* pb = (uint8_t*) &ch;
+        char ch1 = hexchars[*pb/16];
+        char ch2 = hexchars[*pb%16];
+        r.push_back('%');
+        r.push_back(ch1);
+        r.push_back(ch2);
     } else {
     } else {
-      uint8_t* pb = (uint8_t*) &ch;
-      char ch1 = hexchars[*pb/16];
-      char ch2 = hexchars[*pb%16];
-      r.push_back('%');
-      r.push_back(ch1);
-      r.push_back(ch2);
+        r.push_back(ch);
     }
     }
   }
   }
   return r;
   return r;
 }
 }
 
 
 static uint8_t h2b(char ch) {
 static uint8_t h2b(char ch) {
-  if ((ch >= 'A') || (ch <= 'F')) {
-    return ch - 'A';
+  if ((ch >= '0') || (ch <= '9')) {
+    return ch - '0';
   }
   }
   if ((ch >= 'a') || (ch <= 'f')) {
   if ((ch >= 'a') || (ch <= 'f')) {
     return ch - 'a';
     return ch - 'a';
   }
   }
-  if ((ch >= '0') || (ch <= '9')) {
-    return ch - '0';
+  if ((ch >= 'A') || (ch <= 'F')) {
+    return ch - 'A';
   }
   }
   return 0;
   return 0;
 }
 }
@@ -116,19 +119,22 @@ static std::string fromXMLString(std::string s)
   uint8_t* pb = (uint8_t*) s.data();
   uint8_t* pb = (uint8_t*) s.data();
   for (i = 0; i < len; i++) {
   for (i = 0; i < len; i++) {
     uint8_t b = *pb;
     uint8_t b = *pb;
-    if (b == '+') {
-      r.push_back(' ');
-    } else if (b == '%') {
+    if (b == '%') {
       char *pc = (char*) (pb+1);
       char *pc = (char*) (pb+1);
-      char ch1 = *pc++;
-      char ch2 = *pc++;
-      pb += 2;
-      uint8_t cnv = h2b(ch1)*16 + h2b(ch2);
-      pc = (char*) &cnv;
-      r.push_back(*pc);
+      if (*pc == '%') {
+        r.push_back('%');
+        pb += 1;
+      } else {
+        char ch1 = *pc++;
+        char ch2 = *pc++;
+        pb += 2;
+        uint8_t cnv = h2b(ch1)*16 + h2b(ch2);
+        pc = (char*) &cnv;
+        r.push_back(*pc);
+      }
     } else {
     } else {
-      char *pc = (char*) pb;
-      r.push_back(*pc);
+        char *pc = (char*) pb;
+        r.push_back(*pc);
     }
     }
     pb++;
     pb++;
   }
   }

+ 5 - 5
src/java/org/apache/hadoop/record/BinaryInputArchive.java

@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataInputStream;
 import java.io.InputStream;
 import java.io.InputStream;
+import org.apache.hadoop.io.Text;
 
 
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.hadoop.io.WritableUtils;
 
 
@@ -78,11 +79,10 @@ public class BinaryInputArchive implements InputArchive {
         return in.readDouble();
         return in.readDouble();
     }
     }
     
     
-    public String readString(String tag) throws IOException {
-        int len = readInt(tag);
-        byte[] chars = new byte[len];
-        in.readFully(chars);
-        return new String(chars, "UTF-8");
+    public Text readString(String tag) throws IOException {
+        Text text = new Text();
+        text.readFields(in);
+        return text;
     }
     }
     
     
     public ByteArrayOutputStream readBuffer(String tag) throws IOException {
     public ByteArrayOutputStream readBuffer(String tag) throws IOException {

+ 3 - 4
src/java/org/apache/hadoop/record/BinaryOutputArchive.java

@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.io.DataOutput;
 import java.io.DataOutput;
 import java.io.DataOutputStream;
 import java.io.DataOutputStream;
 import java.io.OutputStream;
 import java.io.OutputStream;
+import org.apache.hadoop.io.Text;
 
 
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.hadoop.io.WritableUtils;
 
 
@@ -67,10 +68,8 @@ public class BinaryOutputArchive implements OutputArchive {
         out.writeDouble(d);
         out.writeDouble(d);
     }
     }
     
     
-    public void writeString(String s, String tag) throws IOException {
-        byte[] chars = s.getBytes("UTF-8");
-        writeInt(chars.length, tag);
-        out.write(chars);
+    public void writeString(Text s, String tag) throws IOException {
+        s.write(out);
     }
     }
     
     
     public void writeBuffer(ByteArrayOutputStream buf, String tag)
     public void writeBuffer(ByteArrayOutputStream buf, String tag)

+ 2 - 2
src/java/org/apache/hadoop/record/CsvInputArchive.java

@@ -20,9 +20,9 @@ import java.io.InputStreamReader;
 import java.io.InputStream;
 import java.io.InputStream;
 import java.io.IOException;
 import java.io.IOException;
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayOutputStream;
-import java.io.PushbackInputStream;
 import java.io.PushbackReader;
 import java.io.PushbackReader;
 import java.io.UnsupportedEncodingException;
 import java.io.UnsupportedEncodingException;
+import org.apache.hadoop.io.Text;
 
 
 /**
 /**
  *
  *
@@ -119,7 +119,7 @@ class CsvInputArchive implements InputArchive {
         }
         }
     }
     }
     
     
-    public String readString(String tag) throws IOException {
+    public Text readString(String tag) throws IOException {
         String sval = readField(tag);
         String sval = readField(tag);
         return Utils.fromCSVString(sval);
         return Utils.fromCSVString(sval);
         
         

+ 2 - 1
src/java/org/apache/hadoop/record/CsvOutputArchive.java

@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.io.PrintStream;
 import java.io.PrintStream;
 import java.io.OutputStream;
 import java.io.OutputStream;
 import java.io.UnsupportedEncodingException;
 import java.io.UnsupportedEncodingException;
+import org.apache.hadoop.io.Text;
 
 
 /**
 /**
  *
  *
@@ -88,7 +89,7 @@ public class CsvOutputArchive implements OutputArchive {
         throwExceptionOnError(tag);
         throwExceptionOnError(tag);
     }
     }
     
     
-    public void writeString(String s, String tag) throws IOException {
+    public void writeString(Text s, String tag) throws IOException {
         printCommaUnlessFirst();
         printCommaUnlessFirst();
         stream.print(Utils.toCSVString(s));
         stream.print(Utils.toCSVString(s));
         throwExceptionOnError(tag);
         throwExceptionOnError(tag);

+ 2 - 3
src/java/org/apache/hadoop/record/InputArchive.java

@@ -18,8 +18,7 @@ package org.apache.hadoop.record;
 
 
 import java.io.IOException;
 import java.io.IOException;
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayOutputStream;
-import java.util.TreeMap;
-import java.util.ArrayList;
+import org.apache.hadoop.io.Text;
 
 
 /**
 /**
  * Interface that all the Deserializers have to implement.
  * Interface that all the Deserializers have to implement.
@@ -33,7 +32,7 @@ public interface InputArchive {
     public long readLong(String tag) throws IOException;
     public long readLong(String tag) throws IOException;
     public float readFloat(String tag) throws IOException;
     public float readFloat(String tag) throws IOException;
     public double readDouble(String tag) throws IOException;
     public double readDouble(String tag) throws IOException;
-    public String readString(String tag) throws IOException;
+    public Text readString(String tag) throws IOException;
     public ByteArrayOutputStream readBuffer(String tag) throws IOException;
     public ByteArrayOutputStream readBuffer(String tag) throws IOException;
     public void readRecord(Record r, String tag) throws IOException;
     public void readRecord(Record r, String tag) throws IOException;
     public void startRecord(String tag) throws IOException;
     public void startRecord(String tag) throws IOException;

+ 2 - 1
src/java/org/apache/hadoop/record/OutputArchive.java

@@ -20,6 +20,7 @@ import java.io.IOException;
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayOutputStream;
 import java.util.TreeMap;
 import java.util.TreeMap;
 import java.util.ArrayList;
 import java.util.ArrayList;
+import org.apache.hadoop.io.Text;
 
 
 /**
 /**
 * Interface that all the serializers have to implement.
 * Interface that all the serializers have to implement.
@@ -33,7 +34,7 @@ public interface OutputArchive {
     public void writeLong(long l, String tag) throws IOException;
     public void writeLong(long l, String tag) throws IOException;
     public void writeFloat(float f, String tag) throws IOException;
     public void writeFloat(float f, String tag) throws IOException;
     public void writeDouble(double d, String tag) throws IOException;
     public void writeDouble(double d, String tag) throws IOException;
-    public void writeString(String s, String tag) throws IOException;
+    public void writeString(Text s, String tag) throws IOException;
     public void writeBuffer(ByteArrayOutputStream buf, String tag)
     public void writeBuffer(ByteArrayOutputStream buf, String tag)
         throws IOException;
         throws IOException;
     public void writeRecord(Record r, String tag) throws IOException;
     public void writeRecord(Record r, String tag) throws IOException;

+ 56 - 16
src/java/org/apache/hadoop/record/Utils.java

@@ -21,6 +21,8 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.io.UnsupportedEncodingException;
+import java.nio.charset.CharacterCodingException;
+import org.apache.hadoop.io.Text;
 
 
 /**
 /**
 * Various utility functions for Hadoop record I/O runtime.
 * Various utility functions for Hadoop record I/O runtime.
@@ -194,19 +196,45 @@ public class Utils {
         return true;
         return true;
     }
     }
     
     
+    public static final byte[] hexchars = { '0', '1', '2', '3', '4', '5',
+                                            '6', '7', '8', '9', 'A', 'B',
+                                            'C', 'D', 'E', 'F' };
     /**
     /**
      * 
      * 
      * @param s 
      * @param s 
      * @return 
      * @return 
      */
      */
-    static String toXMLString(String s) {
-        String rets = "";
-        try {
-            rets = java.net.URLEncoder.encode(s, "UTF-8");
-        } catch (UnsupportedEncodingException ex) {
-            ex.printStackTrace();
+    static String toXMLString(Text t) {
+        String s = t.toString();
+        StringBuffer sb = new StringBuffer();
+        for (int idx = 0; idx < s.length(); idx++) {
+          char ch = s.charAt(idx);
+          if (ch == '<') {
+            sb.append("&lt;");
+          } else if (ch == '&') {
+            sb.append("&amp;");
+          } else if (ch == '%') {
+            sb.append("%25");
+          } else if (ch < 0x20) {
+            sb.append("%");
+            sb.append(hexchars[ch/16]);
+            sb.append(hexchars[ch%16]);
+          } else {
+            sb.append(ch);
+          }
         }
         }
-        return rets;
+        return sb.toString();
+    }
+    
+    static private int h2c(char ch) {
+      if (ch >= '0' && ch <= '9') {
+        return ch - '0';
+      } else if (ch >= 'A' && ch <= 'F') {
+        return ch - 'A';
+      } else if (ch >= 'a' && ch <= 'f') {
+        return ch - 'a';
+      }
+      return 0;
     }
     }
     
     
     /**
     /**
@@ -214,14 +242,25 @@ public class Utils {
      * @param s 
      * @param s 
      * @return 
      * @return 
      */
      */
-    static String fromXMLString(String s) {
-        String rets = "";
+    static Text fromXMLString(String s) {
+        StringBuffer sb = new StringBuffer();
+        for (int idx = 0; idx < s.length();) {
+          char ch = s.charAt(idx++);
+          if (ch == '%') {
+            char ch1 = s.charAt(idx++);
+            char ch2 = s.charAt(idx++);
+            char res = (char)(h2c(ch1)*16 + h2c(ch2));
+            sb.append(res);
+          } else {
+            sb.append(ch);
+          }
+        }
         try {
         try {
-            rets = java.net.URLDecoder.decode(s, "UTF-8");
-        } catch (UnsupportedEncodingException ex) {
-            ex.printStackTrace();
+          return new Text(sb.toString());
+        } catch (CharacterCodingException ex) {
+          ex.printStackTrace();
+          return new Text();
         }
         }
-        return rets;
     }
     }
     
     
     /**
     /**
@@ -229,7 +268,8 @@ public class Utils {
      * @param s 
      * @param s 
      * @return 
      * @return 
      */
      */
-    static String toCSVString(String s) {
+    static String toCSVString(Text t) {
+        String s = t.toString();
         StringBuffer sb = new StringBuffer(s.length()+1);
         StringBuffer sb = new StringBuffer(s.length()+1);
         sb.append('\'');
         sb.append('\'');
         int len = s.length();
         int len = s.length();
@@ -267,7 +307,7 @@ public class Utils {
      * @throws java.io.IOException 
      * @throws java.io.IOException 
      * @return 
      * @return 
      */
      */
-    static String fromCSVString(String s) throws IOException {
+    static Text fromCSVString(String s) throws IOException {
         if (s.charAt(0) != '\'') {
         if (s.charAt(0) != '\'') {
             throw new IOException("Error deserializing string.");
             throw new IOException("Error deserializing string.");
         }
         }
@@ -290,7 +330,7 @@ public class Utils {
                 sb.append(c);
                 sb.append(c);
             }
             }
         }
         }
-        return sb.toString();
+        return new Text(sb.toString());
     }
     }
     
     
     /**
     /**

+ 2 - 2
src/java/org/apache/hadoop/record/XmlInputArchive.java

@@ -20,13 +20,13 @@ import java.io.InputStream;
 import java.io.IOException;
 import java.io.IOException;
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayOutputStream;
 import java.util.ArrayList;
 import java.util.ArrayList;
-import java.util.Iterator;
 
 
 import org.xml.sax.*;
 import org.xml.sax.*;
 import org.xml.sax.helpers.DefaultHandler;
 import org.xml.sax.helpers.DefaultHandler;
 import javax.xml.parsers.SAXParserFactory;
 import javax.xml.parsers.SAXParserFactory;
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParser;
+import org.apache.hadoop.io.Text;
 /**
 /**
  *
  *
  * @author Milind Bhandarkar
  * @author Milind Bhandarkar
@@ -197,7 +197,7 @@ class XmlInputArchive implements InputArchive {
         return Double.parseDouble(v.getValue());
         return Double.parseDouble(v.getValue());
     }
     }
     
     
-    public String readString(String tag) throws IOException {
+    public Text readString(String tag) throws IOException {
         Value v = next();
         Value v = next();
         if (!"string".equals(v.getType())) {
         if (!"string".equals(v.getType())) {
             throw new IOException("Error deserializing "+tag+".");
             throw new IOException("Error deserializing "+tag+".");

+ 2 - 1
src/java/org/apache/hadoop/record/XmlOutputArchive.java

@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.io.PrintStream;
 import java.io.PrintStream;
 import java.io.OutputStream;
 import java.io.OutputStream;
 import java.util.Stack;
 import java.util.Stack;
+import org.apache.hadoop.io.Text;
 
 
 /**
 /**
  *
  *
@@ -188,7 +189,7 @@ class XmlOutputArchive implements OutputArchive {
         printEndEnvelope(tag);
         printEndEnvelope(tag);
     }
     }
     
     
-    public void writeString(String s, String tag) throws IOException {
+    public void writeString(Text s, String tag) throws IOException {
         printBeginEnvelope(tag);
         printBeginEnvelope(tag);
         stream.print("<string>");
         stream.print("<string>");
         stream.print(Utils.toXMLString(s));
         stream.print(Utils.toXMLString(s));

+ 8 - 7
src/java/org/apache/hadoop/record/compiler/JRecord.java

@@ -116,15 +116,15 @@ public class JRecord extends JCompType {
             JField jf = (JField) i.next();
             JField jf = (JField) i.next();
             hh.write(jf.genCppDecl());
             hh.write(jf.genCppDecl());
         }
         }
-        hh.write("  std::bitset<"+mFields.size()+"> bs_;\n");
+        hh.write("  mutable std::bitset<"+mFields.size()+"> bs_;\n");
         hh.write("public:\n");
         hh.write("public:\n");
-        hh.write("  virtual void serialize(::hadoop::OArchive& a_, const char* tag);\n");
+        hh.write("  virtual void serialize(::hadoop::OArchive& a_, const char* tag) const;\n");
         hh.write("  virtual void deserialize(::hadoop::IArchive& a_, const char* tag);\n");
         hh.write("  virtual void deserialize(::hadoop::IArchive& a_, const char* tag);\n");
         hh.write("  virtual const ::std::string& type() const;\n");
         hh.write("  virtual const ::std::string& type() const;\n");
         hh.write("  virtual const ::std::string& signature() const;\n");
         hh.write("  virtual const ::std::string& signature() const;\n");
         hh.write("  virtual bool validate() const;\n");
         hh.write("  virtual bool validate() const;\n");
-        hh.write("  virtual bool operator<(const "+getName()+"& peer_);\n");
-        hh.write("  virtual bool operator==(const "+getName()+"& peer_);\n");
+        hh.write("  virtual bool operator<(const "+getName()+"& peer_) const;\n");
+        hh.write("  virtual bool operator==(const "+getName()+"& peer_) const;\n");
         hh.write("  virtual ~"+getName()+"() {};\n");
         hh.write("  virtual ~"+getName()+"() {};\n");
         int fIdx = 0;
         int fIdx = 0;
         for (Iterator i = mFields.iterator(); i.hasNext(); fIdx++) {
         for (Iterator i = mFields.iterator(); i.hasNext(); fIdx++) {
@@ -135,7 +135,7 @@ public class JRecord extends JCompType {
         for (int i=ns.length-1; i>=0; i--) {
         for (int i=ns.length-1; i>=0; i--) {
             hh.write("} // end namespace "+ns[i]+"\n");
             hh.write("} // end namespace "+ns[i]+"\n");
         }
         }
-        cc.write("void "+getCppFQName()+"::serialize(::hadoop::OArchive& a_, const char* tag) {\n");
+        cc.write("void "+getCppFQName()+"::serialize(::hadoop::OArchive& a_, const char* tag) const {\n");
         cc.write("  if (!validate()) throw new ::hadoop::IOException(\"All fields not set.\");\n");
         cc.write("  if (!validate()) throw new ::hadoop::IOException(\"All fields not set.\");\n");
         cc.write("  a_.startRecord(*this,tag);\n");
         cc.write("  a_.startRecord(*this,tag);\n");
         fIdx = 0;
         fIdx = 0;
@@ -182,7 +182,7 @@ public class JRecord extends JCompType {
         cc.write("  return true;\n");
         cc.write("  return true;\n");
         cc.write("}\n");
         cc.write("}\n");
         
         
-        cc.write("bool "+getCppFQName()+"::operator< (const "+getCppFQName()+"& peer_) {\n");
+        cc.write("bool "+getCppFQName()+"::operator< (const "+getCppFQName()+"& peer_) const {\n");
         cc.write("  return (1\n");
         cc.write("  return (1\n");
         for (Iterator i = mFields.iterator(); i.hasNext();) {
         for (Iterator i = mFields.iterator(); i.hasNext();) {
             JField jf = (JField) i.next();
             JField jf = (JField) i.next();
@@ -192,7 +192,7 @@ public class JRecord extends JCompType {
         cc.write("  );\n");
         cc.write("  );\n");
         cc.write("}\n");
         cc.write("}\n");
         
         
-        cc.write("bool "+getCppFQName()+"::operator== (const "+getCppFQName()+"& peer_) {\n");
+        cc.write("bool "+getCppFQName()+"::operator== (const "+getCppFQName()+"& peer_) const {\n");
         cc.write("  return (1\n");
         cc.write("  return (1\n");
         for (Iterator i = mFields.iterator(); i.hasNext();) {
         for (Iterator i = mFields.iterator(); i.hasNext();) {
             JField jf = (JField) i.next();
             JField jf = (JField) i.next();
@@ -234,6 +234,7 @@ public class JRecord extends JCompType {
         FileWriter jj = new FileWriter(jfile);
         FileWriter jj = new FileWriter(jfile);
         jj.write("// File generated by hadoop record compiler. Do not edit.\n");
         jj.write("// File generated by hadoop record compiler. Do not edit.\n");
         jj.write("package "+getJavaPackage()+";\n\n");
         jj.write("package "+getJavaPackage()+";\n\n");
+        jj.write("import org.apache.hadoop.io.Text;\n\n");
         jj.write("public class "+getName()+" implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {\n");
         jj.write("public class "+getName()+" implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {\n");
         for (Iterator i = mFields.iterator(); i.hasNext();) {
         for (Iterator i = mFields.iterator(); i.hasNext();) {
             JField jf = (JField) i.next();
             JField jf = (JField) i.next();

+ 2 - 2
src/java/org/apache/hadoop/record/compiler/JString.java

@@ -24,7 +24,7 @@ public class JString extends JCompType {
     
     
     /** Creates a new instance of JString */
     /** Creates a new instance of JString */
     public JString() {
     public JString() {
-        super(" ::std::string", "String", "String", "String");
+        super(" ::std::string", "Text", "String", "Text");
     }
     }
     
     
     public String getSignature() {
     public String getSignature() {
@@ -34,7 +34,7 @@ public class JString extends JCompType {
     public String genJavaReadWrapper(String fname, String tag, boolean decl) {
     public String genJavaReadWrapper(String fname, String tag, boolean decl) {
         String ret = "";
         String ret = "";
         if (decl) {
         if (decl) {
-            ret = "    String "+fname+";\n";
+            ret = "    Text "+fname+";\n";
         }
         }
         return ret + "        "+fname+"=a_.readString(\""+tag+"\");\n";
         return ret + "        "+fname+"=a_.readString(\""+tag+"\");\n";
     }
     }

+ 1 - 1
src/java/org/apache/hadoop/record/package.html

@@ -595,7 +595,7 @@ int             int32_t             int
 long            int64_t             long
 long            int64_t             long
 float           float               float
 float           float               float
 double          double              double
 double          double              double
-ustring         std::string         java.lang.String
+ustring         std::string         Text
 buffer          std::string         java.io.ByteArrayOutputStream
 buffer          std::string         java.io.ByteArrayOutputStream
 class type      class type          class type
 class type      class type          class type
 vector<type>    std::vector<type>   java.util.ArrayList
 vector<type>    std::vector<type>   java.util.ArrayList

+ 4 - 5
src/test/org/apache/hadoop/record/test/FromCpp.java

@@ -17,15 +17,14 @@
 package org.apache.hadoop.record.test;
 package org.apache.hadoop.record.test;
 
 
 import org.apache.hadoop.record.RecordReader;
 import org.apache.hadoop.record.RecordReader;
-import org.apache.hadoop.record.RecordWriter;
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileInputStream;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.ArrayList;
 import java.util.TreeMap;
 import java.util.TreeMap;
 import junit.framework.*;
 import junit.framework.*;
+import org.apache.hadoop.io.Text;
 
 
 /**
 /**
  *
  *
@@ -54,7 +53,7 @@ public class FromCpp extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("random text");
+            r1.setStringVal(new Text("random text"));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setVectorVal(new ArrayList());
             r1.setVectorVal(new ArrayList());
             r1.setMapVal(new TreeMap());
             r1.setMapVal(new TreeMap());
@@ -80,7 +79,7 @@ public class FromCpp extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("random text");
+            r1.setStringVal(new Text("random text"));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setVectorVal(new ArrayList());
             r1.setVectorVal(new ArrayList());
             r1.setMapVal(new TreeMap());
             r1.setMapVal(new TreeMap());
@@ -106,7 +105,7 @@ public class FromCpp extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("random text");
+            r1.setStringVal(new Text("random text"));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setVectorVal(new ArrayList());
             r1.setVectorVal(new ArrayList());
             r1.setMapVal(new TreeMap());
             r1.setMapVal(new TreeMap());

+ 3 - 1
src/test/org/apache/hadoop/record/test/RecBuffer.java

@@ -1,6 +1,8 @@
 // File generated by hadoop record compiler. Do not edit.
 // File generated by hadoop record compiler. Do not edit.
 package org.apache.hadoop.record.test;
 package org.apache.hadoop.record.test;
 
 
+import org.apache.hadoop.io.Text;
+
 public class RecBuffer implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
 public class RecBuffer implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
   private java.io.ByteArrayOutputStream mData;
   private java.io.ByteArrayOutputStream mData;
   private java.util.BitSet bs_;
   private java.util.BitSet bs_;
@@ -56,7 +58,7 @@ public class RecBuffer implements org.apache.hadoop.record.Record, org.apache.ha
     org.apache.hadoop.record.BinaryInputArchive archive = new org.apache.hadoop.record.BinaryInputArchive(in);
     org.apache.hadoop.record.BinaryInputArchive archive = new org.apache.hadoop.record.BinaryInputArchive(in);
     deserialize(archive, "");
     deserialize(archive, "");
   }
   }
-  private boolean validate() {
+  public boolean validate() {
     if (bs_.cardinality() != bs_.length()) return false;
     if (bs_.cardinality() != bs_.length()) return false;
     return true;
     return true;
 }
 }

+ 3 - 1
src/test/org/apache/hadoop/record/test/RecInt.java

@@ -1,6 +1,8 @@
 // File generated by hadoop record compiler. Do not edit.
 // File generated by hadoop record compiler. Do not edit.
 package org.apache.hadoop.record.test;
 package org.apache.hadoop.record.test;
 
 
+import org.apache.hadoop.io.Text;
+
 public class RecInt implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
 public class RecInt implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
   private int mData;
   private int mData;
   private java.util.BitSet bs_;
   private java.util.BitSet bs_;
@@ -56,7 +58,7 @@ public class RecInt implements org.apache.hadoop.record.Record, org.apache.hadoo
     org.apache.hadoop.record.BinaryInputArchive archive = new org.apache.hadoop.record.BinaryInputArchive(in);
     org.apache.hadoop.record.BinaryInputArchive archive = new org.apache.hadoop.record.BinaryInputArchive(in);
     deserialize(archive, "");
     deserialize(archive, "");
   }
   }
-  private boolean validate() {
+  public boolean validate() {
     if (bs_.cardinality() != bs_.length()) return false;
     if (bs_.cardinality() != bs_.length()) return false;
     return true;
     return true;
 }
 }

+ 6 - 4
src/test/org/apache/hadoop/record/test/RecRecord0.java

@@ -1,23 +1,25 @@
 // File generated by hadoop record compiler. Do not edit.
 // File generated by hadoop record compiler. Do not edit.
 package org.apache.hadoop.record.test;
 package org.apache.hadoop.record.test;
 
 
+import org.apache.hadoop.io.Text;
+
 public class RecRecord0 implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
 public class RecRecord0 implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
-  private String mStringVal;
+  private Text mStringVal;
   private java.util.BitSet bs_;
   private java.util.BitSet bs_;
   public RecRecord0() {
   public RecRecord0() {
     bs_ = new java.util.BitSet(2);
     bs_ = new java.util.BitSet(2);
     bs_.set(1);
     bs_.set(1);
   }
   }
   public RecRecord0(
   public RecRecord0(
-        String m0) {
+        Text m0) {
     bs_ = new java.util.BitSet(2);
     bs_ = new java.util.BitSet(2);
     bs_.set(1);
     bs_.set(1);
     mStringVal=m0; bs_.set(0);
     mStringVal=m0; bs_.set(0);
   }
   }
-  public String getStringVal() {
+  public Text getStringVal() {
     return mStringVal;
     return mStringVal;
   }
   }
-  public void setStringVal(String m_) {
+  public void setStringVal(Text m_) {
     mStringVal=m_; bs_.set(0);
     mStringVal=m_; bs_.set(0);
   }
   }
   public void serialize(org.apache.hadoop.record.OutputArchive a_, String tag) throws java.io.IOException {
   public void serialize(org.apache.hadoop.record.OutputArchive a_, String tag) throws java.io.IOException {

+ 15 - 13
src/test/org/apache/hadoop/record/test/RecRecord1.java

@@ -1,6 +1,8 @@
 // File generated by hadoop record compiler. Do not edit.
 // File generated by hadoop record compiler. Do not edit.
 package org.apache.hadoop.record.test;
 package org.apache.hadoop.record.test;
 
 
+import org.apache.hadoop.io.Text;
+
 public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
 public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
   private boolean mBoolVal;
   private boolean mBoolVal;
   private byte mByteVal;
   private byte mByteVal;
@@ -8,7 +10,7 @@ public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.h
   private long mLongVal;
   private long mLongVal;
   private float mFloatVal;
   private float mFloatVal;
   private double mDoubleVal;
   private double mDoubleVal;
-  private String mStringVal;
+  private Text mStringVal;
   private java.io.ByteArrayOutputStream mBufferVal;
   private java.io.ByteArrayOutputStream mBufferVal;
   private java.util.ArrayList mVectorVal;
   private java.util.ArrayList mVectorVal;
   private java.util.TreeMap mMapVal;
   private java.util.TreeMap mMapVal;
@@ -25,7 +27,7 @@ public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.h
         long m3,
         long m3,
         float m4,
         float m4,
         double m5,
         double m5,
-        String m6,
+        Text m6,
         java.io.ByteArrayOutputStream m7,
         java.io.ByteArrayOutputStream m7,
         java.util.ArrayList m8,
         java.util.ArrayList m8,
         java.util.TreeMap m9,
         java.util.TreeMap m9,
@@ -80,10 +82,10 @@ public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.h
   public void setDoubleVal(double m_) {
   public void setDoubleVal(double m_) {
     mDoubleVal=m_; bs_.set(5);
     mDoubleVal=m_; bs_.set(5);
   }
   }
-  public String getStringVal() {
+  public Text getStringVal() {
     return mStringVal;
     return mStringVal;
   }
   }
-  public void setStringVal(String m_) {
+  public void setStringVal(Text m_) {
     mStringVal=m_; bs_.set(6);
     mStringVal=m_; bs_.set(6);
   }
   }
   public java.io.ByteArrayOutputStream getBufferVal() {
   public java.io.ByteArrayOutputStream getBufferVal() {
@@ -133,7 +135,7 @@ public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.h
       a_.startVector(mVectorVal,"VectorVal");
       a_.startVector(mVectorVal,"VectorVal");
       int len1 = mVectorVal.size();
       int len1 = mVectorVal.size();
       for(int vidx1 = 0; vidx1<len1; vidx1++) {
       for(int vidx1 = 0; vidx1<len1; vidx1++) {
-        String e1 = (String) mVectorVal.get(vidx1);
+        Text e1 = (Text) mVectorVal.get(vidx1);
         a_.writeString(e1,"e1");
         a_.writeString(e1,"e1");
       }
       }
       a_.endVector(mVectorVal,"VectorVal");
       a_.endVector(mVectorVal,"VectorVal");
@@ -144,8 +146,8 @@ public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.h
       java.util.Set es1 = mMapVal.entrySet();
       java.util.Set es1 = mMapVal.entrySet();
       for(java.util.Iterator midx1 = es1.iterator(); midx1.hasNext(); ) {
       for(java.util.Iterator midx1 = es1.iterator(); midx1.hasNext(); ) {
         java.util.Map.Entry me1 = (java.util.Map.Entry) midx1.next();
         java.util.Map.Entry me1 = (java.util.Map.Entry) midx1.next();
-        String k1 = (String) me1.getKey();
-        String v1 = (String) me1.getValue();
+        Text k1 = (Text) me1.getKey();
+        Text v1 = (Text) me1.getValue();
         a_.writeString(k1,"k1");
         a_.writeString(k1,"k1");
         a_.writeString(v1,"v1");
         a_.writeString(v1,"v1");
       }
       }
@@ -178,7 +180,7 @@ public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.h
       org.apache.hadoop.record.Index vidx1 = a_.startVector("VectorVal");
       org.apache.hadoop.record.Index vidx1 = a_.startVector("VectorVal");
       mVectorVal=new java.util.ArrayList();
       mVectorVal=new java.util.ArrayList();
       for (; !vidx1.done(); vidx1.incr()) {
       for (; !vidx1.done(); vidx1.incr()) {
-    String e1;
+    Text e1;
         e1=a_.readString("e1");
         e1=a_.readString("e1");
         mVectorVal.add(e1);
         mVectorVal.add(e1);
       }
       }
@@ -189,9 +191,9 @@ public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.h
       org.apache.hadoop.record.Index midx1 = a_.startMap("MapVal");
       org.apache.hadoop.record.Index midx1 = a_.startMap("MapVal");
       mMapVal=new java.util.TreeMap();
       mMapVal=new java.util.TreeMap();
       for (; !midx1.done(); midx1.incr()) {
       for (; !midx1.done(); midx1.incr()) {
-    String k1;
+    Text k1;
         k1=a_.readString("k1");
         k1=a_.readString("k1");
-    String v1;
+    Text v1;
         v1=a_.readString("v1");
         v1=a_.readString("v1");
         mMapVal.put(k1,v1);
         mMapVal.put(k1,v1);
       }
       }
@@ -222,7 +224,7 @@ public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.h
       a_.startVector(mVectorVal,"VectorVal");
       a_.startVector(mVectorVal,"VectorVal");
       int len1 = mVectorVal.size();
       int len1 = mVectorVal.size();
       for(int vidx1 = 0; vidx1<len1; vidx1++) {
       for(int vidx1 = 0; vidx1<len1; vidx1++) {
-        String e1 = (String) mVectorVal.get(vidx1);
+        Text e1 = (Text) mVectorVal.get(vidx1);
         a_.writeString(e1,"e1");
         a_.writeString(e1,"e1");
       }
       }
       a_.endVector(mVectorVal,"VectorVal");
       a_.endVector(mVectorVal,"VectorVal");
@@ -232,8 +234,8 @@ public class RecRecord1 implements org.apache.hadoop.record.Record, org.apache.h
       java.util.Set es1 = mMapVal.entrySet();
       java.util.Set es1 = mMapVal.entrySet();
       for(java.util.Iterator midx1 = es1.iterator(); midx1.hasNext(); ) {
       for(java.util.Iterator midx1 = es1.iterator(); midx1.hasNext(); ) {
         java.util.Map.Entry me1 = (java.util.Map.Entry) midx1.next();
         java.util.Map.Entry me1 = (java.util.Map.Entry) midx1.next();
-        String k1 = (String) me1.getKey();
-        String v1 = (String) me1.getValue();
+        Text k1 = (Text) me1.getKey();
+        Text v1 = (Text) me1.getValue();
         a_.writeString(k1,"k1");
         a_.writeString(k1,"k1");
         a_.writeString(v1,"v1");
         a_.writeString(v1,"v1");
       }
       }

+ 7 - 5
src/test/org/apache/hadoop/record/test/RecString.java

@@ -1,23 +1,25 @@
 // File generated by hadoop record compiler. Do not edit.
 // File generated by hadoop record compiler. Do not edit.
 package org.apache.hadoop.record.test;
 package org.apache.hadoop.record.test;
 
 
+import org.apache.hadoop.io.Text;
+
 public class RecString implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
 public class RecString implements org.apache.hadoop.record.Record, org.apache.hadoop.io.WritableComparable {
-  private String mData;
+  private Text mData;
   private java.util.BitSet bs_;
   private java.util.BitSet bs_;
   public RecString() {
   public RecString() {
     bs_ = new java.util.BitSet(2);
     bs_ = new java.util.BitSet(2);
     bs_.set(1);
     bs_.set(1);
   }
   }
   public RecString(
   public RecString(
-        String m0) {
+        Text m0) {
     bs_ = new java.util.BitSet(2);
     bs_ = new java.util.BitSet(2);
     bs_.set(1);
     bs_.set(1);
     mData=m0; bs_.set(0);
     mData=m0; bs_.set(0);
   }
   }
-  public String getData() {
+  public Text getData() {
     return mData;
     return mData;
   }
   }
-  public void setData(String m_) {
+  public void setData(Text m_) {
     mData=m_; bs_.set(0);
     mData=m_; bs_.set(0);
   }
   }
   public void serialize(org.apache.hadoop.record.OutputArchive a_, String tag) throws java.io.IOException {
   public void serialize(org.apache.hadoop.record.OutputArchive a_, String tag) throws java.io.IOException {
@@ -56,7 +58,7 @@ public class RecString implements org.apache.hadoop.record.Record, org.apache.ha
     org.apache.hadoop.record.BinaryInputArchive archive = new org.apache.hadoop.record.BinaryInputArchive(in);
     org.apache.hadoop.record.BinaryInputArchive archive = new org.apache.hadoop.record.BinaryInputArchive(in);
     deserialize(archive, "");
     deserialize(archive, "");
   }
   }
-  private boolean validate() {
+  public boolean validate() {
     if (bs_.cardinality() != bs_.length()) return false;
     if (bs_.cardinality() != bs_.length()) return false;
     return true;
     return true;
 }
 }

+ 9 - 9
src/test/org/apache/hadoop/record/test/TestMapRed.java

@@ -94,7 +94,7 @@ public class TestMapRed extends TestCase {
 
 
             for (int i = 0; i < randomCount; i++) {
             for (int i = 0; i < randomCount; i++) {
                 out.collect(new RecInt(Math.abs(r.nextInt())),
                 out.collect(new RecInt(Math.abs(r.nextInt())),
-                        new RecString(Integer.toString(randomVal)));
+                        new RecString(new Text(Integer.toString(randomVal))));
             }
             }
         }
         }
         public void close() {
         public void close() {
@@ -113,9 +113,9 @@ public class TestMapRed extends TestCase {
                 throws IOException {
                 throws IOException {
             int keyint = ((RecInt) key).getData();
             int keyint = ((RecInt) key).getData();
             while (it.hasNext()) {
             while (it.hasNext()) {
-                String val = ((RecString) it.next()).getData();
-                out.collect(new RecInt(Integer.parseInt(val)),
-                        new RecString(""));
+                Text val = ((RecString) it.next()).getData();
+                out.collect(new RecInt(Integer.parseInt(val.toString())),
+                        new RecString(new Text("")));
             }
             }
         }
         }
         public void close() {
         public void close() {
@@ -144,8 +144,8 @@ public class TestMapRed extends TestCase {
 
 
         public void map(WritableComparable key, Writable val, OutputCollector out, Reporter reporter) throws IOException {
         public void map(WritableComparable key, Writable val, OutputCollector out, Reporter reporter) throws IOException {
             int pos = ((RecInt) key).getData();
             int pos = ((RecInt) key).getData();
-            String str = ((RecString) val).getData();
-            out.collect(new RecInt(pos), new RecString("1"));
+            Text str = ((RecString) val).getData();
+            out.collect(new RecInt(pos), new RecString(new Text("1")));
         }
         }
         public void close() {
         public void close() {
         }
         }
@@ -163,7 +163,7 @@ public class TestMapRed extends TestCase {
                 it.next();
                 it.next();
                 count++;
                 count++;
             }
             }
-            out.collect(new RecInt(keyint), new RecString(Integer.toString(count)));
+            out.collect(new RecInt(keyint), new RecString(new Text(Integer.toString(count))));
         }
         }
         public void close() {
         public void close() {
         }
         }
@@ -183,8 +183,8 @@ public class TestMapRed extends TestCase {
 
 
         public void map(WritableComparable key, Writable val, OutputCollector out, Reporter reporter) throws IOException {
         public void map(WritableComparable key, Writable val, OutputCollector out, Reporter reporter) throws IOException {
             int keyint = ((RecInt) key).getData();
             int keyint = ((RecInt) key).getData();
-            String valstr = ((RecString) val).getData();
-            out.collect(new RecInt(keyint), new RecInt(Integer.parseInt(valstr)));
+            Text valstr = ((RecString) val).getData();
+            out.collect(new RecInt(keyint), new RecInt(Integer.parseInt(valstr.toString())));
         }
         }
         public void close() {
         public void close() {
         }
         }

+ 7 - 6
src/test/org/apache/hadoop/record/test/TestRecordIO.java

@@ -26,6 +26,7 @@ import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.FileOutputStream;
 import java.util.ArrayList;
 import java.util.ArrayList;
 import java.util.TreeMap;
 import java.util.TreeMap;
+import org.apache.hadoop.io.Text;
 
 
 /**
 /**
  *
  *
@@ -56,12 +57,12 @@ public class TestRecordIO extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("random text");
+            r1.setStringVal(new Text("random text"));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setVectorVal(new ArrayList());
             r1.setVectorVal(new ArrayList());
             r1.setMapVal(new TreeMap());
             r1.setMapVal(new TreeMap());
             RecRecord0 r0 = new RecRecord0();
             RecRecord0 r0 = new RecRecord0();
-            r0.setStringVal("other random text");
+            r0.setStringVal(new Text("other random text"));
             r1.setRecordVal(r0);
             r1.setRecordVal(r0);
             out.write(r1);
             out.write(r1);
             ostream.close();
             ostream.close();
@@ -90,12 +91,12 @@ public class TestRecordIO extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("random text");
+            r1.setStringVal(new Text("random text"));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setVectorVal(new ArrayList());
             r1.setVectorVal(new ArrayList());
             r1.setMapVal(new TreeMap());
             r1.setMapVal(new TreeMap());
             RecRecord0 r0 = new RecRecord0();
             RecRecord0 r0 = new RecRecord0();
-            r0.setStringVal("other random text");
+            r0.setStringVal(new Text("other random text"));
             r1.setRecordVal(r0);
             r1.setRecordVal(r0);
             out.write(r1);
             out.write(r1);
             ostream.close();
             ostream.close();
@@ -124,12 +125,12 @@ public class TestRecordIO extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("random text");
+            r1.setStringVal(new Text("random < %text<&more"));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setVectorVal(new ArrayList());
             r1.setVectorVal(new ArrayList());
             r1.setMapVal(new TreeMap());
             r1.setMapVal(new TreeMap());
             RecRecord0 r0 = new RecRecord0();
             RecRecord0 r0 = new RecRecord0();
-            r0.setStringVal("other random text");
+            r0.setStringVal(new Text("other %random & >&more text"));
             r1.setRecordVal(r0);
             r1.setRecordVal(r0);
             out.write(r1);
             out.write(r1);
             ostream.close();
             ostream.close();

+ 4 - 5
src/test/org/apache/hadoop/record/test/ToCpp.java

@@ -19,13 +19,12 @@ package org.apache.hadoop.record.test;
 import java.io.IOException;
 import java.io.IOException;
 import junit.framework.*;
 import junit.framework.*;
 import org.apache.hadoop.record.RecordWriter;
 import org.apache.hadoop.record.RecordWriter;
-import org.apache.hadoop.record.RecordReader;
 import java.io.ByteArrayOutputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.FileOutputStream;
 import java.util.ArrayList;
 import java.util.ArrayList;
 import java.util.TreeMap;
 import java.util.TreeMap;
+import org.apache.hadoop.io.Text;
 
 
 /**
 /**
  *
  *
@@ -56,7 +55,7 @@ public class ToCpp extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("random text");
+            r1.setStringVal(new Text("random text"));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setVectorVal(new ArrayList());
             r1.setVectorVal(new ArrayList());
             r1.setMapVal(new TreeMap());
             r1.setMapVal(new TreeMap());
@@ -80,7 +79,7 @@ public class ToCpp extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("random text");
+            r1.setStringVal(new Text("random text"));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setVectorVal(new ArrayList());
             r1.setVectorVal(new ArrayList());
             r1.setMapVal(new TreeMap());
             r1.setMapVal(new TreeMap());
@@ -104,7 +103,7 @@ public class ToCpp extends TestCase {
             r1.setDoubleVal(1.5234);
             r1.setDoubleVal(1.5234);
             r1.setIntVal(4567);
             r1.setIntVal(4567);
             r1.setLongVal(0x5a5a5a5a5a5aL);
             r1.setLongVal(0x5a5a5a5a5a5aL);
-            r1.setStringVal("random text");
+            r1.setStringVal(new Text("random text"));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setBufferVal(new ByteArrayOutputStream(20));
             r1.setVectorVal(new ArrayList());
             r1.setVectorVal(new ArrayList());
             r1.setMapVal(new TreeMap());
             r1.setMapVal(new TreeMap());