فهرست منبع

HADOOP-517. Fix bug in contrib/streaming's end of line detection. Contributed by Hairong.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@441645 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 سال پیش
والد
کامیت
21b8d315fe

+ 3 - 0
CHANGES.txt

@@ -158,6 +158,9 @@ Trunk (unreleased changes)
 39. HADOOP-458.  Fix a memory corruption bug in libhdfs.
     (Arun C Murthy via cutting)
 
+40. HADOOP-517.  Fix a contrib/streaming bug in end-of-line detection.
+    (Hairong Kuang via cutting)
+
 
 Release 0.5.0 - 2006-08-04
 

+ 2 - 2
src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapRed.java

@@ -397,7 +397,7 @@ public abstract class PipeMapRed {
             Text key = new Text();
             Text val = new Text();
             // 3/4 Tool to Hadoop
-            while((answer=UTF8ByteArrayUtils.readLine(clientIn_))!= null) {
+            while((answer=UTF8ByteArrayUtils.readLine((InputStream)clientIn_))!= null) {
                 // 4/4 Hadoop out
                 if(optSideEffect_) {
                     sideEffectOut_.write(answer);
@@ -434,7 +434,7 @@ public abstract class PipeMapRed {
       byte [] line;
       try {
         long num = 0;
-        while((line=UTF8ByteArrayUtils.readLine(clientErr_)) != null) {
+        while((line=UTF8ByteArrayUtils.readLine((InputStream)clientErr_)) != null) {
           num++;
           String lineStr = new String(line, "UTF-8"); 
           logprintln(lineStr);

+ 1 - 1
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamLineRecordReader.java

@@ -99,7 +99,7 @@ public class StreamLineRecordReader extends StreamBaseRecordReader
                 return false;
         }
         
-        line = UTF8ByteArrayUtils.readLine(in_);
+        line = UTF8ByteArrayUtils.readLine((InputStream)in_);
         if(line==null)
             return false;
         try {

+ 18 - 3
src/contrib/streaming/src/java/org/apache/hadoop/streaming/UTF8ByteArrayUtils.java

@@ -16,8 +16,9 @@
 
 package org.apache.hadoop.streaming;
 
-import java.io.DataInputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
 
 import org.apache.hadoop.io.Text;
 
@@ -70,7 +71,7 @@ public class UTF8ByteArrayUtils {
      * @return a byte array containing the line 
      * @throws IOException
      */
-    public static byte[] readLine(DataInputStream in) throws IOException {
+    public static byte[] readLine(InputStream in) throws IOException {
       byte [] buf = new byte[128];
       byte [] lineBuffer = buf;
       int room = 128;
@@ -84,9 +85,23 @@ public class UTF8ByteArrayUtils {
         }
 
         char c = (char)b;
-        if (c == '\r' || c == '\n')
+        if (c == '\n')
           break;
 
+        if (c == '\r') {
+          in.mark(1);
+          int c2 = in.read();
+          if(c2 == -1) {
+              isEOF = true;
+              break;
+          }
+          if (c2 != '\n') {
+            // push it back
+            in.reset();
+          }
+          break;
+        }
+        
         if (--room < 0) {
             buf = new byte[offset + 128];
             room = buf.length - offset - 1;