Browse Source

HADOOP-3711. Fixes the Streaming input parsing to properly find the separator. Contributed by Amareshwari Sriramadasu.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@675541 13f79535-47bb-0310-9956-ffa450edef68
Devaraj Das cách đây 17 năm
mục cha
commit
8b9ee131fe

+ 3 - 0
CHANGES.txt

@@ -84,6 +84,9 @@ Trunk (unreleased changes)
     HADOOP-3640. Fix the read method in the NativeS3InputStream. (tomwhite via
     omalley)
 
+    HADOOP-3711. Fixes the Streaming input parsing to properly find the 
+    separator. (Amareshwari Sriramadasu via ddas)
+
 Release 0.18.0 - Unreleased
 
   INCOMPATIBLE CHANGES

+ 2 - 2
src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapRed.java

@@ -322,10 +322,10 @@ public abstract class PipeMapRed {
     byte[] separator = getFieldSeparator();
     
     // Need to find numKeyFields separators
-    int pos = UTF8ByteArrayUtils.findBytes(line, 0, line.length, separator);
+    int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
     for(int k=1; k<numKeyFields && pos!=-1; k++) {
       pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, 
-          line.length, separator);
+          length, separator);
     }
     try {
       if (pos == -1) {

+ 7 - 2
src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingKeyValue.java

@@ -35,9 +35,14 @@ public class TestStreamingKeyValue extends TestCase
   // Third line of input does not have any tab character.
   // So, the whole line is the key and value is empty.
   protected String input = 
-    "roses are \tred\t\n\tviolets are blue\nbunnies are pink\n";
+    "roses are \tred\t\n\tviolets are blue\nbunnies are pink\n" +
+    "this is for testing a big\tinput line\n" +
+    "small input\n";
   protected String outputExpect = 
-    "\tviolets are blue\nbunnies are pink\t\nroses are \tred\t\n";
+    "\tviolets are blue\nbunnies are pink\t\n" + 
+    "roses are \tred\t\n" +
+    "small input\t\n" +
+    "this is for testing a big\tinput line\n";
 
   private StreamJob job;