Prechádzať zdrojové kódy

HADOOP-8654. TextInputFormat delimiter bug (Gelesh and Jason Lowe via bobby)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1373859 13f79535-47bb-0310-9956-ffa450edef68
Robert Joseph Evans pred 12 rokmi
rodič
commit
2926ae6ead

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -395,6 +395,9 @@ Branch-2 ( Unreleased changes )
     HADOOP-8659. Native libraries must build with soft-float ABI for Oracle JVM
     on ARM. (Trevor Robinson via todd)
 
+    HADOOP-8654. TextInputFormat delimiter bug (Gelesh and Jason Lowe via
+    bobby)
+
   BREAKDOWN OF HDFS-3042 SUBTASKS
 
     HADOOP-8220. ZKFailoverController doesn't handle failure to become active

+ 2 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/LineReader.java

@@ -266,7 +266,8 @@ public class LineReader {
             bufferPosn++;
             break;
           }
-        } else {
+        } else if (delPosn != 0) {
+          bufferPosn--; // recheck if bufferPosn matches start of delimiter
           delPosn = 0;
         }
       }

+ 49 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java

@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+import java.io.ByteArrayInputStream;
+
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import junit.framework.Assert;
+
+public class TestLineReader { // exercises LineReader record splitting with a custom multi-character delimiter
+
+  @Test
+  public void testCustomDelimiter() throws Exception { // each readLine call below is followed by a check of the record it produced
+    String data = "record Bangalorrecord recorrecordrecord Kerala"; // partial delimiter prefixes (r, recor) sit directly before real delimiters
+    String delimiter = "record";
+    LineReader reader = new LineReader(
+        new ByteArrayInputStream(data.getBytes()), // NOTE(review): no-arg getBytes uses the platform default charset; the data here is ASCII-only
+        delimiter.getBytes());
+    Text line = new Text();
+    reader.readLine(line);
+    Assert.assertEquals("", line.toString()); // input begins with the delimiter, so the first record is empty
+    reader.readLine(line);
+    Assert.assertEquals(" Bangalor", line.toString()); // the trailing r of Bangalor must not hide the delimiter that follows it
+    reader.readLine(line);
+    Assert.assertEquals(" recor", line.toString()); // near-complete prefix recor is kept as data; the full delimiter right after it ends the record
+    reader.readLine(line);
+    Assert.assertEquals("", line.toString()); // two back-to-back delimiters yield an empty record
+    reader.readLine(line);
+    Assert.assertEquals(" Kerala", line.toString()); // last record runs to end of input with no trailing delimiter
+  }
+}