|
@@ -19,138 +19,148 @@
|
|
|
package org.apache.hadoop.util;
|
|
|
|
|
|
import java.io.ByteArrayInputStream;
|
|
|
+import java.nio.charset.StandardCharsets;
|
|
|
+import java.util.Arrays;
|
|
|
|
|
|
import org.apache.hadoop.io.Text;
|
|
|
-import org.apache.hadoop.util.LineReader;
|
|
|
-import org.junit.Test;
|
|
|
-
|
|
|
import org.junit.Assert;
|
|
|
+import org.junit.Test;
|
|
|
|
|
|
public class TestLineReader {
|
|
|
- private LineReader lineReader;
|
|
|
- private String TestData;
|
|
|
- private String Delimiter;
|
|
|
- private Text line;
|
|
|
|
|
|
+ /**
|
|
|
+ * TEST_1: The test scenario is the tail of the buffer equals the starting
|
|
|
+ * character/s of delimiter.
|
|
|
+ *
|
|
|
+ * The Test Data is such that,
|
|
|
+ *
|
|
|
+ * 1) we will have "</entity>" as delimiter
|
|
|
+ *
|
|
|
+ * 2) The tail of the current buffer would be "</" which matches with the
|
|
|
+ * starting character sequence of delimiter.
|
|
|
+ *
|
|
|
+ * 3) The Head of the next buffer would be "id>" which does NOT match with
|
|
|
+ * the remaining characters of delimiter.
|
|
|
+ *
|
|
|
+ * 4) Input data would be prefixed by char 'a' about
|
|
|
+ * numberOfCharToFillTheBuffer times. So that, one iteration to buffer the
|
|
|
+ * input data, would end at '</' ie equals starting 2 char of delimiter
|
|
|
+ *
|
|
|
+ * 5) For this we would take BufferSize as 64 * 1024;
|
|
|
+ *
|
|
|
+   * Check Condition: In the second key value pair, the value should contain
|
|
|
+ * "</" from currentToken and "id>" from next token
|
|
|
+ */
|
|
|
@Test
|
|
|
- public void testCustomDelimiter() throws Exception {
|
|
|
- /* TEST_1
|
|
|
- * The test scenario is the tail of the buffer
|
|
|
- * equals the starting character/s of delimiter
|
|
|
- *
|
|
|
- * The Test Data is such that,
|
|
|
- *
|
|
|
- * 1) we will have "</entity>" as delimiter
|
|
|
- *
|
|
|
- * 2) The tail of the current buffer would be "</"
|
|
|
- * which matches with the starting character sequence of delimiter.
|
|
|
- *
|
|
|
- * 3) The Head of the next buffer would be "id>"
|
|
|
- * which does NOT match with the remaining characters of delimiter.
|
|
|
- *
|
|
|
- * 4) Input data would be prefixed by char 'a'
|
|
|
- * about numberOfCharToFillTheBuffer times.
|
|
|
- * So that, one iteration to buffer the input data,
|
|
|
- * would end at '</' ie equals starting 2 char of delimiter
|
|
|
- *
|
|
|
- * 5) For this we would take BufferSize as 64 * 1024;
|
|
|
- *
|
|
|
- * Check Condition
|
|
|
- * In the second key value pair, the value should contain
|
|
|
- * "</" from currentToken and
|
|
|
- * "id>" from next token
|
|
|
- */
|
|
|
-
|
|
|
- Delimiter="</entity>";
|
|
|
-
|
|
|
- String CurrentBufferTailToken=
|
|
|
- "</entity><entity><id>Gelesh</";
|
|
|
+ public void testCustomDelimiter1() throws Exception {
|
|
|
+
|
|
|
+ final String delimiter = "</entity>";
|
|
|
+
|
|
|
// Ending part of Input Data Buffer
|
|
|
- // It contains '</' ie delimiter character
|
|
|
-
|
|
|
- String NextBufferHeadToken=
|
|
|
- "id><name>Omathil</name></entity>";
|
|
|
+ // It contains '</' ie delimiter character
|
|
|
+ final String currentBufferTailToken = "</entity><entity><id>Gelesh</";
|
|
|
+
|
|
|
// Supposing the start of next buffer is this
|
|
|
-
|
|
|
- String Expected =
|
|
|
- (CurrentBufferTailToken+NextBufferHeadToken)
|
|
|
- .replace(Delimiter, "");
|
|
|
- // Expected ,must capture from both the buffer, excluding Delimiter
|
|
|
-
|
|
|
- String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken;
|
|
|
-
|
|
|
- int BufferSize=64 * 1024;
|
|
|
+ final String nextBufferHeadToken = "id><name>Omathil</name></entity>";
|
|
|
+
|
|
|
+ // Expected must capture from both the buffer, excluding Delimiter
|
|
|
+ final String expected =
|
|
|
+ (currentBufferTailToken + nextBufferHeadToken).replace(delimiter, "");
|
|
|
+
|
|
|
+ final String testPartOfInput = currentBufferTailToken + nextBufferHeadToken;
|
|
|
+
|
|
|
+ final int bufferSize = 64 * 1024;
|
|
|
int numberOfCharToFillTheBuffer =
|
|
|
- BufferSize - CurrentBufferTailToken.length();
|
|
|
- StringBuilder fillerString=new StringBuilder();
|
|
|
- for (int i=0; i<numberOfCharToFillTheBuffer; i++) {
|
|
|
- fillerString.append('a'); // char 'a' as a filler for the test string
|
|
|
- }
|
|
|
-
|
|
|
- TestData = fillerString + TestPartOfInput;
|
|
|
- lineReader = new LineReader(
|
|
|
- new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
|
|
|
-
|
|
|
- line = new Text();
|
|
|
-
|
|
|
+ bufferSize - currentBufferTailToken.length();
|
|
|
+
|
|
|
+ final char[] fillBuffer = new char[numberOfCharToFillTheBuffer];
|
|
|
+
|
|
|
+ // char 'a' as a filler for the test string
|
|
|
+ Arrays.fill(fillBuffer, 'a');
|
|
|
+
|
|
|
+    final StringBuilder fillerString = new StringBuilder().append(fillBuffer);
|
|
|
+
|
|
|
+ final String testData = fillerString + testPartOfInput;
|
|
|
+
|
|
|
+ final LineReader lineReader = new LineReader(
|
|
|
+ new ByteArrayInputStream(testData.getBytes(StandardCharsets.UTF_8)),
|
|
|
+ delimiter.getBytes(StandardCharsets.UTF_8));
|
|
|
+
|
|
|
+ final Text line = new Text();
|
|
|
lineReader.readLine(line);
|
|
|
+ lineReader.close();
|
|
|
+
|
|
|
Assert.assertEquals(fillerString.toString(), line.toString());
|
|
|
-
|
|
|
+
|
|
|
lineReader.readLine(line);
|
|
|
- Assert.assertEquals(Expected, line.toString());
|
|
|
-
|
|
|
- /*TEST_2
|
|
|
- * The test scenario is such that,
|
|
|
- * the character/s preceding the delimiter,
|
|
|
- * equals the starting character/s of delimiter
|
|
|
- */
|
|
|
-
|
|
|
- Delimiter = "record";
|
|
|
- StringBuilder TestStringBuilder = new StringBuilder();
|
|
|
-
|
|
|
- TestStringBuilder.append(Delimiter + "Kerala ");
|
|
|
- TestStringBuilder.append(Delimiter + "Bangalore");
|
|
|
- TestStringBuilder.append(Delimiter + " North Korea");
|
|
|
- TestStringBuilder.append(Delimiter + Delimiter+
|
|
|
- "Guantanamo");
|
|
|
- TestStringBuilder.append(Delimiter + "ecord"
|
|
|
- + "recor" + "core"); //~EOF with 're'
|
|
|
-
|
|
|
- TestData=TestStringBuilder.toString();
|
|
|
-
|
|
|
- lineReader = new LineReader(
|
|
|
- new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
|
|
|
+ Assert.assertEquals(expected, line.toString());
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * TEST_2: The test scenario is such that, the character/s preceding the
|
|
|
+ * delimiter, equals the starting character/s of delimiter.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testCustomDelimiter2() throws Exception {
|
|
|
+ final String delimiter = "record";
|
|
|
+ final StringBuilder testStringBuilder = new StringBuilder();
|
|
|
+
|
|
|
+ testStringBuilder.append(delimiter).append("Kerala ");
|
|
|
+ testStringBuilder.append(delimiter).append("Bangalore");
|
|
|
+ testStringBuilder.append(delimiter).append(" North Korea");
|
|
|
+ testStringBuilder.append(delimiter).append(delimiter).append("Guantanamo");
|
|
|
+
|
|
|
+ // ~EOF with 're'
|
|
|
+ testStringBuilder.append(delimiter + "ecord" + "recor" + "core");
|
|
|
+
|
|
|
+ final String testData = testStringBuilder.toString();
|
|
|
+
|
|
|
+ final LineReader lineReader = new LineReader(
|
|
|
+ new ByteArrayInputStream(testData.getBytes(StandardCharsets.UTF_8)),
|
|
|
+        delimiter.getBytes(StandardCharsets.UTF_8));
|
|
|
+
|
|
|
+ final Text line = new Text();
|
|
|
|
|
|
lineReader.readLine(line);
|
|
|
Assert.assertEquals("", line.toString());
|
|
|
lineReader.readLine(line);
|
|
|
Assert.assertEquals("Kerala ", line.toString());
|
|
|
-
|
|
|
- lineReader.readLine(line);
|
|
|
+
|
|
|
+ lineReader.readLine(line);
|
|
|
Assert.assertEquals("Bangalore", line.toString());
|
|
|
-
|
|
|
- lineReader.readLine(line);
|
|
|
+
|
|
|
+ lineReader.readLine(line);
|
|
|
Assert.assertEquals(" North Korea", line.toString());
|
|
|
-
|
|
|
- lineReader.readLine(line);
|
|
|
+
|
|
|
+ lineReader.readLine(line);
|
|
|
Assert.assertEquals("", line.toString());
|
|
|
- lineReader.readLine(line);
|
|
|
+ lineReader.readLine(line);
|
|
|
Assert.assertEquals("Guantanamo", line.toString());
|
|
|
-
|
|
|
- lineReader.readLine(line);
|
|
|
- Assert.assertEquals(("ecord"+"recor"+"core"), line.toString());
|
|
|
-
|
|
|
- // Test 3
|
|
|
- // The test scenario is such that,
|
|
|
- // aaaabccc split by aaab
|
|
|
- TestData = "aaaabccc";
|
|
|
- Delimiter = "aaab";
|
|
|
- lineReader = new LineReader(
|
|
|
- new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
|
|
|
+
|
|
|
+ lineReader.readLine(line);
|
|
|
+ Assert.assertEquals(("ecord" + "recor" + "core"), line.toString());
|
|
|
+
|
|
|
+ lineReader.close();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+   * Test 3: The test scenario is such that, aaaabccc split by aaab.
|
|
|
+ */
|
|
|
+ @Test
|
|
|
+ public void testCustomDelimiter3() throws Exception {
|
|
|
+ final String testData = "aaaabccc";
|
|
|
+ final String delimiter = "aaab";
|
|
|
+ final LineReader lineReader = new LineReader(
|
|
|
+ new ByteArrayInputStream(testData.getBytes(StandardCharsets.UTF_8)),
|
|
|
+ delimiter.getBytes(StandardCharsets.UTF_8));
|
|
|
+
|
|
|
+ final Text line = new Text();
|
|
|
|
|
|
lineReader.readLine(line);
|
|
|
Assert.assertEquals("a", line.toString());
|
|
|
lineReader.readLine(line);
|
|
|
Assert.assertEquals("ccc", line.toString());
|
|
|
+
|
|
|
+ lineReader.close();
|
|
|
}
|
|
|
}
|