6 年之前 · 9fcd89ab93
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestLineReader.java
@@ -19,138 +19,148 @@
 
				 package org.apache.hadoop.util;
			
 
				 
			
 
				 import java.io.ByteArrayInputStream;
			
 
				+import java.nio.charset.StandardCharsets;
			
 
				+import java.util.Arrays;
			
 
				 
			
 
				 import org.apache.hadoop.io.Text;
			
 
				-import org.apache.hadoop.util.LineReader;
			
 
				-import org.junit.Test;
			
 
				-
			
 
				 import org.junit.Assert;
			
 
				+import org.junit.Test;
			
 
				 
			
 
				 public class TestLineReader {
			
 
				-  private LineReader lineReader;
			
 
				-  private String TestData;
			
 
				-  private String Delimiter;
			
 
				-  private Text line;
			
 
				 
			
 
				+  /**
			
 
				+   * TEST_1: The test scenario is the tail of the buffer equals the starting
			
 
				+   * character/s of delimiter.
			
 
				+   *
			
 
				+   * The Test Data is such that,
			
 
				+   *
			
 
				+   * 1) we will have "&lt;/entity&gt;" as delimiter
			
 
				+   *
			
 
				+   * 2) The tail of the current buffer would be "&lt;/" which matches with the
			
 
				+   * starting character sequence of delimiter.
			
 
				+   *
			
 
				+   * 3) The Head of the next buffer would be "id&gt;" which does NOT match with
			
 
				+   * the remaining characters of delimiter.
			
 
				+   *
			
 
				+   * 4) Input data would be prefixed by char 'a' about
			
 
				+   * numberOfCharToFillTheBuffer times. So that, one iteration to buffer the
			
 
				+   * input data, would end at '&lt;/' ie equals starting 2 char of delimiter
			
 
				+   *
			
 
				+   * 5) For this we would take BufferSize as 64 * 1024;
			
 
				+   *
			
 
				+   * Check Condition In the second key value pair, the value should contain
			
 
				+   * "&lt;/" from currentToken and "id&gt;" from next token
			
 
				+   */
			
 
				   @Test
			
 
				-  public void testCustomDelimiter() throws Exception {
			
 
				-    /* TEST_1
			
 
				-     * The test scenario is the tail of the buffer
			
 
				-     * equals the starting character/s of delimiter
			
 
				-     * 
			
 
				-     * The Test Data is such that,
			
 
				-     *   
			
 
				-     * 1) we will have "</entity>" as delimiter  
			
 
				-     *  
			
 
				-     * 2) The tail of the current buffer would be "</"
			
 
				-     *    which matches with the starting character sequence of delimiter.
			
 
				-     *    
			
 
				-     * 3) The Head of the next buffer would be   "id>" 
			
 
				-     *    which does NOT match with the remaining characters of delimiter.
			
 
				-     *   
			
 
				-     * 4) Input data would be prefixed by char 'a' 
			
 
				-     *    about numberOfCharToFillTheBuffer times.
			
 
				-     *    So that, one iteration to buffer the input data,
			
 
				-     *    would end at '</' ie equals starting 2 char of delimiter  
			
 
				-     *     
			
 
				-     * 5) For this we would take BufferSize as 64 * 1024;
			
 
				-     * 
			
 
				-     * Check Condition
			
 
				-     *  In the second key value pair, the value should contain 
			
 
				-     *  "</"  from currentToken and
			
 
				-     *  "id>" from next token
			
 
				-     */  
			
 
				-    
			
 
				-    Delimiter="</entity>"; 
			
 
				-    
			
 
				-    String CurrentBufferTailToken=
			
 
				-        "</entity><entity><id>Gelesh</";
			
 
				+  public void testCustomDelimiter1() throws Exception {
			
 
				+
			
 
				+    final String delimiter = "</entity>";
			
 
				+
			
 
				     // Ending part of Input Data Buffer
			
 
				-    // It contains '</' ie delimiter character 
			
 
				-    
			
 
				-    String NextBufferHeadToken=
			
 
				-        "id><name>Omathil</name></entity>";
			
 
				+    // It contains '</' ie delimiter character
			
 
				+    final String currentBufferTailToken = "</entity><entity><id>Gelesh</";
			
 
				+
			
 
				     // Supposing the start of next buffer is this
			
 
				-    
			
 
				-    String Expected = 
			
 
				-        (CurrentBufferTailToken+NextBufferHeadToken)
			
 
				-        .replace(Delimiter, "");                       
			
 
				-    // Expected ,must capture from both the buffer, excluding Delimiter
			
 
				-  
			
 
				-    String TestPartOfInput = CurrentBufferTailToken+NextBufferHeadToken;
			
 
				-  
			
 
				-    int BufferSize=64 * 1024;
			
 
				+    final String nextBufferHeadToken = "id><name>Omathil</name></entity>";
			
 
				+
			
 
				+    // Expected must capture from both the buffer, excluding Delimiter
			
 
				+    final String expected =
			
 
				+        (currentBufferTailToken + nextBufferHeadToken).replace(delimiter, "");
			
 
				+
			
 
				+    final String testPartOfInput = currentBufferTailToken + nextBufferHeadToken;
			
 
				+
			
 
				+    final int bufferSize = 64 * 1024;
			
 
				     int numberOfCharToFillTheBuffer =
			
 
				-            BufferSize - CurrentBufferTailToken.length();
			
 
				-    StringBuilder fillerString=new StringBuilder();
			
 
				-    for (int i=0; i<numberOfCharToFillTheBuffer; i++) {
			
 
				-      fillerString.append('a'); // char 'a' as a filler for the test string
			
 
				-    }
			
 
				-
			
 
				-    TestData = fillerString + TestPartOfInput;
			
 
				-    lineReader = new LineReader(
			
 
				-        new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
			
 
				-    
			
 
				-    line = new Text();
			
 
				-    
			
 
				+        bufferSize - currentBufferTailToken.length();
			
 
				+
			
 
				+    final char[] fillBuffer = new char[numberOfCharToFillTheBuffer];
			
 
				+
			
 
				+    // Filler of 'a' chars so the first buffer ends exactly at "</"
			
 
				+    Arrays.fill(fillBuffer, 'a');
			
 
				+
			
 
				+    final StringBuilder fillerString = new StringBuilder().append(fillBuffer);
			
 
				+
			
 
				+    final String testData = fillerString + testPartOfInput;
			
 
				+
			
 
				+    final LineReader lineReader = new LineReader(
			
 
				+        new ByteArrayInputStream(testData.getBytes(StandardCharsets.UTF_8)),
			
 
				+        delimiter.getBytes(StandardCharsets.UTF_8));
			
 
				+
			
 
				+    final Text line = new Text();
			
 
				     lineReader.readLine(line);
			
 
				     Assert.assertEquals(fillerString.toString(), line.toString());
			
 
				-    
			
 
				+
			
 
				     lineReader.readLine(line);
			
 
				-    Assert.assertEquals(Expected, line.toString());
			
 
				-    
			
 
				-    /*TEST_2
			
 
				-     * The test scenario is such that,
			
 
				-     * the character/s preceding the delimiter,
			
 
				-     * equals the starting character/s of delimiter
			
 
				-     */
			
 
				-    
			
 
				-    Delimiter = "record";
			
 
				-    StringBuilder TestStringBuilder = new StringBuilder();
			
 
				-    
			
 
				-    TestStringBuilder.append(Delimiter + "Kerala ");
			
 
				-    TestStringBuilder.append(Delimiter + "Bangalore");
			
 
				-    TestStringBuilder.append(Delimiter + " North Korea");
			
 
				-    TestStringBuilder.append(Delimiter + Delimiter+
			
 
				-                        "Guantanamo");
			
 
				-    TestStringBuilder.append(Delimiter + "ecord"
			
 
				-            + "recor" + "core"); //~EOF with 're'
			
 
				-    
			
 
				-    TestData=TestStringBuilder.toString();
			
 
				-    
			
 
				-    lineReader = new LineReader(
			
 
				-        new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
			
 
				+    Assert.assertEquals(expected, line.toString());
			
 
				+
			
 
				+    lineReader.close();
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * TEST_2: The test scenario is such that, the character/s preceding the
			
 
				+   * delimiter, equals the starting character/s of delimiter.
			
 
				+   */
			
 
				+  @Test
			
 
				+  public void testCustomDelimiter2() throws Exception {
			
 
				+    final String delimiter = "record";
			
 
				+    final StringBuilder testStringBuilder = new StringBuilder();
			
 
				+
			
 
				+    testStringBuilder.append(delimiter).append("Kerala ");
			
 
				+    testStringBuilder.append(delimiter).append("Bangalore");
			
 
				+    testStringBuilder.append(delimiter).append(" North Korea");
			
 
				+    testStringBuilder.append(delimiter).append(delimiter).append("Guantanamo");
			
 
				+
			
 
				+    // ~EOF with 're'
			
 
				+    testStringBuilder.append(delimiter + "ecord" + "recor" + "core");
			
 
				+
			
 
				+    final String testData = testStringBuilder.toString();
			
 
				+
			
 
				+    final LineReader lineReader = new LineReader(
			
 
				+        new ByteArrayInputStream(testData.getBytes(StandardCharsets.UTF_8)),
			
 
				+        delimiter.getBytes(StandardCharsets.UTF_8));
			
 
				+
			
 
				+    final Text line = new Text();
			
 
				 
			
 
				     lineReader.readLine(line);
			
 
				     Assert.assertEquals("", line.toString());
			
 
				     lineReader.readLine(line);
			
 
				     Assert.assertEquals("Kerala ", line.toString());
			
 
				-    
			
 
				-    lineReader.readLine(line); 
			
 
				+
			
 
				+    lineReader.readLine(line);
			
 
				     Assert.assertEquals("Bangalore", line.toString());
			
 
				-    
			
 
				-    lineReader.readLine(line); 
			
 
				+
			
 
				+    lineReader.readLine(line);
			
 
				     Assert.assertEquals(" North Korea", line.toString());
			
 
				-    
			
 
				-    lineReader.readLine(line); 
			
 
				+
			
 
				+    lineReader.readLine(line);
			
 
				     Assert.assertEquals("", line.toString());
			
 
				-    lineReader.readLine(line); 
			
 
				+    lineReader.readLine(line);
			
 
				     Assert.assertEquals("Guantanamo", line.toString());
			
 
				-    
			
 
				-    lineReader.readLine(line); 
			
 
				-    Assert.assertEquals(("ecord"+"recor"+"core"), line.toString());
			
 
				-
			
 
				-    // Test 3
			
 
				-    // The test scenario is such that,
			
 
				-    // aaaabccc split by aaab
			
 
				-    TestData = "aaaabccc";
			
 
				-    Delimiter = "aaab";
			
 
				-    lineReader = new LineReader(
			
 
				-        new ByteArrayInputStream(TestData.getBytes()), Delimiter.getBytes());
			
 
				+
			
 
				+    lineReader.readLine(line);
			
 
				+    Assert.assertEquals(("ecord" + "recor" + "core"), line.toString());
			
 
				+
			
 
				+    lineReader.close();
			
 
				+  }
			
 
				+
			
 
				+  /**
			
 
				+   * Test 3: The test scenario is such that, aaaabccc split by aaab.
			
 
				+   */
			
 
				+  @Test
			
 
				+  public void testCustomDelimiter3() throws Exception {
			
 
				+    final String testData = "aaaabccc";
			
 
				+    final String delimiter = "aaab";
			
 
				+    final LineReader lineReader = new LineReader(
			
 
				+        new ByteArrayInputStream(testData.getBytes(StandardCharsets.UTF_8)),
			
 
				+        delimiter.getBytes(StandardCharsets.UTF_8));
			
 
				+
			
 
				+    final Text line = new Text();
			
 
				 
			
 
				     lineReader.readLine(line);
			
 
				     Assert.assertEquals("a", line.toString());
			
 
				     lineReader.readLine(line);
			
 
				     Assert.assertEquals("ccc", line.toString());
			
 
				+
			
 
				+    lineReader.close();
			
 
				   }
			
 
				 }