소스 검색

HADOOP-17141. Add Capability To Get Text Length (#2157)

Contributed by David Mollitor
belugabehr 4 년 전
부모
커밋
e60096c377

+ 18 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java

@@ -77,6 +77,7 @@ public class Text extends BinaryComparable
 
   private byte[] bytes = EMPTY_BYTES;
   private int length = 0;
+  private int textLength = -1;
 
   /**
    * Construct an empty text string.
@@ -131,6 +132,17 @@ public class Text extends BinaryComparable
     return length;
   }
 
+  /**
+   * Returns the length of this text. The length is equal to the number of
+   * Unicode code units in the text, i.e. the value of
+   * {@code toString().length()}.
+   *
+   * <p>This is not the same quantity as the {@code length} field, which the
+   * visible code assigns from byte counts (a byte-buffer limit or the number
+   * of bytes copied or read).
+   *
+   * <p>The result is cached in {@code textLength} and computed lazily: a
+   * negative cached value means "not known", and the first call after that
+   * performs one {@code toString()} conversion and stores its length.
+   *
+   * @return the length of the string form of this text
+   */
+  public int getTextLength() {
+    if (textLength < 0) { // negative: cache is unset or was invalidated
+      textLength = toString().length();
+    }
+    return textLength;
+  }
+
   /**
    * Returns the Unicode Scalar Value (32-bit integer value)
    * for the character at <code>position</code>. Note that this
@@ -204,6 +216,7 @@ public class Text extends BinaryComparable
       ByteBuffer bb = encode(string, true);
       bytes = bb.array();
       length = bb.limit();
+      textLength = string.length();
     } catch (CharacterCodingException e) {
       throw new RuntimeException("Should not have happened", e);
     }
@@ -221,6 +234,7 @@ public class Text extends BinaryComparable
    */
   public void set(Text other) {
     set(other.getBytes(), 0, other.getLength());
+    this.textLength = other.textLength; // carry over other's cached text length; may be negative (not yet computed)
   }
 
   /**
@@ -234,6 +248,7 @@ public class Text extends BinaryComparable
     ensureCapacity(len);
     System.arraycopy(utf8, start, bytes, 0, len);
     this.length = len;
+    this.textLength = -1;
   }
 
   /**
@@ -251,6 +266,7 @@ public class Text extends BinaryComparable
     }
     System.arraycopy(utf8, start, bytes, length, len);
     length += len;
+    textLength = -1;
   }
 
   /**
@@ -263,6 +279,7 @@ public class Text extends BinaryComparable
    */
   public void clear() {
     length = 0; // only the length is reset here; the bytes array is not touched
+    textLength = -1; // invalidate the cached text length; getTextLength() recomputes it when negative
   }
 
   /**
@@ -327,6 +344,7 @@ public class Text extends BinaryComparable
     ensureCapacity(len);
     in.readFully(bytes, 0, len);
     length = len;
+    textLength = -1;
   }
 
   /**

+ 10 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java

@@ -268,6 +268,8 @@ public class TestText {
             0, text.getBytes().length);
     assertEquals("String's length must be zero",
         0, text.getLength());
+    assertEquals("String's text length must be zero",
+        0, text.getTextLength());
 
     // Test if clear works as intended
     text = new Text("abcd\u20acbdcd\u20ac");
@@ -280,6 +282,8 @@ public class TestText {
         text.getBytes().length >= len);
     assertEquals("Length of the string must be reset to 0 after clear()",
         0, text.getLength());
+    assertEquals("Text length of the string must be reset to 0 after clear()",
+        0, text.getTextLength());
   }
 
   @Test
@@ -288,9 +292,12 @@ public class TestText {
     Text b=new Text("a");
     b.set(a);
     assertEquals("abc", b.toString());
+    assertEquals(3, a.getTextLength());
+    assertEquals(3, b.getTextLength());
     a.append("xdefgxxx".getBytes(), 1, 4);
     assertEquals("modified aliased string", "abc", b.toString());
     assertEquals("appended string incorrectly", "abcdefg", a.toString());
+    assertEquals("This should reflect in the lenght", 7, a.getTextLength());
     // add an extra byte so that capacity = 10 and length = 8
     a.append(new byte[]{'d'}, 0, 1);
     assertEquals(10, a.getBytes().length);
@@ -392,16 +399,19 @@ public class TestText {
     in.reset(inputBytes, inputBytes.length);
     text.readWithKnownLength(in, 5);
     assertEquals("hello", text.toString());
+    assertEquals(5, text.getTextLength());
 
     // Read longer length, make sure it lengthens
     in.reset(inputBytes, inputBytes.length);
     text.readWithKnownLength(in, 7);
     assertEquals("hello w", text.toString());
+    assertEquals(7, text.getTextLength());
 
     // Read shorter length, make sure it shortens
     in.reset(inputBytes, inputBytes.length);
     text.readWithKnownLength(in, 2);
     assertEquals("he", text.toString());
+    assertEquals(2, text.getTextLength());
   }
   
   /**