TestText.java 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. /**
  2. * Copyright 2005 The Apache Software Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package org.apache.hadoop.io;
  17. import junit.framework.TestCase;
  18. import java.io.IOException;
  19. import java.nio.ByteBuffer;
  20. import java.nio.charset.CharacterCodingException;
  21. import java.util.Random;
  22. import org.apache.commons.logging.Log;
  23. import org.apache.commons.logging.LogFactory;
  24. /** Unit tests for LargeUTF8. */
  25. public class TestText extends TestCase {
  26. private static final Log LOG= LogFactory.getLog("org.apache.hadoop.io.TestText");
  27. private static final int NUM_ITERATIONS = 100;
  28. public TestText(String name) { super(name); }
  29. private static final Random RANDOM = new Random(1);
  30. private static final int RAND_LEN = -1;
  31. // generate a valid java String
  32. private static String getTestString(int len) throws Exception {
  33. StringBuffer buffer = new StringBuffer();
  34. int length = (len==RAND_LEN) ? RANDOM.nextInt(1000) : len;
  35. while (buffer.length()<length) {
  36. int codePoint = RANDOM.nextInt(Character.MAX_CODE_POINT);
  37. char tmpStr[] = new char[2];
  38. if(Character.isDefined(codePoint)) {
  39. //unpaired surrogate
  40. if(codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT &&
  41. !Character.isHighSurrogate((char)codePoint) &&
  42. !Character.isLowSurrogate((char)codePoint) ) {
  43. Character.toChars(codePoint, tmpStr, 0);
  44. buffer.append(tmpStr);
  45. }
  46. }
  47. }
  48. return buffer.toString();
  49. }
  50. public static String getTestString() throws Exception {
  51. return getTestString(RAND_LEN);
  52. }
  53. public static String getLongString() throws Exception {
  54. String str = getTestString();
  55. int length = Short.MAX_VALUE+str.length();
  56. StringBuffer buffer = new StringBuffer();
  57. while(buffer.length()<length)
  58. buffer.append(str);
  59. return buffer.toString();
  60. }
  61. public void testWritable() throws Exception {
  62. for (int i = 0; i < NUM_ITERATIONS; i++) {
  63. try {
  64. String str;
  65. if(i == 0 )
  66. str = getLongString();
  67. else
  68. str = getTestString();
  69. TestWritable.testWritable(new Text(str));
  70. } catch (IOException e) {
  71. LOG.info(e);
  72. }
  73. }
  74. }
  75. public void testCoding() throws Exception {
  76. for (int i = 0; i < NUM_ITERATIONS; i++) {
  77. try {
  78. // generate a random string
  79. String before;
  80. if(i == 0 )
  81. before = getLongString();
  82. else
  83. before = getTestString();
  84. // test string to utf8
  85. ByteBuffer bb = Text.encode(before);
  86. byte[] utf8Text = bb.array();
  87. byte[] utf8Java = before.getBytes("UTF-8");
  88. assertEquals(0, WritableComparator.compareBytes(
  89. utf8Text, 0, bb.limit(),
  90. utf8Java, 0, utf8Java.length));
  91. // test utf8 to string
  92. String after = Text.decode(utf8Java);
  93. assertTrue(before.equals(after));
  94. }catch(CharacterCodingException e) {
  95. LOG.info( e );
  96. }
  97. }
  98. }
  99. public void testIO() throws Exception {
  100. DataOutputBuffer out = new DataOutputBuffer();
  101. DataInputBuffer in = new DataInputBuffer();
  102. for (int i = 0; i < NUM_ITERATIONS; i++) {
  103. try {
  104. // generate a random string
  105. String before;
  106. if(i == 0 )
  107. before = getLongString();
  108. else
  109. before = getTestString();
  110. // write it
  111. out.reset();
  112. Text.writeString(out, before);
  113. // test that it reads correctly
  114. in.reset(out.getData(), out.getLength());
  115. String after = Text.readString(in);
  116. assertTrue(before.equals(after));
  117. // Test compatibility with Java's other decoder
  118. int strLenSize = WritableUtils.getVIntSize(Text.utf8Length(before));
  119. String after2 = new String(out.getData(), strLenSize,
  120. out.getLength()-strLenSize, "UTF-8");
  121. assertTrue(before.equals(after2));
  122. }catch(IOException e) {
  123. LOG.info(e);
  124. }
  125. }
  126. }
  127. public void testCompare() throws Exception {
  128. DataOutputBuffer out1 = new DataOutputBuffer();
  129. DataOutputBuffer out2 = new DataOutputBuffer();
  130. DataOutputBuffer out3 = new DataOutputBuffer();
  131. Text.Comparator comparator = new Text.Comparator();
  132. for (int i=0; i<NUM_ITERATIONS; i++ ) {
  133. try {
  134. // reset output buffer
  135. out1.reset();
  136. out2.reset();
  137. out3.reset();
  138. // generate two random strings
  139. String str1 = getTestString();
  140. String str2 = getTestString();
  141. if(i == 0 ) {
  142. str1 = getLongString();
  143. str2 = getLongString();
  144. } else {
  145. str1 = getTestString();
  146. str2 = getTestString();
  147. }
  148. // convert to texts
  149. Text txt1 = new Text(str1);
  150. Text txt2 = new Text(str2);
  151. Text txt3 = new Text(str1);
  152. // serialize them
  153. txt1.write(out1);
  154. txt2.write(out2);
  155. txt3.write(out3);
  156. // compare two strings by looking at their binary formats
  157. int ret1 = comparator.compare(out1.getData(), 0, out1.getLength(),
  158. out2.getData(), 0, out2.getLength());
  159. // compare two strings
  160. int ret2 = txt1.compareTo(txt2);
  161. assertEquals(ret1, ret2);
  162. // test equal
  163. assertEquals(txt1.compareTo(txt3), 0);
  164. assertEquals(comparator.compare(out1.getData(), 0, out3.getLength(),
  165. out3.getData(), 0, out3.getLength()), 0);
  166. } catch (IOException e) {
  167. LOG.info(e);
  168. }
  169. }
  170. }
  171. public void testFind() throws Exception {
  172. try {
  173. Text text = new Text("abcd\u20acbdcd\u20ac");
  174. assertTrue(text.find("abd")==-1);
  175. assertTrue(text.find("ac")==-1);
  176. assertTrue(text.find("\u20ac")==4);
  177. assertTrue(text.find("\u20ac", 5)==11);
  178. } catch( CharacterCodingException e) {
  179. LOG.warn(e);
  180. }
  181. }
  182. public static void main(String[] args) throws Exception
  183. {
  184. TestText test = new TestText("main");
  185. test.testIO();
  186. test.testCompare();
  187. test.testCoding();
  188. test.testWritable();
  189. test.testFind();
  190. }
  191. }