123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223 |
/**
 * Copyright 2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
- package org.apache.hadoop.io;
- import junit.framework.TestCase;
- import java.io.IOException;
- import java.nio.ByteBuffer;
- import java.nio.charset.CharacterCodingException;
- import java.util.Random;
- import org.apache.commons.logging.Log;
- import org.apache.commons.logging.LogFactory;
- /** Unit tests for LargeUTF8. */
- public class TestText extends TestCase {
- private static final Log LOG= LogFactory.getLog("org.apache.hadoop.io.TestText");
- private static final int NUM_ITERATIONS = 100;
- public TestText(String name) { super(name); }
- private static final Random RANDOM = new Random(1);
- private static final int RAND_LEN = -1;
-
- // generate a valid java String
- private static String getTestString(int len) throws Exception {
- StringBuffer buffer = new StringBuffer();
- int length = (len==RAND_LEN) ? RANDOM.nextInt(1000) : len;
- while (buffer.length()<length) {
- int codePoint = RANDOM.nextInt(Character.MAX_CODE_POINT);
- char tmpStr[] = new char[2];
- if(Character.isDefined(codePoint)) {
- //unpaired surrogate
- if(codePoint < Character.MIN_SUPPLEMENTARY_CODE_POINT &&
- !Character.isHighSurrogate((char)codePoint) &&
- !Character.isLowSurrogate((char)codePoint) ) {
- Character.toChars(codePoint, tmpStr, 0);
- buffer.append(tmpStr);
- }
- }
- }
- return buffer.toString();
- }
-
- public static String getTestString() throws Exception {
- return getTestString(RAND_LEN);
- }
-
- public static String getLongString() throws Exception {
- String str = getTestString();
- int length = Short.MAX_VALUE+str.length();
- StringBuffer buffer = new StringBuffer();
- while(buffer.length()<length)
- buffer.append(str);
-
- return buffer.toString();
- }
- public void testWritable() throws Exception {
- for (int i = 0; i < NUM_ITERATIONS; i++) {
- try {
- String str;
- if(i == 0 )
- str = getLongString();
- else
- str = getTestString();
- TestWritable.testWritable(new Text(str));
- } catch (IOException e) {
- LOG.info(e);
- }
- }
- }
- public void testCoding() throws Exception {
- for (int i = 0; i < NUM_ITERATIONS; i++) {
- try {
- // generate a random string
- String before;
- if(i == 0 )
- before = getLongString();
- else
- before = getTestString();
-
- // test string to utf8
- ByteBuffer bb = Text.encode(before);
-
- byte[] utf8Text = bb.array();
- byte[] utf8Java = before.getBytes("UTF-8");
- assertEquals(0, WritableComparator.compareBytes(
- utf8Text, 0, bb.limit(),
- utf8Java, 0, utf8Java.length));
-
- // test utf8 to string
- String after = Text.decode(utf8Java);
- assertTrue(before.equals(after));
- }catch(CharacterCodingException e) {
- LOG.info( e );
- }
- }
- }
-
-
- public void testIO() throws Exception {
- DataOutputBuffer out = new DataOutputBuffer();
- DataInputBuffer in = new DataInputBuffer();
- for (int i = 0; i < NUM_ITERATIONS; i++) {
- try {
- // generate a random string
- String before;
- if(i == 0 )
- before = getLongString();
- else
- before = getTestString();
- // write it
- out.reset();
- Text.writeString(out, before);
- // test that it reads correctly
- in.reset(out.getData(), out.getLength());
- String after = Text.readString(in);
- assertTrue(before.equals(after));
-
- // Test compatibility with Java's other decoder
- int strLenSize = WritableUtils.getVIntSize(Text.utf8Length(before));
- String after2 = new String(out.getData(), strLenSize,
- out.getLength()-strLenSize, "UTF-8");
- assertTrue(before.equals(after2));
- }catch(IOException e) {
- LOG.info(e);
- }
- }
- }
- public void testCompare() throws Exception {
- DataOutputBuffer out1 = new DataOutputBuffer();
- DataOutputBuffer out2 = new DataOutputBuffer();
- DataOutputBuffer out3 = new DataOutputBuffer();
- Text.Comparator comparator = new Text.Comparator();
- for (int i=0; i<NUM_ITERATIONS; i++ ) {
- try {
- // reset output buffer
- out1.reset();
- out2.reset();
- out3.reset();
- // generate two random strings
- String str1 = getTestString();
- String str2 = getTestString();
- if(i == 0 ) {
- str1 = getLongString();
- str2 = getLongString();
- } else {
- str1 = getTestString();
- str2 = getTestString();
- }
-
- // convert to texts
- Text txt1 = new Text(str1);
- Text txt2 = new Text(str2);
- Text txt3 = new Text(str1);
-
- // serialize them
- txt1.write(out1);
- txt2.write(out2);
- txt3.write(out3);
-
- // compare two strings by looking at their binary formats
- int ret1 = comparator.compare(out1.getData(), 0, out1.getLength(),
- out2.getData(), 0, out2.getLength());
- // compare two strings
- int ret2 = txt1.compareTo(txt2);
-
- assertEquals(ret1, ret2);
-
- // test equal
- assertEquals(txt1.compareTo(txt3), 0);
- assertEquals(comparator.compare(out1.getData(), 0, out3.getLength(),
- out3.getData(), 0, out3.getLength()), 0);
- } catch (IOException e) {
- LOG.info(e);
- }
- }
- }
-
- public void testFind() throws Exception {
- try {
- Text text = new Text("abcd\u20acbdcd\u20ac");
- assertTrue(text.find("abd")==-1);
- assertTrue(text.find("ac")==-1);
- assertTrue(text.find("\u20ac")==4);
- assertTrue(text.find("\u20ac", 5)==11);
- } catch( CharacterCodingException e) {
- LOG.warn(e);
- }
- }
-
- public static void main(String[] args) throws Exception
- {
- TestText test = new TestText("main");
- test.testIO();
- test.testCompare();
- test.testCoding();
- test.testWritable();
- test.testFind();
- }
- }
|