|
@@ -64,6 +64,7 @@ import org.apache.hadoop.io.SequenceFile.CompressionType;
|
|
|
import org.apache.hadoop.io.Text;
|
|
|
import org.apache.hadoop.io.Writable;
|
|
|
import org.apache.hadoop.io.compress.bzip2.Bzip2Factory;
|
|
|
+import org.apache.hadoop.io.compress.zlib.BuiltInGzipCompressor;
|
|
|
import org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor;
|
|
|
import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
|
|
|
import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater;
|
|
@@ -101,6 +102,14 @@ public class TestCodec {
|
|
|
|
|
|
@Test
|
|
|
public void testGzipCodec() throws IOException {
|
|
|
+ Configuration conf = new Configuration();
|
|
|
+ if (ZlibFactory.isNativeZlibLoaded(conf)) {
|
|
|
+ codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.GzipCodec");
|
|
|
+ codecTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
|
|
|
+ }
|
|
|
+ // without hadoop-native installed.
|
|
|
+ ZlibFactory.setNativeZlibLoaded(false);
|
|
|
+ assumeTrue(ZlibFactory.isNativeZlibLoaded(conf));
|
|
|
codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.GzipCodec");
|
|
|
codecTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
|
|
|
}
|
|
@@ -467,15 +476,15 @@ public class TestCodec {
|
|
|
"org.apache.hadoop.io.compress.GzipCodec");
|
|
|
codecTestWithNOCompression(conf,
|
|
|
"org.apache.hadoop.io.compress.DefaultCodec");
|
|
|
- } else {
|
|
|
- LOG.warn("testCodecInitWithCompressionLevel for native skipped"
|
|
|
- + ": native libs not loaded");
|
|
|
}
|
|
|
conf = new Configuration();
|
|
|
// don't use native libs
|
|
|
ZlibFactory.setNativeZlibLoaded(false);
|
|
|
+ LOG.info("testCodecInitWithCompressionLevel without native libs");
|
|
|
codecTestWithNOCompression( conf,
|
|
|
"org.apache.hadoop.io.compress.DefaultCodec");
|
|
|
+ codecTestWithNOCompression(conf,
|
|
|
+ "org.apache.hadoop.io.compress.GzipCodec");
|
|
|
}
|
|
|
|
|
|
@Test
|
|
@@ -550,6 +559,23 @@ public class TestCodec {
|
|
|
sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.DeflateCodec", 1000000);
|
|
|
}
|
|
|
|
|
|
+ @Test
|
|
|
+ public void testSequenceFileGzipCodec() throws IOException, ClassNotFoundException,
|
|
|
+ InstantiationException, IllegalAccessException {
|
|
|
+ Configuration conf = new Configuration();
|
|
|
+ if (ZlibFactory.isNativeZlibLoaded(conf)) {
|
|
|
+ sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.GzipCodec", 5);
|
|
|
+ sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.GzipCodec", 100);
|
|
|
+ sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.GzipCodec", 1000000);
|
|
|
+ }
|
|
|
+ // without hadoop-native installed.
|
|
|
+ ZlibFactory.setNativeZlibLoaded(false);
|
|
|
+ assumeTrue(ZlibFactory.isNativeZlibLoaded(conf));
|
|
|
+ sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.GzipCodec", 5);
|
|
|
+ sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.GzipCodec", 100);
|
|
|
+ sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.GzipCodec", 1000000);
|
|
|
+ }
|
|
|
+
|
|
|
private static void sequenceFileCodecTest(Configuration conf, int lines,
|
|
|
String codecClass, int blockSize)
|
|
|
throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
|
|
@@ -853,16 +879,16 @@ public class TestCodec {
|
|
|
// and try to read it back via the regular GZIPInputStream.
|
|
|
|
|
|
// Use native libs per the parameter
|
|
|
- Configuration conf = new Configuration();
|
|
|
+ Configuration hadoopConf = new Configuration();
|
|
|
if (useNative) {
|
|
|
- assumeTrue(ZlibFactory.isNativeZlibLoaded(conf));
|
|
|
+ assumeTrue(ZlibFactory.isNativeZlibLoaded(hadoopConf));
|
|
|
} else {
|
|
|
assertFalse("ZlibFactory is using native libs against request",
|
|
|
- ZlibFactory.isNativeZlibLoaded(conf));
|
|
|
+ ZlibFactory.isNativeZlibLoaded(hadoopConf));
|
|
|
}
|
|
|
|
|
|
// Ensure that the CodecPool has a BuiltInZlibDeflater in it.
|
|
|
- Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
|
|
|
+ Compressor zlibCompressor = ZlibFactory.getZlibCompressor(hadoopConf);
|
|
|
assertNotNull("zlibCompressor is null!", zlibCompressor);
|
|
|
assertTrue("ZlibFactory returned unexpected deflator",
|
|
|
useNative ? zlibCompressor instanceof ZlibCompressor
|
|
@@ -871,37 +897,47 @@ public class TestCodec {
|
|
|
CodecPool.returnCompressor(zlibCompressor);
|
|
|
|
|
|
// Create a GZIP text file via the Compressor interface.
|
|
|
- CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
|
|
|
+ CompressionCodecFactory ccf = new CompressionCodecFactory(hadoopConf);
|
|
|
CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
|
|
|
assertTrue("Codec for .gz file is not GzipCodec",
|
|
|
codec instanceof GzipCodec);
|
|
|
|
|
|
- final String msg = "This is the message we are going to compress.";
|
|
|
final String fileName = new Path(GenericTestUtils.getTempPath(
|
|
|
"testGzipCodecWrite.txt.gz")).toString();
|
|
|
|
|
|
BufferedWriter w = null;
|
|
|
Compressor gzipCompressor = CodecPool.getCompressor(codec);
|
|
|
- if (null != gzipCompressor) {
|
|
|
- // If it gives us back a Compressor, we should be able to use this
|
|
|
- // to write files we can then read back with Java's gzip tools.
|
|
|
- OutputStream os = new CompressorStream(new FileOutputStream(fileName),
|
|
|
- gzipCompressor);
|
|
|
- w = new BufferedWriter(new OutputStreamWriter(os));
|
|
|
- w.write(msg);
|
|
|
- w.close();
|
|
|
- CodecPool.returnCompressor(gzipCompressor);
|
|
|
-
|
|
|
- verifyGzipFile(fileName, msg);
|
|
|
+ // When it gives us back a Compressor, we should be able to use this
|
|
|
+ // to write files we can then read back with Java's gzip tools.
|
|
|
+ OutputStream os = new CompressorStream(new FileOutputStream(fileName),
|
|
|
+ gzipCompressor);
|
|
|
+
|
|
|
+ // Call `write` multiple times.
|
|
|
+ int bufferSize = 10000;
|
|
|
+ char[] inputBuffer = new char[bufferSize];
|
|
|
+ Random rand = new Random();
|
|
|
+ for (int i = 0; i < bufferSize; i++) {
|
|
|
+ inputBuffer[i] = (char) ('a' + rand.nextInt(26));
|
|
|
+ }
|
|
|
+ w = new BufferedWriter(new OutputStreamWriter(os), bufferSize);
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
+ for (int i = 0; i < 10; i++) {
|
|
|
+ w.write(inputBuffer);
|
|
|
+ sb.append(inputBuffer);
|
|
|
}
|
|
|
+ w.close();
|
|
|
+ CodecPool.returnCompressor(gzipCompressor);
|
|
|
+ verifyGzipFile(fileName, sb.toString());
|
|
|
+
|
|
|
|
|
|
// Create a gzip text file via codec.getOutputStream().
|
|
|
w = new BufferedWriter(new OutputStreamWriter(
|
|
|
- codec.createOutputStream(new FileOutputStream(fileName))));
|
|
|
- w.write(msg);
|
|
|
+ codec.createOutputStream(new FileOutputStream(fileName))));
|
|
|
+ for (int i = 0; i < 10; i++) {
|
|
|
+ w.write(inputBuffer);
|
|
|
+ }
|
|
|
w.close();
|
|
|
-
|
|
|
- verifyGzipFile(fileName, msg);
|
|
|
+ verifyGzipFile(fileName, sb.toString());
|
|
|
}
|
|
|
|
|
|
@Test
|
|
@@ -915,6 +951,57 @@ public class TestCodec {
|
|
|
public void testGzipNativeCodecWrite() throws IOException {
|
|
|
testGzipCodecWrite(true);
|
|
|
}
|
|
|
+
|
|
|
+ @Test
|
|
|
+ public void testCodecPoolAndGzipCompressor() {
|
|
|
+ // BuiltInZlibDeflater should not be used as the GzipCodec compressor.
|
|
|
+ // Assert that this is the case.
|
|
|
+
|
|
|
+ // Don't use native libs for this test.
|
|
|
+ Configuration conf = new Configuration();
|
|
|
+ ZlibFactory.setNativeZlibLoaded(false);
|
|
|
+ assertFalse("ZlibFactory is using native libs against request",
|
|
|
+ ZlibFactory.isNativeZlibLoaded(conf));
|
|
|
+
|
|
|
+ // This should give us a BuiltInZlibDeflater.
|
|
|
+ Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
|
|
|
+ assertNotNull("zlibCompressor is null!", zlibCompressor);
|
|
|
+ assertTrue("ZlibFactory returned unexpected deflator",
|
|
|
+ zlibCompressor instanceof BuiltInZlibDeflater);
|
|
|
+ // its createOutputStream() just wraps the existing stream in a
|
|
|
+ // java.util.zip.GZIPOutputStream.
|
|
|
+ CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
|
|
|
+ CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
|
|
|
+ assertTrue("Codec for .gz file is not GzipCodec",
|
|
|
+ codec instanceof GzipCodec);
|
|
|
+
|
|
|
+ // make sure we don't get a null compressor
|
|
|
+ Compressor codecCompressor = codec.createCompressor();
|
|
|
+ if (null == codecCompressor) {
|
|
|
+ fail("Got null codecCompressor");
|
|
|
+ }
|
|
|
+
|
|
|
+ // Asking the CodecPool for a compressor for GzipCodec
|
|
|
+ // should not return null
|
|
|
+ Compressor poolCompressor = CodecPool.getCompressor(codec);
|
|
|
+ if (null == poolCompressor) {
|
|
|
+ fail("Got null poolCompressor");
|
|
|
+ }
|
|
|
+ // return a couple compressors
|
|
|
+ CodecPool.returnCompressor(zlibCompressor);
|
|
|
+ CodecPool.returnCompressor(poolCompressor);
|
|
|
+ Compressor poolCompressor2 = CodecPool.getCompressor(codec);
|
|
|
+ if (poolCompressor.getClass() == BuiltInGzipCompressor.class) {
|
|
|
+ if (poolCompressor == poolCompressor2) {
|
|
|
+ fail("Reused java gzip compressor in pool");
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (poolCompressor != poolCompressor2) {
|
|
|
+ fail("Did not reuse native gzip compressor in pool");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
@Test
|
|
|
public void testCodecPoolAndGzipDecompressor() {
|
|
|
// BuiltInZlibInflater should not be used as the GzipCodec decompressor.
|