ソースを参照

HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
GzipCodec. Contributed by Aaron Kimball


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.21@898712 13f79535-47bb-0310-9956-ffa450edef68

Christopher Douglas 15 年 前
コミット
808d8692c7

+ 3 - 1
CHANGES.txt

@@ -1163,7 +1163,9 @@ Release 0.20.2 - Unreleased
 
     HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
     server on exceeding maximum response size to free up Java heap. (suresh)
-    
+
+    HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
+    GzipCodec. (Aaron Kimball via cdouglas)
 
 Release 0.20.1 - 2009-09-01
 

+ 2 - 2
src/java/org/apache/hadoop/io/compress/GzipCodec.java

@@ -165,7 +165,7 @@ public class GzipCodec extends DefaultCodec {
   public Class<? extends Compressor> getCompressorType() {
     return ZlibFactory.isNativeZlibLoaded(conf)
       ? GzipZlibCompressor.class
-      : BuiltInZlibDeflater.class;
+      : null;
   }
 
   public CompressionInputStream createInputStream(InputStream in) 
@@ -196,7 +196,7 @@ public class GzipCodec extends DefaultCodec {
   public Class<? extends Decompressor> getDecompressorType() {
     return ZlibFactory.isNativeZlibLoaded(conf)
       ? GzipZlibDecompressor.class
-      : BuiltInZlibInflater.class;
+      : null;
   }
 
   public String getDefaultExtension() {

+ 164 - 0
src/test/core/org/apache/hadoop/io/compress/TestCodec.java

@@ -19,13 +19,24 @@ package org.apache.hadoop.io.compress;
 
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
 import java.util.Arrays;
 import java.util.Random;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -41,6 +52,9 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.CompressionOutputStream;
+import org.apache.hadoop.io.compress.CompressorStream;
+import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater;
+import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater;
 import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
 import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
 import org.apache.hadoop.io.compress.zlib.ZlibFactory;
@@ -418,4 +432,154 @@ public class TestCodec {
     
   }
 
+  @Test
+  public void testCodecPoolAndGzipDecompressor() {
+    // BuiltInZlibInflater should not be used as the GzipCodec decompressor.
+    // Assert that this is the case.
+
+    // Don't use native libs for this test.
+    Configuration conf = new Configuration();
+    conf.setBoolean("hadoop.native.lib", false);
+    assertFalse("ZlibFactory is using native libs against request",
+        ZlibFactory.isNativeZlibLoaded(conf));
+
+    // This should give us a BuiltInZlibInflater.
+    Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
+    assertNotNull("zlibDecompressor is null!", zlibDecompressor);
+    assertTrue("ZlibFactory returned unexpected inflator",
+        zlibDecompressor instanceof BuiltInZlibInflater);
+
+    // Asking for a decompressor directly from GzipCodec should return null;
+    // its createInputStream() just wraps the existing stream in a
+    // java.util.zip.GZIPInputStream.
+    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
+    CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
+    assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
+    Decompressor codecDecompressor = codec.createDecompressor();
+    if (null != codecDecompressor) {
+      fail("Got non-null codecDecompressor: " + codecDecompressor);
+    }
+
+    // Asking the CodecPool for a decompressor for GzipCodec
+    // should return null as well.
+    Decompressor poolDecompressor = CodecPool.getDecompressor(codec);
+    if (null != poolDecompressor) {
+      fail("Got non-null poolDecompressor: " + poolDecompressor);
+    }
+
+    // If we then ensure that the pool is populated...
+    CodecPool.returnDecompressor(zlibDecompressor);
+
+    // Asking the pool another time should still not bind this to GzipCodec.
+    poolDecompressor = CodecPool.getDecompressor(codec);
+    if (null != poolDecompressor) {
+      fail("Second time, got non-null poolDecompressor: "
+          + poolDecompressor);
+    }
+  }
+
+  @Test
+  public void testGzipCodecRead() throws IOException {
+    // Create a gzipped file and try to read it back, using a decompressor
+    // from the CodecPool.
+
+    // Don't use native libs for this test.
+    Configuration conf = new Configuration();
+    conf.setBoolean("hadoop.native.lib", false);
+    assertFalse("ZlibFactory is using native libs against request",
+        ZlibFactory.isNativeZlibLoaded(conf));
+
+    // Ensure that the CodecPool has a BuiltInZlibInflater in it.
+    Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
+    assertNotNull("zlibDecompressor is null!", zlibDecompressor);
+    assertTrue("ZlibFactory returned unexpected inflator",
+        zlibDecompressor instanceof BuiltInZlibInflater);
+    CodecPool.returnDecompressor(zlibDecompressor);
+
+    // Now create a GZip text file.
+    String tmpDir = System.getProperty("test.build.data", "/tmp/");
+    Path f = new Path(new Path(tmpDir), "testGzipCodecRead.txt.gz");
+    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
+      new GZIPOutputStream(new FileOutputStream(f.toString()))));
+    final String msg = "This is the message in the file!";
+    bw.write(msg);
+    bw.close();
+
+    // Now read it back, using the CodecPool to establish the
+    // decompressor to use.
+    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
+    CompressionCodec codec = ccf.getCodec(f);
+    Decompressor decompressor = CodecPool.getDecompressor(codec);
+    FileSystem fs = FileSystem.getLocal(conf);
+    InputStream is = fs.open(f);
+    is = codec.createInputStream(is, decompressor);
+    BufferedReader br = new BufferedReader(new InputStreamReader(is));
+    String line = br.readLine();
+    assertEquals("Didn't get the same message back!", msg, line);
+    br.close();
+  }
+
+  private void verifyGzipFile(String filename, String msg) throws IOException {
+    BufferedReader r = new BufferedReader(new InputStreamReader(
+        new GZIPInputStream(new FileInputStream(filename))));
+    try {
+      String line = r.readLine();
+      assertEquals("Got invalid line back from " + filename, msg, line);
+    } finally {
+      r.close();
+      new File(filename).delete();
+    }
+  }
+
+  @Test
+  public void testGzipCodecWrite() throws IOException {
+    // Create a gzipped file using a compressor from the CodecPool,
+    // and try to read it back via the regular GZIPInputStream.
+
+    // Don't use native libs for this test.
+    Configuration conf = new Configuration();
+    conf.setBoolean("hadoop.native.lib", false);
+    assertFalse("ZlibFactory is using native libs against request",
+        ZlibFactory.isNativeZlibLoaded(conf));
+
+    // Ensure that the CodecPool has a BuiltInZlibDeflater in it.
+    Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
+    assertNotNull("zlibCompressor is null!", zlibCompressor);
+    assertTrue("ZlibFactory returned unexpected deflator",
+        zlibCompressor instanceof BuiltInZlibDeflater);
+    CodecPool.returnCompressor(zlibCompressor);
+
+    // Create a GZIP text file via the Compressor interface.
+    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
+    CompressionCodec codec = ccf.getCodec(new Path("foo.gz"));
+    assertTrue("Codec for .gz file is not GzipCodec", codec instanceof GzipCodec);
+
+    final String msg = "This is the message we are going to compress.";
+    final String tmpDir = System.getProperty("test.build.data", "/tmp/");
+    final String fileName = new Path(new Path(tmpDir),
+        "testGzipCodecWrite.txt.gz").toString();
+
+    BufferedWriter w = null;
+    Compressor gzipCompressor = CodecPool.getCompressor(codec);
+    if (null != gzipCompressor) {
+      // If it gives us back a Compressor, we should be able to use this
+      // to write files we can then read back with Java's gzip tools.
+      OutputStream os = new CompressorStream(new FileOutputStream(fileName),
+          gzipCompressor);
+      w = new BufferedWriter(new OutputStreamWriter(os));
+      w.write(msg);
+      w.close();
+      CodecPool.returnCompressor(gzipCompressor);
+
+      verifyGzipFile(fileName, msg);
+    }
+
+    // Create a gzip text file via codec.createOutputStream().
+    w = new BufferedWriter(new OutputStreamWriter(
+        codec.createOutputStream(new FileOutputStream(fileName))));
+    w.write(msg);
+    w.close();
+
+    verifyGzipFile(fileName, msg);
+  }
 }