瀏覽代碼

HADOOP-3646. Add a bzip2 compatible codec, so bzip compressed data
may be processed by map/reduce. Contributed by Abdul Qadeer.


git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@680802 13f79535-47bb-0310-9956-ffa450edef68

Christopher Douglas 17 年之前
父節點
當前提交
2f99879d4e

+ 3 - 0
CHANGES.txt

@@ -118,6 +118,9 @@ Trunk (unreleased changes)
     HADOOP-3824. Move base functionality of StatusHttpServer to a core
     HADOOP-3824. Move base functionality of StatusHttpServer to a core
     package. (TszWo (Nicholas), SZE via cdouglas)
     package. (TszWo (Nicholas), SZE via cdouglas)
 
 
+    HADOOP-3646. Add a bzip2 compatible codec, so bzip compressed data
+    may be processed by map/reduce. (Abdul Qadeer via cdouglas)
+
   OPTIMIZATIONS
   OPTIMIZATIONS
 
 
     HADOOP-3556. Removed lock contention in MD5Hash by changing the 
     HADOOP-3556. Removed lock contention in MD5Hash by changing the 

+ 1 - 1
conf/hadoop-default.xml

@@ -131,7 +131,7 @@ creations/deletions), or "all".</description>
 
 
 <property>
 <property>
   <name>io.compression.codecs</name>
   <name>io.compression.codecs</name>
-  <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.LzopCodec</value>
+  <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec</value>
   <description>A list of the compression codec classes that can be used 
   <description>A list of the compression codec classes that can be used 
                for compression/decompression.</description>
                for compression/decompression.</description>
 </property>
 </property>

+ 269 - 0
src/core/org/apache/hadoop/io/compress/BZip2Codec.java

@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.io.compress;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.io.compress.bzip2.CBZip2InputStream;
+import org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream;
+
+/**
+ * This class provides CompressionOutputStream and CompressionInputStream for
+ * compression and decompression. Currently we dont have an implementation of
+ * the Compressor and Decompressor interfaces, so those methods of
+ * CompressionCodec which have a Compressor or Decompressor type argument, throw
+ * UnsupportedOperationException.
+ */
+public class BZip2Codec implements
+    org.apache.hadoop.io.compress.CompressionCodec {
+
+  private static final String HEADER = "BZ";
+  private static final int HEADER_LEN = HEADER.length();
+
+  /**
+  * Creates a new instance of BZip2Codec
+  */
+  public BZip2Codec() {
+  }
+
+  /**
+  * Creates CompressionOutputStream for BZip2
+  *
+  * @param out
+  *            The output Stream
+  * @return The BZip2 CompressionOutputStream
+  * @throws java.io.IOException
+  *             Throws IO exception
+  */
+  public CompressionOutputStream createOutputStream(OutputStream out)
+      throws IOException {
+    return new BZip2CompressionOutputStream(out);
+  }
+
+  /**
+   * This functionality is currently not supported.
+   *
+   * @throws java.lang.UnsupportedOperationException
+   *             Throws UnsupportedOperationException
+   */
+  public CompressionOutputStream createOutputStream(OutputStream out,
+      Compressor compressor) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+  * This functionality is currently not supported.
+  *
+  * @throws java.lang.UnsupportedOperationException
+  *             Throws UnsupportedOperationException
+  */
+  public Class<org.apache.hadoop.io.compress.Compressor> getCompressorType() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+  * This functionality is currently not supported.
+  *
+  * @throws java.lang.UnsupportedOperationException
+  *             Throws UnsupportedOperationException
+  */
+  public Compressor createCompressor() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+  * Creates CompressionInputStream to be used to read off uncompressed data.
+  *
+  * @param in
+  *            The InputStream
+  * @return Returns CompressionInputStream for BZip2
+  * @throws java.io.IOException
+  *             Throws IOException
+  */
+  public CompressionInputStream createInputStream(InputStream in)
+      throws IOException {
+    return new BZip2CompressionInputStream(in);
+  }
+
+  /**
+  * This functionality is currently not supported.
+  *
+  * @throws java.lang.UnsupportedOperationException
+  *             Throws UnsupportedOperationException
+  */
+  public CompressionInputStream createInputStream(InputStream in,
+      Decompressor decompressor) throws IOException {
+    throw new UnsupportedOperationException();
+
+  }
+
+  /**
+  * This functionality is currently not supported.
+  *
+  * @throws java.lang.UnsupportedOperationException
+  *             Throws UnsupportedOperationException
+  */
+  public Class<org.apache.hadoop.io.compress.Decompressor> getDecompressorType() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+  * This functionality is currently not supported.
+  *
+  * @throws java.lang.UnsupportedOperationException
+  *             Throws UnsupportedOperationException
+  */
+  public Decompressor createDecompressor() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+  * .bz2 is recognized as the default extension for compressed BZip2 files
+  *
+  * @return A String telling the default bzip2 file extension
+  */
+  public String getDefaultExtension() {
+    return ".bz2";
+  }
+
+  private static class BZip2CompressionOutputStream extends CompressionOutputStream {
+
+    // class data starts here//
+    private CBZip2OutputStream output;
+
+    // class data ends here//
+
+    public BZip2CompressionOutputStream(OutputStream out)
+        throws IOException {
+      super(out);
+      writeStreamHeader();
+      this.output = new CBZip2OutputStream(out);
+    }
+
+    private void writeStreamHeader() throws IOException {
+      if (super.out != null) {
+        // The compressed bzip2 stream should start with the
+        // identifying characters BZ. Caller of CBZip2OutputStream
+        // i.e. this class must write these characters.
+        out.write(HEADER.getBytes());
+      }
+    }
+
+    public void write(byte[] b, int off, int len) throws IOException {
+      this.output.write(b, off, len);
+
+    }
+
+    public void finish() throws IOException {
+      this.output.flush();
+    }
+
+    public void resetState() throws IOException {
+
+    }
+
+    public void write(int b) throws IOException {
+      this.output.write(b);
+    }
+
+    public void close() throws IOException {
+      this.output.flush();
+      this.output.close();
+    }
+
+    protected void finalize() throws IOException {
+      if (this.output != null) {
+        this.close();
+      }
+    }
+
+  }// end of class BZip2CompressionOutputStream
+
+  private static class BZip2CompressionInputStream extends CompressionInputStream {
+
+    // class data starts here//
+    private CBZip2InputStream input;
+
+    // class data ends here//
+
+    public BZip2CompressionInputStream(InputStream in) throws IOException {
+
+      super(in);
+      BufferedInputStream bufferedIn = readStreamHeader();
+      input = new CBZip2InputStream(bufferedIn);
+    }
+
+    private BufferedInputStream readStreamHeader() throws IOException {
+      // We are flexible enough to allow the compressed stream not to
+      // start with the header of BZ. So it works fine either we have
+      // the header or not.
+      BufferedInputStream bufferedIn = null;
+      if (super.in != null) {
+        bufferedIn = new BufferedInputStream(super.in);
+        bufferedIn.mark(HEADER_LEN);
+        byte[] headerBytes = new byte[HEADER_LEN];
+        int actualRead = bufferedIn.read(headerBytes, 0, HEADER_LEN);
+        if (actualRead != -1) {
+          String header = new String(headerBytes);
+          if (header.compareTo(HEADER) != 0) {
+            bufferedIn.reset();
+          }
+        }
+      }
+
+      if (bufferedIn == null) {
+        throw new IOException("Failed to read bzip2 stream.");
+      }
+
+      return bufferedIn;
+
+    }// end of method
+
+    public void close() throws IOException {
+      this.input.close();
+    }
+
+    public int read(byte[] b, int off, int len) throws IOException {
+
+      return this.input.read(b, off, len);
+
+    }
+
+    public void resetState() throws IOException {
+
+    }
+
+    public int read() throws IOException {
+      return this.input.read();
+
+    }
+
+    protected void finalize() throws IOException {
+      if (this.input != null) {
+        this.close();
+      }
+
+    }
+
+  }// end of BZip2CompressionInputStream
+
+}

+ 97 - 0
src/core/org/apache/hadoop/io/compress/bzip2/BZip2Constants.java

@@ -0,0 +1,97 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+/*
+ * This package is based on the work done by Keiron Liddle, Aftex Software
+ * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
+ * great code.
+ */
+
+package org.apache.hadoop.io.compress.bzip2;
+
+/**
+ * Base class for both the compress and decompress classes. Holds common arrays,
+ * and static data.
+ * <p>
+ * This interface is public for historical purposes. You should have no need to
+ * use it.
+ * </p>
+ */
+public interface BZip2Constants {
+
+  int baseBlockSize = 100000;
+  int MAX_ALPHA_SIZE = 258;
+  int MAX_CODE_LEN = 23;
+  int RUNA = 0;
+  int RUNB = 1;
+  int N_GROUPS = 6;
+  int G_SIZE = 50;
+  int N_ITERS = 4;
+  int MAX_SELECTORS = (2 + (900000 / G_SIZE));
+  int NUM_OVERSHOOT_BYTES = 20;
+
+  /**
+  * This array really shouldn't be here. Again, for historical purposes it
+  * is.
+  *
+  * <p>
+  * FIXME: This array should be in a private or package private location,
+  * since it could be modified by malicious code.
+  * </p>
+  */
+  final int[] rNums = { 619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
+    985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 733, 859, 335,
+    708, 621, 574, 73, 654, 730, 472, 419, 436, 278, 496, 867, 210,
+    399, 680, 480, 51, 878, 465, 811, 169, 869, 675, 611, 697, 867,
+    561, 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 150, 238,
+    59, 379, 684, 877, 625, 169, 643, 105, 170, 607, 520, 932, 727,
+    476, 693, 425, 174, 647, 73, 122, 335, 530, 442, 853, 695, 249,
+    445, 515, 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 641,
+    801, 220, 162, 819, 984, 589, 513, 495, 799, 161, 604, 958, 533,
+    221, 400, 386, 867, 600, 782, 382, 596, 414, 171, 516, 375, 682,
+    485, 911, 276, 98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
+    227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 469, 68, 770,
+    919, 190, 373, 294, 822, 808, 206, 184, 943, 795, 384, 383, 461,
+    404, 758, 839, 887, 715, 67, 618, 276, 204, 918, 873, 777, 604,
+    560, 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 652, 934, 970,
+    447, 318, 353, 859, 672, 112, 785, 645, 863, 803, 350, 139, 93,
+    354, 99, 820, 908, 609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
+    653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 411, 521, 938,
+    300, 821, 78, 343, 175, 128, 250, 170, 774, 972, 275, 999, 639,
+    495, 78, 352, 126, 857, 956, 358, 619, 580, 124, 737, 594, 701,
+    612, 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 944, 375,
+    748, 52, 600, 747, 642, 182, 862, 81, 344, 805, 988, 739, 511, 655,
+    814, 334, 249, 515, 897, 955, 664, 981, 649, 113, 974, 459, 893,
+    228, 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 686, 754,
+    806, 760, 493, 403, 415, 394, 687, 700, 946, 670, 656, 610, 738,
+    392, 760, 799, 887, 653, 978, 321, 576, 617, 626, 502, 894, 679,
+    243, 440, 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 707,
+    151, 457, 449, 797, 195, 791, 558, 945, 679, 297, 59, 87, 824, 713,
+    663, 412, 693, 342, 606, 134, 108, 571, 364, 631, 212, 174, 643,
+    304, 329, 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 140,
+    206, 73, 263, 980, 736, 876, 478, 430, 305, 170, 514, 364, 692,
+    829, 82, 855, 953, 676, 246, 369, 970, 294, 750, 807, 827, 150,
+    790, 288, 923, 804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
+    896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 661, 821, 976,
+    991, 658, 869, 905, 758, 745, 193, 768, 550, 608, 933, 378, 286,
+    215, 979, 792, 961, 61, 688, 793, 644, 986, 403, 106, 366, 905,
+    644, 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 780, 773,
+    635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857,
+    265, 203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850,
+    858, 364, 936, 638 };
+}

+ 971 - 0
src/core/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java

@@ -0,0 +1,971 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+/*
+ * This package is based on the work done by Keiron Liddle, Aftex Software
+ * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
+ * great code.
+ */
+package org.apache.hadoop.io.compress.bzip2;
+
+import java.io.InputStream;
+import java.io.IOException;
+
+/**
+ * An input stream that decompresses from the BZip2 format (without the file
+ * header chars) to be read as any other stream.
+ *
+ * <p>
+ * The decompression requires large amounts of memory. Thus you should call the
+ * {@link #close() close()} method as soon as possible, to force
+ * <tt>CBZip2InputStream</tt> to release the allocated memory. See
+ * {@link CBZip2OutputStream CBZip2OutputStream} for information about memory
+ * usage.
+ * </p>
+ *
+ * <p>
+ * <tt>CBZip2InputStream</tt> reads bytes from the compressed source stream via
+ * the single byte {@link java.io.InputStream#read() read()} method exclusively.
+ * Thus you should consider to use a buffered source stream.
+ * </p>
+ *
+ * <p>
+ * Instances of this class are not threadsafe.
+ * </p>
+ */
+public class CBZip2InputStream extends InputStream implements BZip2Constants {
+
+  private static void reportCRCError() throws IOException {
+    // The clean way would be to throw an exception.
+    // throw new IOException("crc error");
+
+    // Just print a message, like the previous versions of this class did
+    System.err.println("BZip2 CRC error");
+  }
+
+  private void makeMaps() {
+    final boolean[] inUse = this.data.inUse;
+    final byte[] seqToUnseq = this.data.seqToUnseq;
+
+    int nInUseShadow = 0;
+
+    for (int i = 0; i < 256; i++) {
+      if (inUse[i])
+        seqToUnseq[nInUseShadow++] = (byte) i;
+    }
+
+    this.nInUse = nInUseShadow;
+  }
+
+  /**
+  * Index of the last char in the block, so the block size == last + 1.
+  */
+  private int last;
+
+  /**
+  * Index in zptr[] of original string after sorting.
+  */
+  private int origPtr;
+
+  /**
+  * always: in the range 0 .. 9. The current block size is 100000 * this
+  * number.
+  */
+  private int blockSize100k;
+
+  private boolean blockRandomised;
+
+  private int bsBuff;
+  private int bsLive;
+  private final CRC crc = new CRC();
+
+  private int nInUse;
+
+  private InputStream in;
+
+  private int currentChar = -1;
+
+  private static final int EOF = 0;
+  private static final int START_BLOCK_STATE = 1;
+  private static final int RAND_PART_A_STATE = 2;
+  private static final int RAND_PART_B_STATE = 3;
+  private static final int RAND_PART_C_STATE = 4;
+  private static final int NO_RAND_PART_A_STATE = 5;
+  private static final int NO_RAND_PART_B_STATE = 6;
+  private static final int NO_RAND_PART_C_STATE = 7;
+
+  private int currentState = START_BLOCK_STATE;
+
+  private int storedBlockCRC, storedCombinedCRC;
+  private int computedBlockCRC, computedCombinedCRC;
+
+  // Variables used by setup* methods exclusively
+
+  private int su_count;
+  private int su_ch2;
+  private int su_chPrev;
+  private int su_i2;
+  private int su_j2;
+  private int su_rNToGo;
+  private int su_rTPos;
+  private int su_tPos;
+  private char su_z;
+
+  /**
+  * All memory intensive stuff. This field is initialized by initBlock().
+  */
+  private CBZip2InputStream.Data data;
+
+  /**
+  * Constructs a new CBZip2InputStream which decompresses bytes read from the
+  * specified stream.
+  *
+  * <p>
+  * Although BZip2 headers are marked with the magic <tt>"Bz"</tt> this
+  * constructor expects the next byte in the stream to be the first one after
+  * the magic. Thus callers have to skip the first two bytes. Otherwise this
+  * constructor will throw an exception.
+  * </p>
+  *
+  * @throws IOException
+  *             if the stream content is malformed or an I/O error occurs.
+  * @throws NullPointerException
+  *             if <tt>in == null</tt>
+  */
+  public CBZip2InputStream(final InputStream in) throws IOException {
+    super();
+
+    this.in = in;
+    init();
+  }
+
+  public int read() throws IOException {
+    if (this.in != null) {
+      return read0();
+    } else {
+      throw new IOException("stream closed");
+    }
+  }
+
+  public int read(final byte[] dest, final int offs, final int len)
+      throws IOException {
+    if (offs < 0) {
+      throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
+    }
+    if (len < 0) {
+      throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
+    }
+    if (offs + len > dest.length) {
+      throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
+          + len + ") > dest.length(" + dest.length + ").");
+    }
+    if (this.in == null) {
+      throw new IOException("stream closed");
+    }
+
+    final int hi = offs + len;
+    int destOffs = offs;
+    for (int b; (destOffs < hi) && ((b = read0()) >= 0);) {
+      dest[destOffs++] = (byte) b;
+    }
+
+    return (destOffs == offs) ? -1 : (destOffs - offs);
+  }
+
+  private int read0() throws IOException {
+    final int retChar = this.currentChar;
+
+    switch (this.currentState) {
+    case EOF:
+      return -1;
+
+    case START_BLOCK_STATE:
+      throw new IllegalStateException();
+
+    case RAND_PART_A_STATE:
+      throw new IllegalStateException();
+
+    case RAND_PART_B_STATE:
+      setupRandPartB();
+      break;
+
+    case RAND_PART_C_STATE:
+      setupRandPartC();
+      break;
+
+    case NO_RAND_PART_A_STATE:
+      throw new IllegalStateException();
+
+    case NO_RAND_PART_B_STATE:
+      setupNoRandPartB();
+      break;
+
+    case NO_RAND_PART_C_STATE:
+      setupNoRandPartC();
+      break;
+
+    default:
+      throw new IllegalStateException();
+    }
+
+    return retChar;
+  }
+
+  private void init() throws IOException {
+    int magic2 = this.in.read();
+    if (magic2 != 'h') {
+      throw new IOException("Stream is not BZip2 formatted: expected 'h'"
+          + " as first byte but got '" + (char) magic2 + "'");
+    }
+
+    int blockSize = this.in.read();
+    if ((blockSize < '1') || (blockSize > '9')) {
+      throw new IOException("Stream is not BZip2 formatted: illegal "
+          + "blocksize " + (char) blockSize);
+    }
+
+    this.blockSize100k = blockSize - '0';
+
+    initBlock();
+    setupBlock();
+  }
+
+  private void initBlock() throws IOException {
+    char magic0 = bsGetUByte();
+    char magic1 = bsGetUByte();
+    char magic2 = bsGetUByte();
+    char magic3 = bsGetUByte();
+    char magic4 = bsGetUByte();
+    char magic5 = bsGetUByte();
+
+    if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45
+        && magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90) {
+      complete(); // end of file
+    } else if (magic0 != 0x31 || // '1'
+        magic1 != 0x41 || // ')'
+        magic2 != 0x59 || // 'Y'
+        magic3 != 0x26 || // '&'
+        magic4 != 0x53 || // 'S'
+        magic5 != 0x59 // 'Y'
+    ) {
+      this.currentState = EOF;
+      throw new IOException("bad block header");
+    } else {
+      this.storedBlockCRC = bsGetInt();
+      this.blockRandomised = bsR(1) == 1;
+
+      /**
+      * Allocate data here instead in constructor, so we do not allocate
+      * it if the input file is empty.
+      */
+      if (this.data == null) {
+        this.data = new Data(this.blockSize100k);
+      }
+
+      // currBlockNo++;
+      getAndMoveToFrontDecode();
+
+      this.crc.initialiseCRC();
+      this.currentState = START_BLOCK_STATE;
+    }
+  }
+
+  private void endBlock() throws IOException {
+    this.computedBlockCRC = this.crc.getFinalCRC();
+
+    // A bad CRC is considered a fatal error.
+    if (this.storedBlockCRC != this.computedBlockCRC) {
+      // make next blocks readable without error
+      // (repair feature, not yet documented, not tested)
+      this.computedCombinedCRC = (this.storedCombinedCRC << 1)
+          | (this.storedCombinedCRC >>> 31);
+      this.computedCombinedCRC ^= this.storedBlockCRC;
+
+      reportCRCError();
+    }
+
+    this.computedCombinedCRC = (this.computedCombinedCRC << 1)
+        | (this.computedCombinedCRC >>> 31);
+    this.computedCombinedCRC ^= this.computedBlockCRC;
+  }
+
+  private void complete() throws IOException {
+    this.storedCombinedCRC = bsGetInt();
+    this.currentState = EOF;
+    this.data = null;
+
+    if (this.storedCombinedCRC != this.computedCombinedCRC) {
+      reportCRCError();
+    }
+  }
+
+  public void close() throws IOException {
+    InputStream inShadow = this.in;
+    if (inShadow != null) {
+      try {
+        if (inShadow != System.in) {
+          inShadow.close();
+        }
+      } finally {
+        this.data = null;
+        this.in = null;
+      }
+    }
+  }
+
+  private int bsR(final int n) throws IOException {
+    int bsLiveShadow = this.bsLive;
+    int bsBuffShadow = this.bsBuff;
+
+    if (bsLiveShadow < n) {
+      final InputStream inShadow = this.in;
+      do {
+        int thech = inShadow.read();
+
+        if (thech < 0) {
+          throw new IOException("unexpected end of stream");
+        }
+
+        bsBuffShadow = (bsBuffShadow << 8) | thech;
+        bsLiveShadow += 8;
+      } while (bsLiveShadow < n);
+
+      this.bsBuff = bsBuffShadow;
+    }
+
+    this.bsLive = bsLiveShadow - n;
+    return (bsBuffShadow >> (bsLiveShadow - n)) & ((1 << n) - 1);
+  }
+
+  private boolean bsGetBit() throws IOException {
+    int bsLiveShadow = this.bsLive;
+    int bsBuffShadow = this.bsBuff;
+
+    if (bsLiveShadow < 1) {
+      int thech = this.in.read();
+
+      if (thech < 0) {
+        throw new IOException("unexpected end of stream");
+      }
+
+      bsBuffShadow = (bsBuffShadow << 8) | thech;
+      bsLiveShadow += 8;
+      this.bsBuff = bsBuffShadow;
+    }
+
+    this.bsLive = bsLiveShadow - 1;
+    return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0;
+  }
+
+  private char bsGetUByte() throws IOException {
+    return (char) bsR(8);
+  }
+
+  private int bsGetInt() throws IOException {
+    return (((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8);
+  }
+
+  /**
+  * Called by createHuffmanDecodingTables() exclusively.
+  */
+  private static void hbCreateDecodeTables(final int[] limit,
+      final int[] base, final int[] perm, final char[] length,
+      final int minLen, final int maxLen, final int alphaSize) {
+    for (int i = minLen, pp = 0; i <= maxLen; i++) {
+      for (int j = 0; j < alphaSize; j++) {
+        if (length[j] == i) {
+          perm[pp++] = j;
+        }
+      }
+    }
+
+    for (int i = MAX_CODE_LEN; --i > 0;) {
+      base[i] = 0;
+      limit[i] = 0;
+    }
+
+    for (int i = 0; i < alphaSize; i++) {
+      base[length[i] + 1]++;
+    }
+
+    for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
+      b += base[i];
+      base[i] = b;
+    }
+
+    for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
+      final int nb = base[i + 1];
+      vec += nb - b;
+      b = nb;
+      limit[i] = vec - 1;
+      vec <<= 1;
+    }
+
+    for (int i = minLen + 1; i <= maxLen; i++) {
+      base[i] = ((limit[i - 1] + 1) << 1) - base[i];
+    }
+  }
+
+  private void recvDecodingTables() throws IOException {
+    final Data dataShadow = this.data;
+    final boolean[] inUse = dataShadow.inUse;
+    final byte[] pos = dataShadow.recvDecodingTables_pos;
+    final byte[] selector = dataShadow.selector;
+    final byte[] selectorMtf = dataShadow.selectorMtf;
+
+    int inUse16 = 0;
+
+    /* Receive the mapping table */
+    for (int i = 0; i < 16; i++) {
+      if (bsGetBit()) {
+        inUse16 |= 1 << i;
+      }
+    }
+
+    for (int i = 256; --i >= 0;) {
+      inUse[i] = false;
+    }
+
+    for (int i = 0; i < 16; i++) {
+      if ((inUse16 & (1 << i)) != 0) {
+        final int i16 = i << 4;
+        for (int j = 0; j < 16; j++) {
+          if (bsGetBit()) {
+            inUse[i16 + j] = true;
+          }
+        }
+      }
+    }
+
+    makeMaps();
+    final int alphaSize = this.nInUse + 2;
+
+    /* Now the selectors */
+    final int nGroups = bsR(3);
+    final int nSelectors = bsR(15);
+
+    for (int i = 0; i < nSelectors; i++) {
+      int j = 0;
+      while (bsGetBit()) {
+        j++;
+      }
+      selectorMtf[i] = (byte) j;
+    }
+
+    /* Undo the MTF values for the selectors. */
+    for (int v = nGroups; --v >= 0;) {
+      pos[v] = (byte) v;
+    }
+
+    for (int i = 0; i < nSelectors; i++) {
+      int v = selectorMtf[i] & 0xff;
+      final byte tmp = pos[v];
+      while (v > 0) {
+        // nearly all times v is zero, 4 in most other cases
+        pos[v] = pos[v - 1];
+        v--;
+      }
+      pos[0] = tmp;
+      selector[i] = tmp;
+    }
+
+    final char[][] len = dataShadow.temp_charArray2d;
+
+    /* Now the coding tables */
+    for (int t = 0; t < nGroups; t++) {
+      int curr = bsR(5);
+      final char[] len_t = len[t];
+      for (int i = 0; i < alphaSize; i++) {
+        while (bsGetBit()) {
+          curr += bsGetBit() ? -1 : 1;
+        }
+        len_t[i] = (char) curr;
+      }
+    }
+
+    // finally create the Huffman tables
+    createHuffmanDecodingTables(alphaSize, nGroups);
+  }
+
+  /**
+  * Called by recvDecodingTables() exclusively.
+  */
+  private void createHuffmanDecodingTables(final int alphaSize,
+      final int nGroups) {
+    final Data dataShadow = this.data;
+    final char[][] len = dataShadow.temp_charArray2d;
+    final int[] minLens = dataShadow.minLens;
+    final int[][] limit = dataShadow.limit;
+    final int[][] base = dataShadow.base;
+    final int[][] perm = dataShadow.perm;
+
+    for (int t = 0; t < nGroups; t++) {
+      int minLen = 32;
+      int maxLen = 0;
+      final char[] len_t = len[t];
+      for (int i = alphaSize; --i >= 0;) {
+        final char lent = len_t[i];
+        if (lent > maxLen) {
+          maxLen = lent;
+        }
+        if (lent < minLen) {
+          minLen = lent;
+        }
+      }
+      hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
+          maxLen, alphaSize);
+      minLens[t] = minLen;
+    }
+  }
+
+  private void getAndMoveToFrontDecode() throws IOException {
+    this.origPtr = bsR(24);
+    recvDecodingTables();
+
+    final InputStream inShadow = this.in;
+    final Data dataShadow = this.data;
+    final byte[] ll8 = dataShadow.ll8;
+    final int[] unzftab = dataShadow.unzftab;
+    final byte[] selector = dataShadow.selector;
+    final byte[] seqToUnseq = dataShadow.seqToUnseq;
+    final char[] yy = dataShadow.getAndMoveToFrontDecode_yy;
+    final int[] minLens = dataShadow.minLens;
+    final int[][] limit = dataShadow.limit;
+    final int[][] base = dataShadow.base;
+    final int[][] perm = dataShadow.perm;
+    final int limitLast = this.blockSize100k * 100000;
+
+    /*
+    * Setting up the unzftab entries here is not strictly necessary, but it
+    * does save having to do it later in a separate pass, and so saves a
+    * block's worth of cache misses.
+    */
+    for (int i = 256; --i >= 0;) {
+      yy[i] = (char) i;
+      unzftab[i] = 0;
+    }
+
+    int groupNo = 0;
+    int groupPos = G_SIZE - 1;
+    final int eob = this.nInUse + 1;
+    int nextSym = getAndMoveToFrontDecode0(0);
+    int bsBuffShadow = this.bsBuff;
+    int bsLiveShadow = this.bsLive;
+    int lastShadow = -1;
+    int zt = selector[groupNo] & 0xff;
+    int[] base_zt = base[zt];
+    int[] limit_zt = limit[zt];
+    int[] perm_zt = perm[zt];
+    int minLens_zt = minLens[zt];
+
+    while (nextSym != eob) {
+      if ((nextSym == RUNA) || (nextSym == RUNB)) {
+        int s = -1;
+
+        for (int n = 1; true; n <<= 1) {
+          if (nextSym == RUNA) {
+            s += n;
+          } else if (nextSym == RUNB) {
+            s += n << 1;
+          } else {
+            break;
+          }
+
+          if (groupPos == 0) {
+            groupPos = G_SIZE - 1;
+            zt = selector[++groupNo] & 0xff;
+            base_zt = base[zt];
+            limit_zt = limit[zt];
+            perm_zt = perm[zt];
+            minLens_zt = minLens[zt];
+          } else {
+            groupPos--;
+          }
+
+          int zn = minLens_zt;
+
+          // Inlined:
+          // int zvec = bsR(zn);
+          while (bsLiveShadow < zn) {
+            final int thech = inShadow.read();
+            if (thech >= 0) {
+              bsBuffShadow = (bsBuffShadow << 8) | thech;
+              bsLiveShadow += 8;
+              continue;
+            } else {
+              throw new IOException("unexpected end of stream");
+            }
+          }
+          int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
+              & ((1 << zn) - 1);
+          bsLiveShadow -= zn;
+
+          while (zvec > limit_zt[zn]) {
+            zn++;
+            while (bsLiveShadow < 1) {
+              final int thech = inShadow.read();
+              if (thech >= 0) {
+                bsBuffShadow = (bsBuffShadow << 8) | thech;
+                bsLiveShadow += 8;
+                continue;
+              } else {
+                throw new IOException(
+                    "unexpected end of stream");
+              }
+            }
+            bsLiveShadow--;
+            zvec = (zvec << 1)
+                | ((bsBuffShadow >> bsLiveShadow) & 1);
+          }
+          nextSym = perm_zt[zvec - base_zt[zn]];
+        }
+
+        final byte ch = seqToUnseq[yy[0]];
+        unzftab[ch & 0xff] += s + 1;
+
+        while (s-- >= 0) {
+          ll8[++lastShadow] = ch;
+        }
+
+        if (lastShadow >= limitLast) {
+          throw new IOException("block overrun");
+        }
+      } else {
+        if (++lastShadow >= limitLast) {
+          throw new IOException("block overrun");
+        }
+
+        final char tmp = yy[nextSym - 1];
+        unzftab[seqToUnseq[tmp] & 0xff]++;
+        ll8[lastShadow] = seqToUnseq[tmp];
+
+        /*
+        * This loop is hammered during decompression, hence avoid
+        * native method call overhead of System.arraycopy for very
+        * small ranges to copy.
+        */
+        if (nextSym <= 16) {
+          for (int j = nextSym - 1; j > 0;) {
+            yy[j] = yy[--j];
+          }
+        } else {
+          System.arraycopy(yy, 0, yy, 1, nextSym - 1);
+        }
+
+        yy[0] = tmp;
+
+        if (groupPos == 0) {
+          groupPos = G_SIZE - 1;
+          zt = selector[++groupNo] & 0xff;
+          base_zt = base[zt];
+          limit_zt = limit[zt];
+          perm_zt = perm[zt];
+          minLens_zt = minLens[zt];
+        } else {
+          groupPos--;
+        }
+
+        int zn = minLens_zt;
+
+        // Inlined:
+        // int zvec = bsR(zn);
+        while (bsLiveShadow < zn) {
+          final int thech = inShadow.read();
+          if (thech >= 0) {
+            bsBuffShadow = (bsBuffShadow << 8) | thech;
+            bsLiveShadow += 8;
+            continue;
+          } else {
+            throw new IOException("unexpected end of stream");
+          }
+        }
+        int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
+            & ((1 << zn) - 1);
+        bsLiveShadow -= zn;
+
+        while (zvec > limit_zt[zn]) {
+          zn++;
+          while (bsLiveShadow < 1) {
+            final int thech = inShadow.read();
+            if (thech >= 0) {
+              bsBuffShadow = (bsBuffShadow << 8) | thech;
+              bsLiveShadow += 8;
+              continue;
+            } else {
+              throw new IOException("unexpected end of stream");
+            }
+          }
+          bsLiveShadow--;
+          zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
+        }
+        nextSym = perm_zt[zvec - base_zt[zn]];
+      }
+    }
+
+    this.last = lastShadow;
+    this.bsLive = bsLiveShadow;
+    this.bsBuff = bsBuffShadow;
+  }
+
+  private int getAndMoveToFrontDecode0(final int groupNo) throws IOException {
+    final InputStream inShadow = this.in;
+    final Data dataShadow = this.data;
+    final int zt = dataShadow.selector[groupNo] & 0xff;
+    final int[] limit_zt = dataShadow.limit[zt];
+    int zn = dataShadow.minLens[zt];
+    int zvec = bsR(zn);
+    int bsLiveShadow = this.bsLive;
+    int bsBuffShadow = this.bsBuff;
+
+    while (zvec > limit_zt[zn]) {
+      zn++;
+      while (bsLiveShadow < 1) {
+        final int thech = inShadow.read();
+
+        if (thech >= 0) {
+          bsBuffShadow = (bsBuffShadow << 8) | thech;
+          bsLiveShadow += 8;
+          continue;
+        } else {
+          throw new IOException("unexpected end of stream");
+        }
+      }
+      bsLiveShadow--;
+      zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
+    }
+
+    this.bsLive = bsLiveShadow;
+    this.bsBuff = bsBuffShadow;
+
+    return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
+  }
+
+  private void setupBlock() throws IOException {
+    if (this.data == null) {
+      return;
+    }
+
+    final int[] cftab = this.data.cftab;
+    final int[] tt = this.data.initTT(this.last + 1);
+    final byte[] ll8 = this.data.ll8;
+    cftab[0] = 0;
+    System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
+
+    for (int i = 1, c = cftab[0]; i <= 256; i++) {
+      c += cftab[i];
+      cftab[i] = c;
+    }
+
+    for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
+      tt[cftab[ll8[i] & 0xff]++] = i;
+    }
+
+    if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
+      throw new IOException("stream corrupted");
+    }
+
+    this.su_tPos = tt[this.origPtr];
+    this.su_count = 0;
+    this.su_i2 = 0;
+    this.su_ch2 = 256; /* not a char and not EOF */
+
+    if (this.blockRandomised) {
+      this.su_rNToGo = 0;
+      this.su_rTPos = 0;
+      setupRandPartA();
+    } else {
+      setupNoRandPartA();
+    }
+  }
+
+  private void setupRandPartA() throws IOException {
+    if (this.su_i2 <= this.last) {
+      this.su_chPrev = this.su_ch2;
+      int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
+      this.su_tPos = this.data.tt[this.su_tPos];
+      if (this.su_rNToGo == 0) {
+        this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1;
+        if (++this.su_rTPos == 512) {
+          this.su_rTPos = 0;
+        }
+      } else {
+        this.su_rNToGo--;
+      }
+      this.su_ch2 = su_ch2Shadow ^= (this.su_rNToGo == 1) ? 1 : 0;
+      this.su_i2++;
+      this.currentChar = su_ch2Shadow;
+      this.currentState = RAND_PART_B_STATE;
+      this.crc.updateCRC(su_ch2Shadow);
+    } else {
+      endBlock();
+      initBlock();
+      setupBlock();
+    }
+  }
+
+  private void setupNoRandPartA() throws IOException {
+    if (this.su_i2 <= this.last) {
+      this.su_chPrev = this.su_ch2;
+      int su_ch2Shadow = this.data.ll8[this.su_tPos] & 0xff;
+      this.su_ch2 = su_ch2Shadow;
+      this.su_tPos = this.data.tt[this.su_tPos];
+      this.su_i2++;
+      this.currentChar = su_ch2Shadow;
+      this.currentState = NO_RAND_PART_B_STATE;
+      this.crc.updateCRC(su_ch2Shadow);
+    } else {
+      this.currentState = NO_RAND_PART_A_STATE;
+      endBlock();
+      initBlock();
+      setupBlock();
+    }
+  }
+
+  private void setupRandPartB() throws IOException {
+    if (this.su_ch2 != this.su_chPrev) {
+      this.currentState = RAND_PART_A_STATE;
+      this.su_count = 1;
+      setupRandPartA();
+    } else if (++this.su_count >= 4) {
+      this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
+      this.su_tPos = this.data.tt[this.su_tPos];
+      if (this.su_rNToGo == 0) {
+        this.su_rNToGo = BZip2Constants.rNums[this.su_rTPos] - 1;
+        if (++this.su_rTPos == 512) {
+          this.su_rTPos = 0;
+        }
+      } else {
+        this.su_rNToGo--;
+      }
+      this.su_j2 = 0;
+      this.currentState = RAND_PART_C_STATE;
+      if (this.su_rNToGo == 1) {
+        this.su_z ^= 1;
+      }
+      setupRandPartC();
+    } else {
+      this.currentState = RAND_PART_A_STATE;
+      setupRandPartA();
+    }
+  }
+
+  private void setupRandPartC() throws IOException {
+    if (this.su_j2 < this.su_z) {
+      this.currentChar = this.su_ch2;
+      this.crc.updateCRC(this.su_ch2);
+      this.su_j2++;
+    } else {
+      this.currentState = RAND_PART_A_STATE;
+      this.su_i2++;
+      this.su_count = 0;
+      setupRandPartA();
+    }
+  }
+
+  private void setupNoRandPartB() throws IOException {
+    if (this.su_ch2 != this.su_chPrev) {
+      this.su_count = 1;
+      setupNoRandPartA();
+    } else if (++this.su_count >= 4) {
+      this.su_z = (char) (this.data.ll8[this.su_tPos] & 0xff);
+      this.su_tPos = this.data.tt[this.su_tPos];
+      this.su_j2 = 0;
+      setupNoRandPartC();
+    } else {
+      setupNoRandPartA();
+    }
+  }
+
+  private void setupNoRandPartC() throws IOException {
+    if (this.su_j2 < this.su_z) {
+      int su_ch2Shadow = this.su_ch2;
+      this.currentChar = su_ch2Shadow;
+      this.crc.updateCRC(su_ch2Shadow);
+      this.su_j2++;
+      this.currentState = NO_RAND_PART_C_STATE;
+    } else {
+      this.su_i2++;
+      this.su_count = 0;
+      setupNoRandPartA();
+    }
+  }
+
+  private static final class Data extends Object {
+
+    // (with blockSize 900k)
+    final boolean[] inUse = new boolean[256]; // 256 byte
+
+    final byte[] seqToUnseq = new byte[256]; // 256 byte
+    final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
+    final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
+
+    /**
+    * Freq table collected to save a pass over the data during
+    * decompression.
+    */
+    final int[] unzftab = new int[256]; // 1024 byte
+
+    final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
+    final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
+    final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
+    final int[] minLens = new int[N_GROUPS]; // 24 byte
+
+    final int[] cftab = new int[257]; // 1028 byte
+    final char[] getAndMoveToFrontDecode_yy = new char[256]; // 512 byte
+    final char[][] temp_charArray2d = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
+                                        // byte
+    final byte[] recvDecodingTables_pos = new byte[N_GROUPS]; // 6 byte
+    // ---------------
+    // 60798 byte
+
+    int[] tt; // 3600000 byte
+    byte[] ll8; // 900000 byte
+
+    // ---------------
+    // 4560782 byte
+    // ===============
+
+    Data(int blockSize100k) {
+      super();
+
+      this.ll8 = new byte[blockSize100k * BZip2Constants.baseBlockSize];
+    }
+
+    /**
+    * Initializes the {@link #tt} array.
+    *
+    * This method is called when the required length of the array is known.
+    * I don't initialize it at construction time to avoid unneccessary
+    * memory allocation when compressing small files.
+    */
+    final int[] initTT(int length) {
+      int[] ttShadow = this.tt;
+
+      // tt.length should always be >= length, but theoretically
+      // it can happen, if the compressor mixed small and large
+      // blocks. Normally only the last block will be smaller
+      // than others.
+      if ((ttShadow == null) || (ttShadow.length < length)) {
+        this.tt = ttShadow = new int[length];
+      }
+
+      return ttShadow;
+    }
+
+  }
+}

+ 2074 - 0
src/core/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java

@@ -0,0 +1,2074 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+/*
+ * This package is based on the work done by Keiron Liddle, Aftex Software
+ * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
+ * great code.
+ */
+
+package org.apache.hadoop.io.compress.bzip2;
+
+import java.io.OutputStream;
+import java.io.IOException;
+
+/**
+ * An output stream that compresses into the BZip2 format (without the file
+ * header chars) into another stream.
+ *
+ * <p>
+ * The compression requires large amounts of memory. Thus you should call the
+ * {@link #close() close()} method as soon as possible, to force
+ * <tt>CBZip2OutputStream</tt> to release the allocated memory.
+ * </p>
+ *
+ * <p>
+ * You can shrink the amount of allocated memory and maybe raise the compression
+ * speed by choosing a lower blocksize, which in turn may cause a lower
+ * compression ratio. You can avoid unnecessary memory allocation by avoiding
+ * using a blocksize which is bigger than the size of the input.
+ * </p>
+ *
+ * <p>
+ * You can compute the memory usage for compressing by the following formula:
+ * </p>
+ *
+ * <pre>
+ * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
+ * </pre>
+ *
+ * <p>
+ * To get the memory required for decompression by {@link CBZip2InputStream
+ * CBZip2InputStream} use
+ * </p>
+ *
+ * <pre>
+ * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
+ * </pre>
+ *
+ * <table width="100%" border="1">
+ * <colgroup> <col width="33%" /> <col width="33%" /> <col width="33%" />
+ * </colgroup>
+ * <tr>
+ * <th colspan="3">Memory usage by blocksize</th>
+ * </tr>
+ * <tr>
+ * <th align="right">Blocksize</th> <th align="right">Compression<br>
+ * memory usage</th> <th align="right">Decompression<br>
+ * memory usage</th>
+ * </tr>
+ * <tr>
+ * <td align="right">100k</td>
+ * <td align="right">1300k</td>
+ * <td align="right">565k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">200k</td>
+ * <td align="right">2200k</td>
+ * <td align="right">1065k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">300k</td>
+ * <td align="right">3100k</td>
+ * <td align="right">1565k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">400k</td>
+ * <td align="right">4000k</td>
+ * <td align="right">2065k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">500k</td>
+ * <td align="right">4900k</td>
+ * <td align="right">2565k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">600k</td>
+ * <td align="right">5800k</td>
+ * <td align="right">3065k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">700k</td>
+ * <td align="right">6700k</td>
+ * <td align="right">3565k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">800k</td>
+ * <td align="right">7600k</td>
+ * <td align="right">4065k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">900k</td>
+ * <td align="right">8500k</td>
+ * <td align="right">4565k</td>
+ * </tr>
+ * </table>
+ *
+ * <p>
+ * For decompression <tt>CBZip2InputStream</tt> allocates less memory if the
+ * bzipped input is smaller than one block.
+ * </p>
+ *
+ * <p>
+ * Instances of this class are not threadsafe.
+ * </p>
+ *
+ * <p>
+ * TODO: Update to BZip2 1.0.1
+ * </p>
+ *
+ */
+public class CBZip2OutputStream extends OutputStream implements BZip2Constants {
+
+  /**
+  * The minimum supported blocksize <tt> == 1</tt>.
+  */
+  public static final int MIN_BLOCKSIZE = 1;
+
+  /**
+  * The maximum supported blocksize <tt> == 9</tt>.
+  */
+  public static final int MAX_BLOCKSIZE = 9;
+
+  /**
+  * This constant is accessible by subclasses for historical purposes. If you
+  * don't know what it means then you don't need it.
+  */
+  protected static final int SETMASK = (1 << 21);
+
+  /**
+  * This constant is accessible by subclasses for historical purposes. If you
+  * don't know what it means then you don't need it.
+  */
+  protected static final int CLEARMASK = (~SETMASK);
+
+  /**
+  * This constant is accessible by subclasses for historical purposes. If you
+  * don't know what it means then you don't need it.
+  */
+  protected static final int GREATER_ICOST = 15;
+
+  /**
+  * This constant is accessible by subclasses for historical purposes. If you
+  * don't know what it means then you don't need it.
+  */
+  protected static final int LESSER_ICOST = 0;
+
+  /**
+  * This constant is accessible by subclasses for historical purposes. If you
+  * don't know what it means then you don't need it.
+  */
+  protected static final int SMALL_THRESH = 20;
+
+  /**
+  * This constant is accessible by subclasses for historical purposes. If you
+  * don't know what it means then you don't need it.
+  */
+  protected static final int DEPTH_THRESH = 10;
+
+  /**
+  * This constant is accessible by subclasses for historical purposes. If you
+  * don't know what it means then you don't need it.
+  */
+  protected static final int WORK_FACTOR = 30;
+
+  /**
+  * This constant is accessible by subclasses for historical purposes. If you
+  * don't know what it means then you don't need it.
+  * <p>
+  * If you are ever unlucky/improbable enough to get a stack overflow whilst
+  * sorting, increase the following constant and try again. In practice I
+  * have never seen the stack go above 27 elems, so the following limit seems
+  * very generous.
+  * </p>
+  */
+  protected static final int QSORT_STACK_SIZE = 1000;
+
+  /**
+  * Knuth's increments seem to work better than Incerpi-Sedgewick here.
+  * Possibly because the number of elems to sort is usually small, typically
+  * &lt;= 20.
+  */
+  private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+      9841, 29524, 88573, 265720, 797161, 2391484 };
+
+  /**
+  * This method is accessible by subclasses for historical purposes. If you
+  * don't know what it does then you don't need it.
+  */
+  protected static void hbMakeCodeLengths(char[] len, int[] freq,
+      int alphaSize, int maxLen) {
+    /*
+    * Nodes and heap entries run from 1. Entry 0 for both the heap and
+    * nodes is a sentinel.
+    */
+    final int[] heap = new int[MAX_ALPHA_SIZE * 2];
+    final int[] weight = new int[MAX_ALPHA_SIZE * 2];
+    final int[] parent = new int[MAX_ALPHA_SIZE * 2];
+
+    for (int i = alphaSize; --i >= 0;) {
+      weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
+    }
+
+    for (boolean tooLong = true; tooLong;) {
+      tooLong = false;
+
+      int nNodes = alphaSize;
+      int nHeap = 0;
+      heap[0] = 0;
+      weight[0] = 0;
+      parent[0] = -2;
+
+      for (int i = 1; i <= alphaSize; i++) {
+        parent[i] = -1;
+        nHeap++;
+        heap[nHeap] = i;
+
+        int zz = nHeap;
+        int tmp = heap[zz];
+        while (weight[tmp] < weight[heap[zz >> 1]]) {
+          heap[zz] = heap[zz >> 1];
+          zz >>= 1;
+        }
+        heap[zz] = tmp;
+      }
+
+      // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap;
+
+      while (nHeap > 1) {
+        int n1 = heap[1];
+        heap[1] = heap[nHeap];
+        nHeap--;
+
+        int yy = 0;
+        int zz = 1;
+        int tmp = heap[1];
+
+        while (true) {
+          yy = zz << 1;
+
+          if (yy > nHeap) {
+            break;
+          }
+
+          if ((yy < nHeap)
+              && (weight[heap[yy + 1]] < weight[heap[yy]])) {
+            yy++;
+          }
+
+          if (weight[tmp] < weight[heap[yy]]) {
+            break;
+          }
+
+          heap[zz] = heap[yy];
+          zz = yy;
+        }
+
+        heap[zz] = tmp;
+
+        int n2 = heap[1];
+        heap[1] = heap[nHeap];
+        nHeap--;
+
+        yy = 0;
+        zz = 1;
+        tmp = heap[1];
+
+        while (true) {
+          yy = zz << 1;
+
+          if (yy > nHeap) {
+            break;
+          }
+
+          if ((yy < nHeap)
+              && (weight[heap[yy + 1]] < weight[heap[yy]])) {
+            yy++;
+          }
+
+          if (weight[tmp] < weight[heap[yy]]) {
+            break;
+          }
+
+          heap[zz] = heap[yy];
+          zz = yy;
+        }
+
+        heap[zz] = tmp;
+        nNodes++;
+        parent[n1] = parent[n2] = nNodes;
+
+        final int weight_n1 = weight[n1];
+        final int weight_n2 = weight[n2];
+        weight[nNodes] = (((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00)) | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff)
+            : (weight_n2 & 0x000000ff))));
+
+        parent[nNodes] = -1;
+        nHeap++;
+        heap[nHeap] = nNodes;
+
+        tmp = 0;
+        zz = nHeap;
+        tmp = heap[zz];
+        final int weight_tmp = weight[tmp];
+        while (weight_tmp < weight[heap[zz >> 1]]) {
+          heap[zz] = heap[zz >> 1];
+          zz >>= 1;
+        }
+        heap[zz] = tmp;
+
+      }
+
+      // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes;
+
+      for (int i = 1; i <= alphaSize; i++) {
+        int j = 0;
+        int k = i;
+
+        for (int parent_k; (parent_k = parent[k]) >= 0;) {
+          k = parent_k;
+          j++;
+        }
+
+        len[i - 1] = (char) j;
+        if (j > maxLen) {
+          tooLong = true;
+        }
+      }
+
+      if (tooLong) {
+        for (int i = 1; i < alphaSize; i++) {
+          int j = weight[i] >> 8;
+          j = 1 + (j >> 1);
+          weight[i] = j << 8;
+        }
+      }
+    }
+  }
+
+  private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
+      final Data dat, final int alphaSize, final int maxLen) {
+    /*
+    * Nodes and heap entries run from 1. Entry 0 for both the heap and
+    * nodes is a sentinel.
+    */
+    final int[] heap = dat.heap;
+    final int[] weight = dat.weight;
+    final int[] parent = dat.parent;
+
+    for (int i = alphaSize; --i >= 0;) {
+      weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
+    }
+
+    for (boolean tooLong = true; tooLong;) {
+      tooLong = false;
+
+      int nNodes = alphaSize;
+      int nHeap = 0;
+      heap[0] = 0;
+      weight[0] = 0;
+      parent[0] = -2;
+
+      for (int i = 1; i <= alphaSize; i++) {
+        parent[i] = -1;
+        nHeap++;
+        heap[nHeap] = i;
+
+        int zz = nHeap;
+        int tmp = heap[zz];
+        while (weight[tmp] < weight[heap[zz >> 1]]) {
+          heap[zz] = heap[zz >> 1];
+          zz >>= 1;
+        }
+        heap[zz] = tmp;
+      }
+
+      while (nHeap > 1) {
+        int n1 = heap[1];
+        heap[1] = heap[nHeap];
+        nHeap--;
+
+        int yy = 0;
+        int zz = 1;
+        int tmp = heap[1];
+
+        while (true) {
+          yy = zz << 1;
+
+          if (yy > nHeap) {
+            break;
+          }
+
+          if ((yy < nHeap)
+              && (weight[heap[yy + 1]] < weight[heap[yy]])) {
+            yy++;
+          }
+
+          if (weight[tmp] < weight[heap[yy]]) {
+            break;
+          }
+
+          heap[zz] = heap[yy];
+          zz = yy;
+        }
+
+        heap[zz] = tmp;
+
+        int n2 = heap[1];
+        heap[1] = heap[nHeap];
+        nHeap--;
+
+        yy = 0;
+        zz = 1;
+        tmp = heap[1];
+
+        while (true) {
+          yy = zz << 1;
+
+          if (yy > nHeap) {
+            break;
+          }
+
+          if ((yy < nHeap)
+              && (weight[heap[yy + 1]] < weight[heap[yy]])) {
+            yy++;
+          }
+
+          if (weight[tmp] < weight[heap[yy]]) {
+            break;
+          }
+
+          heap[zz] = heap[yy];
+          zz = yy;
+        }
+
+        heap[zz] = tmp;
+        nNodes++;
+        parent[n1] = parent[n2] = nNodes;
+
+        final int weight_n1 = weight[n1];
+        final int weight_n2 = weight[n2];
+        weight[nNodes] = ((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00))
+            | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff)
+                : (weight_n2 & 0x000000ff)));
+
+        parent[nNodes] = -1;
+        nHeap++;
+        heap[nHeap] = nNodes;
+
+        tmp = 0;
+        zz = nHeap;
+        tmp = heap[zz];
+        final int weight_tmp = weight[tmp];
+        while (weight_tmp < weight[heap[zz >> 1]]) {
+          heap[zz] = heap[zz >> 1];
+          zz >>= 1;
+        }
+        heap[zz] = tmp;
+
+      }
+
+      for (int i = 1; i <= alphaSize; i++) {
+        int j = 0;
+        int k = i;
+
+        for (int parent_k; (parent_k = parent[k]) >= 0;) {
+          k = parent_k;
+          j++;
+        }
+
+        len[i - 1] = (byte) j;
+        if (j > maxLen) {
+          tooLong = true;
+        }
+      }
+
+      if (tooLong) {
+        for (int i = 1; i < alphaSize; i++) {
+          int j = weight[i] >> 8;
+          j = 1 + (j >> 1);
+          weight[i] = j << 8;
+        }
+      }
+    }
+  }
+
+  /**
+  * Index of the last char in the block, so the block size == last + 1.
+  */
+  private int last;
+
+  /**
+  * Index in fmap[] of original string after sorting.
+  */
+  private int origPtr;
+
+  /**
+  * Always: in the range 0 .. 9. The current block size is 100000 * this
+  * number.
+  */
+  private final int blockSize100k;
+
+  private boolean blockRandomised;
+
+  private int bsBuff;
+  private int bsLive;
+  private final CRC crc = new CRC();
+
+  private int nInUse;
+
+  private int nMTF;
+
+  /*
+  * Used when sorting. If too many long comparisons happen, we stop sorting,
+  * randomise the block slightly, and try again.
+  */
+  private int workDone;
+  private int workLimit;
+  private boolean firstAttempt;
+
+  private int currentChar = -1;
+  private int runLength = 0;
+
+  private int blockCRC;
+  private int combinedCRC;
+  private int allowableBlockSize;
+
+  /**
+  * All memory intensive stuff.
+  */
+  private CBZip2OutputStream.Data data;
+
+  private OutputStream out;
+
+  /**
+  * Chooses a blocksize based on the given length of the data to compress.
+  *
+  * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
+  *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
+  *         <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt>
+  *         always.
+  *
+  * @param inputLength
+  *            The length of the data which will be compressed by
+  *            <tt>CBZip2OutputStream</tt>.
+  */
+  public static int chooseBlockSize(long inputLength) {
+    return (inputLength > 0) ? (int) Math
+        .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
+  }
+
+  /**
+  * Constructs a new <tt>CBZip2OutputStream</tt> with a blocksize of 900k.
+  *
+  * <p>
+  * <b>Attention: </b>The caller is resonsible to write the two BZip2 magic
+  * bytes <tt>"BZ"</tt> to the specified stream prior to calling this
+  * constructor.
+  * </p>
+  *
+  * @param out *
+  *            the destination stream.
+  *
+  * @throws IOException
+  *             if an I/O error occurs in the specified stream.
+  * @throws NullPointerException
+  *             if <code>out == null</code>.
+  */
+  public CBZip2OutputStream(final OutputStream out) throws IOException {
+    this(out, MAX_BLOCKSIZE);
+  }
+
+  /**
+  * Constructs a new <tt>CBZip2OutputStream</tt> with specified blocksize.
+  *
+  * <p>
+  * <b>Attention: </b>The caller is resonsible to write the two BZip2 magic
+  * bytes <tt>"BZ"</tt> to the specified stream prior to calling this
+  * constructor.
+  * </p>
+  *
+  *
+  * @param out
+  *            the destination stream.
+  * @param blockSize
+  *            the blockSize as 100k units.
+  *
+  * @throws IOException
+  *             if an I/O error occurs in the specified stream.
+  * @throws IllegalArgumentException
+  *             if <code>(blockSize < 1) || (blockSize > 9)</code>.
+  * @throws NullPointerException
+  *             if <code>out == null</code>.
+  *
+  * @see #MIN_BLOCKSIZE
+  * @see #MAX_BLOCKSIZE
+  */
+  public CBZip2OutputStream(final OutputStream out, final int blockSize)
+      throws IOException {
+    super();
+
+    if (blockSize < 1) {
+      throw new IllegalArgumentException("blockSize(" + blockSize
+          + ") < 1");
+    }
+    if (blockSize > 9) {
+      throw new IllegalArgumentException("blockSize(" + blockSize
+          + ") > 9");
+    }
+
+    this.blockSize100k = blockSize;
+    this.out = out;
+    init();
+  }
+
+  public void write(final int b) throws IOException {
+    if (this.out != null) {
+      write0(b);
+    } else {
+      throw new IOException("closed");
+    }
+  }
+
+  private void writeRun() throws IOException {
+    final int lastShadow = this.last;
+
+    if (lastShadow < this.allowableBlockSize) {
+      final int currentCharShadow = this.currentChar;
+      final Data dataShadow = this.data;
+      dataShadow.inUse[currentCharShadow] = true;
+      final byte ch = (byte) currentCharShadow;
+
+      int runLengthShadow = this.runLength;
+      this.crc.updateCRC(currentCharShadow, runLengthShadow);
+
+      switch (runLengthShadow) {
+      case 1:
+        dataShadow.block[lastShadow + 2] = ch;
+        this.last = lastShadow + 1;
+        break;
+
+      case 2:
+        dataShadow.block[lastShadow + 2] = ch;
+        dataShadow.block[lastShadow + 3] = ch;
+        this.last = lastShadow + 2;
+        break;
+
+      case 3: {
+        final byte[] block = dataShadow.block;
+        block[lastShadow + 2] = ch;
+        block[lastShadow + 3] = ch;
+        block[lastShadow + 4] = ch;
+        this.last = lastShadow + 3;
+      }
+        break;
+
+      default: {
+        runLengthShadow -= 4;
+        dataShadow.inUse[runLengthShadow] = true;
+        final byte[] block = dataShadow.block;
+        block[lastShadow + 2] = ch;
+        block[lastShadow + 3] = ch;
+        block[lastShadow + 4] = ch;
+        block[lastShadow + 5] = ch;
+        block[lastShadow + 6] = (byte) runLengthShadow;
+        this.last = lastShadow + 5;
+      }
+        break;
+
+      }
+    } else {
+      endBlock();
+      initBlock();
+      writeRun();
+    }
+  }
+
+  /**
+  * Overriden to close the stream.
+  */
+  protected void finalize() throws Throwable {
+    close();
+    super.finalize();
+  }
+
+  public void close() throws IOException {
+    OutputStream outShadow = this.out;
+    if (outShadow != null) {
+      try {
+        if (this.runLength > 0) {
+          writeRun();
+        }
+        this.currentChar = -1;
+        endBlock();
+        endCompression();
+        outShadow.close();
+      } finally {
+        this.out = null;
+        this.data = null;
+      }
+    }
+  }
+
+  public void flush() throws IOException {
+    OutputStream outShadow = this.out;
+    if (outShadow != null) {
+      outShadow.flush();
+    }
+  }
+
+  private void init() throws IOException {
+    // write magic: done by caller who created this stream
+    // this.out.write('B');
+    // this.out.write('Z');
+
+    this.data = new Data(this.blockSize100k);
+
+    /*
+    * Write `magic' bytes h indicating file-format == huffmanised, followed
+    * by a digit indicating blockSize100k.
+    */
+    bsPutUByte('h');
+    bsPutUByte('0' + this.blockSize100k);
+
+    this.combinedCRC = 0;
+    initBlock();
+  }
+
+  private void initBlock() {
+    // blockNo++;
+    this.crc.initialiseCRC();
+    this.last = -1;
+    // ch = 0;
+
+    boolean[] inUse = this.data.inUse;
+    for (int i = 256; --i >= 0;) {
+      inUse[i] = false;
+    }
+
+    /* 20 is just a paranoia constant */
+    this.allowableBlockSize = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20;
+  }
+
+  private void endBlock() throws IOException {
+    this.blockCRC = this.crc.getFinalCRC();
+    this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
+    this.combinedCRC ^= this.blockCRC;
+
+    // empty block at end of file
+    if (this.last == -1) {
+      return;
+    }
+
+    /* sort the block and establish posn of original string */
+    blockSort();
+
+    /*
+    * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
+    * :-). A 32 bit value does not really give a strong enough guarantee
+    * that the value will not appear by chance in the compressed
+    * datastream. Worst-case probability of this event, for a 900k block,
+    * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
+    * bits. For a compressed file of size 100Gb -- about 100000 blocks --
+    * only a 48-bit marker will do. NB: normal compression/ decompression
+    * donot rely on these statistical properties. They are only important
+    * when trying to recover blocks from damaged files.
+    */
+    bsPutUByte(0x31);
+    bsPutUByte(0x41);
+    bsPutUByte(0x59);
+    bsPutUByte(0x26);
+    bsPutUByte(0x53);
+    bsPutUByte(0x59);
+
+    /* Now the block's CRC, so it is in a known place. */
+    bsPutInt(this.blockCRC);
+
+    /* Now a single bit indicating randomisation. */
+    if (this.blockRandomised) {
+      bsW(1, 1);
+    } else {
+      bsW(1, 0);
+    }
+
+    /* Finally, block's contents proper. */
+    moveToFrontCodeAndSend();
+  }
+
+  private void endCompression() throws IOException {
+    /*
+    * Now another magic 48-bit number, 0x177245385090, to indicate the end
+    * of the last block. (sqrt(pi), if you want to know. I did want to use
+    * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
+    * to feel statistically comfortable. Call me paranoid.)
+    */
+    bsPutUByte(0x17);
+    bsPutUByte(0x72);
+    bsPutUByte(0x45);
+    bsPutUByte(0x38);
+    bsPutUByte(0x50);
+    bsPutUByte(0x90);
+
+    bsPutInt(this.combinedCRC);
+    bsFinishedWithStream();
+  }
+
+  /**
+  * Returns the blocksize parameter specified at construction time.
+  */
+  public final int getBlockSize() {
+    return this.blockSize100k;
+  }
+
+  public void write(final byte[] buf, int offs, final int len)
+      throws IOException {
+    if (offs < 0) {
+      throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
+    }
+    if (len < 0) {
+      throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
+    }
+    if (offs + len > buf.length) {
+      throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
+          + len + ") > buf.length(" + buf.length + ").");
+    }
+    if (this.out == null) {
+      throw new IOException("stream closed");
+    }
+
+    for (int hi = offs + len; offs < hi;) {
+      write0(buf[offs++]);
+    }
+  }
+
+  private void write0(int b) throws IOException {
+    if (this.currentChar != -1) {
+      b &= 0xff;
+      if (this.currentChar == b) {
+        if (++this.runLength > 254) {
+          writeRun();
+          this.currentChar = -1;
+          this.runLength = 0;
+        }
+        // else nothing to do
+      } else {
+        writeRun();
+        this.runLength = 1;
+        this.currentChar = b;
+      }
+    } else {
+      this.currentChar = b & 0xff;
+      this.runLength++;
+    }
+  }
+
+  private static void hbAssignCodes(final int[] code, final byte[] length,
+      final int minLen, final int maxLen, final int alphaSize) {
+    int vec = 0;
+    for (int n = minLen; n <= maxLen; n++) {
+      for (int i = 0; i < alphaSize; i++) {
+        if ((length[i] & 0xff) == n) {
+          code[i] = vec;
+          vec++;
+        }
+      }
+      vec <<= 1;
+    }
+  }
+
+  private void bsFinishedWithStream() throws IOException {
+    while (this.bsLive > 0) {
+      int ch = this.bsBuff >> 24;
+      this.out.write(ch); // write 8-bit
+      this.bsBuff <<= 8;
+      this.bsLive -= 8;
+    }
+  }
+
+  private void bsW(final int n, final int v) throws IOException {
+    final OutputStream outShadow = this.out;
+    int bsLiveShadow = this.bsLive;
+    int bsBuffShadow = this.bsBuff;
+
+    while (bsLiveShadow >= 8) {
+      outShadow.write(bsBuffShadow >> 24); // write 8-bit
+      bsBuffShadow <<= 8;
+      bsLiveShadow -= 8;
+    }
+
+    this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
+    this.bsLive = bsLiveShadow + n;
+  }
+
+  private void bsPutUByte(final int c) throws IOException {
+    bsW(8, c);
+  }
+
+  private void bsPutInt(final int u) throws IOException {
+    bsW(8, (u >> 24) & 0xff);
+    bsW(8, (u >> 16) & 0xff);
+    bsW(8, (u >> 8) & 0xff);
+    bsW(8, u & 0xff);
+  }
+
+  private void sendMTFValues() throws IOException {
+    final byte[][] len = this.data.sendMTFValues_len;
+    final int alphaSize = this.nInUse + 2;
+
+    for (int t = N_GROUPS; --t >= 0;) {
+      byte[] len_t = len[t];
+      for (int v = alphaSize; --v >= 0;) {
+        len_t[v] = GREATER_ICOST;
+      }
+    }
+
+    /* Decide how many coding tables to use */
+    // assert (this.nMTF > 0) : this.nMTF;
+    final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
+        : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
+
+    /* Generate an initial set of coding tables */
+    sendMTFValues0(nGroups, alphaSize);
+
+    /*
+    * Iterate up to N_ITERS times to improve the tables.
+    */
+    final int nSelectors = sendMTFValues1(nGroups, alphaSize);
+
+    /* Compute MTF values for the selectors. */
+    sendMTFValues2(nGroups, nSelectors);
+
+    /* Assign actual codes for the tables. */
+    sendMTFValues3(nGroups, alphaSize);
+
+    /* Transmit the mapping table. */
+    sendMTFValues4();
+
+    /* Now the selectors. */
+    sendMTFValues5(nGroups, nSelectors);
+
+    /* Now the coding tables. */
+    sendMTFValues6(nGroups, alphaSize);
+
+    /* And finally, the block data proper */
+    sendMTFValues7(nSelectors);
+  }
+
+  private void sendMTFValues0(final int nGroups, final int alphaSize) {
+    final byte[][] len = this.data.sendMTFValues_len;
+    final int[] mtfFreq = this.data.mtfFreq;
+
+    int remF = this.nMTF;
+    int gs = 0;
+
+    for (int nPart = nGroups; nPart > 0; nPart--) {
+      final int tFreq = remF / nPart;
+      int ge = gs - 1;
+      int aFreq = 0;
+
+      for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
+        aFreq += mtfFreq[++ge];
+      }
+
+      if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
+          && (((nGroups - nPart) & 1) != 0)) {
+        aFreq -= mtfFreq[ge--];
+      }
+
+      final byte[] len_np = len[nPart - 1];
+      for (int v = alphaSize; --v >= 0;) {
+        if ((v >= gs) && (v <= ge)) {
+          len_np[v] = LESSER_ICOST;
+        } else {
+          len_np[v] = GREATER_ICOST;
+        }
+      }
+
+      gs = ge + 1;
+      remF -= aFreq;
+    }
+  }
+
+  private int sendMTFValues1(final int nGroups, final int alphaSize) {
+    final Data dataShadow = this.data;
+    final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
+    final int[] fave = dataShadow.sendMTFValues_fave;
+    final short[] cost = dataShadow.sendMTFValues_cost;
+    final char[] sfmap = dataShadow.sfmap;
+    final byte[] selector = dataShadow.selector;
+    final byte[][] len = dataShadow.sendMTFValues_len;
+    final byte[] len_0 = len[0];
+    final byte[] len_1 = len[1];
+    final byte[] len_2 = len[2];
+    final byte[] len_3 = len[3];
+    final byte[] len_4 = len[4];
+    final byte[] len_5 = len[5];
+    final int nMTFShadow = this.nMTF;
+
+    int nSelectors = 0;
+
+    for (int iter = 0; iter < N_ITERS; iter++) {
+      for (int t = nGroups; --t >= 0;) {
+        fave[t] = 0;
+        int[] rfreqt = rfreq[t];
+        for (int i = alphaSize; --i >= 0;) {
+          rfreqt[i] = 0;
+        }
+      }
+
+      nSelectors = 0;
+
+      for (int gs = 0; gs < this.nMTF;) {
+        /* Set group start & end marks. */
+
+        /*
+        * Calculate the cost of this group as coded by each of the
+        * coding tables.
+        */
+
+        final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
+
+        if (nGroups == N_GROUPS) {
+          // unrolled version of the else-block
+
+          short cost0 = 0;
+          short cost1 = 0;
+          short cost2 = 0;
+          short cost3 = 0;
+          short cost4 = 0;
+          short cost5 = 0;
+
+          for (int i = gs; i <= ge; i++) {
+            final int icv = sfmap[i];
+            cost0 += len_0[icv] & 0xff;
+            cost1 += len_1[icv] & 0xff;
+            cost2 += len_2[icv] & 0xff;
+            cost3 += len_3[icv] & 0xff;
+            cost4 += len_4[icv] & 0xff;
+            cost5 += len_5[icv] & 0xff;
+          }
+
+          cost[0] = cost0;
+          cost[1] = cost1;
+          cost[2] = cost2;
+          cost[3] = cost3;
+          cost[4] = cost4;
+          cost[5] = cost5;
+
+        } else {
+          for (int t = nGroups; --t >= 0;) {
+            cost[t] = 0;
+          }
+
+          for (int i = gs; i <= ge; i++) {
+            final int icv = sfmap[i];
+            for (int t = nGroups; --t >= 0;) {
+              cost[t] += len[t][icv] & 0xff;
+            }
+          }
+        }
+
+        /*
+        * Find the coding table which is best for this group, and
+        * record its identity in the selector table.
+        */
+        int bt = -1;
+        for (int t = nGroups, bc = 999999999; --t >= 0;) {
+          final int cost_t = cost[t];
+          if (cost_t < bc) {
+            bc = cost_t;
+            bt = t;
+          }
+        }
+
+        fave[bt]++;
+        selector[nSelectors] = (byte) bt;
+        nSelectors++;
+
+        /*
+        * Increment the symbol frequencies for the selected table.
+        */
+        final int[] rfreq_bt = rfreq[bt];
+        for (int i = gs; i <= ge; i++) {
+          rfreq_bt[sfmap[i]]++;
+        }
+
+        gs = ge + 1;
+      }
+
+      /*
+      * Recompute the tables based on the accumulated frequencies.
+      */
+      for (int t = 0; t < nGroups; t++) {
+        hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
+      }
+    }
+
+    return nSelectors;
+  }
+
+  private void sendMTFValues2(final int nGroups, final int nSelectors) {
+    // assert (nGroups < 8) : nGroups;
+
+    final Data dataShadow = this.data;
+    byte[] pos = dataShadow.sendMTFValues2_pos;
+
+    for (int i = nGroups; --i >= 0;) {
+      pos[i] = (byte) i;
+    }
+
+    for (int i = 0; i < nSelectors; i++) {
+      final byte ll_i = dataShadow.selector[i];
+      byte tmp = pos[0];
+      int j = 0;
+
+      while (ll_i != tmp) {
+        j++;
+        byte tmp2 = tmp;
+        tmp = pos[j];
+        pos[j] = tmp2;
+      }
+
+      pos[0] = tmp;
+      dataShadow.selectorMtf[i] = (byte) j;
+    }
+  }
+
+  private void sendMTFValues3(final int nGroups, final int alphaSize) {
+    int[][] code = this.data.sendMTFValues_code;
+    byte[][] len = this.data.sendMTFValues_len;
+
+    for (int t = 0; t < nGroups; t++) {
+      int minLen = 32;
+      int maxLen = 0;
+      final byte[] len_t = len[t];
+      for (int i = alphaSize; --i >= 0;) {
+        final int l = len_t[i] & 0xff;
+        if (l > maxLen) {
+          maxLen = l;
+        }
+        if (l < minLen) {
+          minLen = l;
+        }
+      }
+
+      // assert (maxLen <= 20) : maxLen;
+      // assert (minLen >= 1) : minLen;
+
+      hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
+    }
+  }
+
+  private void sendMTFValues4() throws IOException {
+    final boolean[] inUse = this.data.inUse;
+    final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
+
+    for (int i = 16; --i >= 0;) {
+      inUse16[i] = false;
+      final int i16 = i * 16;
+      for (int j = 16; --j >= 0;) {
+        if (inUse[i16 + j]) {
+          inUse16[i] = true;
+        }
+      }
+    }
+
+    for (int i = 0; i < 16; i++) {
+      bsW(1, inUse16[i] ? 1 : 0);
+    }
+
+    final OutputStream outShadow = this.out;
+    int bsLiveShadow = this.bsLive;
+    int bsBuffShadow = this.bsBuff;
+
+    for (int i = 0; i < 16; i++) {
+      if (inUse16[i]) {
+        final int i16 = i * 16;
+        for (int j = 0; j < 16; j++) {
+          // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
+          while (bsLiveShadow >= 8) {
+            outShadow.write(bsBuffShadow >> 24); // write 8-bit
+            bsBuffShadow <<= 8;
+            bsLiveShadow -= 8;
+          }
+          if (inUse[i16 + j]) {
+            bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
+          }
+          bsLiveShadow++;
+        }
+      }
+    }
+
+    this.bsBuff = bsBuffShadow;
+    this.bsLive = bsLiveShadow;
+  }
+
+  private void sendMTFValues5(final int nGroups, final int nSelectors)
+      throws IOException {
+    bsW(3, nGroups);
+    bsW(15, nSelectors);
+
+    final OutputStream outShadow = this.out;
+    final byte[] selectorMtf = this.data.selectorMtf;
+
+    int bsLiveShadow = this.bsLive;
+    int bsBuffShadow = this.bsBuff;
+
+    for (int i = 0; i < nSelectors; i++) {
+      for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
+        // inlined: bsW(1, 1);
+        while (bsLiveShadow >= 8) {
+          outShadow.write(bsBuffShadow >> 24);
+          bsBuffShadow <<= 8;
+          bsLiveShadow -= 8;
+        }
+        bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
+        bsLiveShadow++;
+      }
+
+      // inlined: bsW(1, 0);
+      while (bsLiveShadow >= 8) {
+        outShadow.write(bsBuffShadow >> 24);
+        bsBuffShadow <<= 8;
+        bsLiveShadow -= 8;
+      }
+      // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
+      bsLiveShadow++;
+    }
+
+    this.bsBuff = bsBuffShadow;
+    this.bsLive = bsLiveShadow;
+  }
+
+  private void sendMTFValues6(final int nGroups, final int alphaSize)
+      throws IOException {
+    final byte[][] len = this.data.sendMTFValues_len;
+    final OutputStream outShadow = this.out;
+
+    int bsLiveShadow = this.bsLive;
+    int bsBuffShadow = this.bsBuff;
+
+    for (int t = 0; t < nGroups; t++) {
+      byte[] len_t = len[t];
+      int curr = len_t[0] & 0xff;
+
+      // inlined: bsW(5, curr);
+      while (bsLiveShadow >= 8) {
+        outShadow.write(bsBuffShadow >> 24); // write 8-bit
+        bsBuffShadow <<= 8;
+        bsLiveShadow -= 8;
+      }
+      bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
+      bsLiveShadow += 5;
+
+      for (int i = 0; i < alphaSize; i++) {
+        int lti = len_t[i] & 0xff;
+        while (curr < lti) {
+          // inlined: bsW(2, 2);
+          while (bsLiveShadow >= 8) {
+            outShadow.write(bsBuffShadow >> 24); // write 8-bit
+            bsBuffShadow <<= 8;
+            bsLiveShadow -= 8;
+          }
+          bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
+          bsLiveShadow += 2;
+
+          curr++; /* 10 */
+        }
+
+        while (curr > lti) {
+          // inlined: bsW(2, 3);
+          while (bsLiveShadow >= 8) {
+            outShadow.write(bsBuffShadow >> 24); // write 8-bit
+            bsBuffShadow <<= 8;
+            bsLiveShadow -= 8;
+          }
+          bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
+          bsLiveShadow += 2;
+
+          curr--; /* 11 */
+        }
+
+        // inlined: bsW(1, 0);
+        while (bsLiveShadow >= 8) {
+          outShadow.write(bsBuffShadow >> 24); // write 8-bit
+          bsBuffShadow <<= 8;
+          bsLiveShadow -= 8;
+        }
+        // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
+        bsLiveShadow++;
+      }
+    }
+
+    this.bsBuff = bsBuffShadow;
+    this.bsLive = bsLiveShadow;
+  }
+
+  private void sendMTFValues7(final int nSelectors) throws IOException {
+    final Data dataShadow = this.data;
+    final byte[][] len = dataShadow.sendMTFValues_len;
+    final int[][] code = dataShadow.sendMTFValues_code;
+    final OutputStream outShadow = this.out;
+    final byte[] selector = dataShadow.selector;
+    final char[] sfmap = dataShadow.sfmap;
+    final int nMTFShadow = this.nMTF;
+
+    int selCtr = 0;
+
+    int bsLiveShadow = this.bsLive;
+    int bsBuffShadow = this.bsBuff;
+
+    for (int gs = 0; gs < nMTFShadow;) {
+      final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
+      final int selector_selCtr = selector[selCtr] & 0xff;
+      final int[] code_selCtr = code[selector_selCtr];
+      final byte[] len_selCtr = len[selector_selCtr];
+
+      while (gs <= ge) {
+        final int sfmap_i = sfmap[gs];
+
+        //
+        // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
+        // code_selCtr[sfmap_i]);
+        //
+        while (bsLiveShadow >= 8) {
+          outShadow.write(bsBuffShadow >> 24);
+          bsBuffShadow <<= 8;
+          bsLiveShadow -= 8;
+        }
+        final int n = len_selCtr[sfmap_i] & 0xFF;
+        bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
+        bsLiveShadow += n;
+
+        gs++;
+      }
+
+      gs = ge + 1;
+      selCtr++;
+    }
+
+    this.bsBuff = bsBuffShadow;
+    this.bsLive = bsLiveShadow;
+  }
+
+  private void moveToFrontCodeAndSend() throws IOException {
+    bsW(24, this.origPtr);
+    generateMTFValues();
+    sendMTFValues();
+  }
+
+  /**
+  * This is the most hammered method of this class.
+  *
+  * <p>
+  * This is the version using unrolled loops. Normally I never use such ones
+  * in Java code. The unrolling has shown a noticable performance improvement
+  * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the
+  * JIT compiler of the vm.
+  * </p>
+  */
+  private boolean mainSimpleSort(final Data dataShadow, final int lo,
+      final int hi, final int d) {
+    final int bigN = hi - lo + 1;
+    if (bigN < 2) {
+      return this.firstAttempt && (this.workDone > this.workLimit);
+    }
+
+    int hp = 0;
+    while (INCS[hp] < bigN) {
+      hp++;
+    }
+
+    final int[] fmap = dataShadow.fmap;
+    final char[] quadrant = dataShadow.quadrant;
+    final byte[] block = dataShadow.block;
+    final int lastShadow = this.last;
+    final int lastPlus1 = lastShadow + 1;
+    final boolean firstAttemptShadow = this.firstAttempt;
+    final int workLimitShadow = this.workLimit;
+    int workDoneShadow = this.workDone;
+
+    // Following block contains unrolled code which could be shortened by
+    // coding it in additional loops.
+
+    HP: while (--hp >= 0) {
+      final int h = INCS[hp];
+      final int mj = lo + h - 1;
+
+      for (int i = lo + h; i <= hi;) {
+        // copy
+        for (int k = 3; (i <= hi) && (--k >= 0); i++) {
+          final int v = fmap[i];
+          final int vd = v + d;
+          int j = i;
+
+          // for (int a;
+          // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd,
+          // block, quadrant, lastShadow);
+          // j -= h) {
+          // fmap[j] = a;
+          // }
+          //
+          // unrolled version:
+
+          // start inline mainGTU
+          boolean onceRunned = false;
+          int a = 0;
+
+          HAMMER: while (true) {
+            if (onceRunned) {
+              fmap[j] = a;
+              if ((j -= h) <= mj) {
+                break HAMMER;
+              }
+            } else {
+              onceRunned = true;
+            }
+
+            a = fmap[j - h];
+            int i1 = a + d;
+            int i2 = vd;
+
+            // following could be done in a loop, but
+            // unrolled it for performance:
+            if (block[i1 + 1] == block[i2 + 1]) {
+              if (block[i1 + 2] == block[i2 + 2]) {
+                if (block[i1 + 3] == block[i2 + 3]) {
+                  if (block[i1 + 4] == block[i2 + 4]) {
+                    if (block[i1 + 5] == block[i2 + 5]) {
+                      if (block[(i1 += 6)] == block[(i2 += 6)]) {
+                        int x = lastShadow;
+                        X: while (x > 0) {
+                          x -= 4;
+
+                          if (block[i1 + 1] == block[i2 + 1]) {
+                            if (quadrant[i1] == quadrant[i2]) {
+                              if (block[i1 + 2] == block[i2 + 2]) {
+                                if (quadrant[i1 + 1] == quadrant[i2 + 1]) {
+                                  if (block[i1 + 3] == block[i2 + 3]) {
+                                    if (quadrant[i1 + 2] == quadrant[i2 + 2]) {
+                                      if (block[i1 + 4] == block[i2 + 4]) {
+                                        if (quadrant[i1 + 3] == quadrant[i2 + 3]) {
+                                          if ((i1 += 4) >= lastPlus1) {
+                                            i1 -= lastPlus1;
+                                          }
+                                          if ((i2 += 4) >= lastPlus1) {
+                                            i2 -= lastPlus1;
+                                          }
+                                          workDoneShadow++;
+                                          continue X;
+                                        } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) {
+                                          continue HAMMER;
+                                        } else {
+                                          break HAMMER;
+                                        }
+                                      } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
+                                        continue HAMMER;
+                                      } else {
+                                        break HAMMER;
+                                      }
+                                    } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) {
+                                      continue HAMMER;
+                                    } else {
+                                      break HAMMER;
+                                    }
+                                  } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
+                                    continue HAMMER;
+                                  } else {
+                                    break HAMMER;
+                                  }
+                                } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) {
+                                  continue HAMMER;
+                                } else {
+                                  break HAMMER;
+                                }
+                              } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
+                                continue HAMMER;
+                              } else {
+                                break HAMMER;
+                              }
+                            } else if ((quadrant[i1] > quadrant[i2])) {
+                              continue HAMMER;
+                            } else {
+                              break HAMMER;
+                            }
+                          } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
+                            continue HAMMER;
+                          } else {
+                            break HAMMER;
+                          }
+
+                        }
+                        break HAMMER;
+                      } // while x > 0
+                      else {
+                        if ((block[i1] & 0xff) > (block[i2] & 0xff)) {
+                          continue HAMMER;
+                        } else {
+                          break HAMMER;
+                        }
+                      }
+                    } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) {
+                      continue HAMMER;
+                    } else {
+                      break HAMMER;
+                    }
+                  } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
+                    continue HAMMER;
+                  } else {
+                    break HAMMER;
+                  }
+                } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
+                  continue HAMMER;
+                } else {
+                  break HAMMER;
+                }
+              } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
+                continue HAMMER;
+              } else {
+                break HAMMER;
+              }
+            } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
+              continue HAMMER;
+            } else {
+              break HAMMER;
+            }
+
+          } // HAMMER
+          // end inline mainGTU
+
+          fmap[j] = v;
+        }
+
+        if (firstAttemptShadow && (i <= hi)
+            && (workDoneShadow > workLimitShadow)) {
+          break HP;
+        }
+      }
+    }
+
+    this.workDone = workDoneShadow;
+    return firstAttemptShadow && (workDoneShadow > workLimitShadow);
+  }
+
+  private static void vswap(int[] fmap, int p1, int p2, int n) {
+    n += p1;
+    while (p1 < n) {
+      int t = fmap[p1];
+      fmap[p1++] = fmap[p2];
+      fmap[p2++] = t;
+    }
+  }
+
+  private static byte med3(byte a, byte b, byte c) {
+    return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c
+        : a);
+  }
+
+  private void blockSort() {
+    this.workLimit = WORK_FACTOR * this.last;
+    this.workDone = 0;
+    this.blockRandomised = false;
+    this.firstAttempt = true;
+    mainSort();
+
+    if (this.firstAttempt && (this.workDone > this.workLimit)) {
+      randomiseBlock();
+      this.workLimit = this.workDone = 0;
+      this.firstAttempt = false;
+      mainSort();
+    }
+
+    int[] fmap = this.data.fmap;
+    this.origPtr = -1;
+    for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
+      if (fmap[i] == 0) {
+        this.origPtr = i;
+        break;
+      }
+    }
+
+    // assert (this.origPtr != -1) : this.origPtr;
+  }
+
+  /**
+  * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
+  */
+  private void mainQSort3(final Data dataShadow, final int loSt,
+      final int hiSt, final int dSt) {
+    final int[] stack_ll = dataShadow.stack_ll;
+    final int[] stack_hh = dataShadow.stack_hh;
+    final int[] stack_dd = dataShadow.stack_dd;
+    final int[] fmap = dataShadow.fmap;
+    final byte[] block = dataShadow.block;
+
+    stack_ll[0] = loSt;
+    stack_hh[0] = hiSt;
+    stack_dd[0] = dSt;
+
+    for (int sp = 1; --sp >= 0;) {
+      final int lo = stack_ll[sp];
+      final int hi = stack_hh[sp];
+      final int d = stack_dd[sp];
+
+      if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) {
+        if (mainSimpleSort(dataShadow, lo, hi, d)) {
+          return;
+        }
+      } else {
+        final int d1 = d + 1;
+        final int med = med3(block[fmap[lo] + d1],
+            block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff;
+
+        int unLo = lo;
+        int unHi = hi;
+        int ltLo = lo;
+        int gtHi = hi;
+
+        while (true) {
+          while (unLo <= unHi) {
+            final int n = ((int) block[fmap[unLo] + d1] & 0xff)
+                - med;
+            if (n == 0) {
+              final int temp = fmap[unLo];
+              fmap[unLo++] = fmap[ltLo];
+              fmap[ltLo++] = temp;
+            } else if (n < 0) {
+              unLo++;
+            } else {
+              break;
+            }
+          }
+
+          while (unLo <= unHi) {
+            final int n = ((int) block[fmap[unHi] + d1] & 0xff)
+                - med;
+            if (n == 0) {
+              final int temp = fmap[unHi];
+              fmap[unHi--] = fmap[gtHi];
+              fmap[gtHi--] = temp;
+            } else if (n > 0) {
+              unHi--;
+            } else {
+              break;
+            }
+          }
+
+          if (unLo <= unHi) {
+            final int temp = fmap[unLo];
+            fmap[unLo++] = fmap[unHi];
+            fmap[unHi--] = temp;
+          } else {
+            break;
+          }
+        }
+
+        if (gtHi < ltLo) {
+          stack_ll[sp] = lo;
+          stack_hh[sp] = hi;
+          stack_dd[sp] = d1;
+          sp++;
+        } else {
+          int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo)
+              : (unLo - ltLo);
+          vswap(fmap, lo, unLo - n, n);
+          int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi)
+              : (gtHi - unHi);
+          vswap(fmap, unLo, hi - m + 1, m);
+
+          n = lo + unLo - ltLo - 1;
+          m = hi - (gtHi - unHi) + 1;
+
+          stack_ll[sp] = lo;
+          stack_hh[sp] = n;
+          stack_dd[sp] = d;
+          sp++;
+
+          stack_ll[sp] = n + 1;
+          stack_hh[sp] = m - 1;
+          stack_dd[sp] = d1;
+          sp++;
+
+          stack_ll[sp] = m;
+          stack_hh[sp] = hi;
+          stack_dd[sp] = d;
+          sp++;
+        }
+      }
+    }
+  }
+
+  private void mainSort() {
+    final Data dataShadow = this.data;
+    final int[] runningOrder = dataShadow.mainSort_runningOrder;
+    final int[] copy = dataShadow.mainSort_copy;
+    final boolean[] bigDone = dataShadow.mainSort_bigDone;
+    final int[] ftab = dataShadow.ftab;
+    final byte[] block = dataShadow.block;
+    final int[] fmap = dataShadow.fmap;
+    final char[] quadrant = dataShadow.quadrant;
+    final int lastShadow = this.last;
+    final int workLimitShadow = this.workLimit;
+    final boolean firstAttemptShadow = this.firstAttempt;
+
+    // Set up the 2-byte frequency table
+    for (int i = 65537; --i >= 0;) {
+      ftab[i] = 0;
+    }
+
+    /*
+    * In the various block-sized structures, live data runs from 0 to
+    * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area
+    * for block.
+    */
+    for (int i = 0; i < NUM_OVERSHOOT_BYTES; i++) {
+      block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1];
+    }
+    for (int i = lastShadow + NUM_OVERSHOOT_BYTES; --i >= 0;) {
+      quadrant[i] = 0;
+    }
+    block[0] = block[lastShadow + 1];
+
+    // Complete the initial radix sort:
+
+    int c1 = block[0] & 0xff;
+    for (int i = 0; i <= lastShadow; i++) {
+      final int c2 = block[i + 1] & 0xff;
+      ftab[(c1 << 8) + c2]++;
+      c1 = c2;
+    }
+
+    for (int i = 1; i <= 65536; i++)
+      ftab[i] += ftab[i - 1];
+
+    c1 = block[1] & 0xff;
+    for (int i = 0; i < lastShadow; i++) {
+      final int c2 = block[i + 2] & 0xff;
+      fmap[--ftab[(c1 << 8) + c2]] = i;
+      c1 = c2;
+    }
+
+    fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow;
+
+    /*
+    * Now ftab contains the first loc of every small bucket. Calculate the
+    * running order, from smallest to largest big bucket.
+    */
+    for (int i = 256; --i >= 0;) {
+      bigDone[i] = false;
+      runningOrder[i] = i;
+    }
+
+    for (int h = 364; h != 1;) {
+      h /= 3;
+      for (int i = h; i <= 255; i++) {
+        final int vv = runningOrder[i];
+        final int a = ftab[(vv + 1) << 8] - ftab[vv << 8];
+        final int b = h - 1;
+        int j = i;
+        for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j
+            - h]) {
+          runningOrder[j] = ro;
+          j -= h;
+          if (j <= b) {
+            break;
+          }
+        }
+        runningOrder[j] = vv;
+      }
+    }
+
+    /*
+    * The main sorting loop.
+    */
+    for (int i = 0; i <= 255; i++) {
+      /*
+      * Process big buckets, starting with the least full.
+      */
+      final int ss = runningOrder[i];
+
+      // Step 1:
+      /*
+      * Complete the big bucket [ss] by quicksorting any unsorted small
+      * buckets [ss, j]. Hopefully previous pointer-scanning phases have
+      * already completed many of the small buckets [ss, j], so we don't
+      * have to sort them at all.
+      */
+      for (int j = 0; j <= 255; j++) {
+        final int sb = (ss << 8) + j;
+        final int ftab_sb = ftab[sb];
+        if ((ftab_sb & SETMASK) != SETMASK) {
+          final int lo = ftab_sb & CLEARMASK;
+          final int hi = (ftab[sb + 1] & CLEARMASK) - 1;
+          if (hi > lo) {
+            mainQSort3(dataShadow, lo, hi, 2);
+            if (firstAttemptShadow
+                && (this.workDone > workLimitShadow)) {
+              return;
+            }
+          }
+          ftab[sb] = ftab_sb | SETMASK;
+        }
+      }
+
+      // Step 2:
+      // Now scan this big bucket so as to synthesise the
+      // sorted order for small buckets [t, ss] for all t != ss.
+
+      for (int j = 0; j <= 255; j++) {
+        copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
+      }
+
+      for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) {
+        final int fmap_j = fmap[j];
+        c1 = block[fmap_j] & 0xff;
+        if (!bigDone[c1]) {
+          fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1);
+          copy[c1]++;
+        }
+      }
+
+      for (int j = 256; --j >= 0;)
+        ftab[(j << 8) + ss] |= SETMASK;
+
+      // Step 3:
+      /*
+      * The ss big bucket is now done. Record this fact, and update the
+      * quadrant descriptors. Remember to update quadrants in the
+      * overshoot area too, if necessary. The "if (i < 255)" test merely
+      * skips this updating for the last bucket processed, since updating
+      * for the last bucket is pointless.
+      */
+      bigDone[ss] = true;
+
+      if (i < 255) {
+        final int bbStart = ftab[ss << 8] & CLEARMASK;
+        final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
+        int shifts = 0;
+
+        while ((bbSize >> shifts) > 65534) {
+          shifts++;
+        }
+
+        for (int j = 0; j < bbSize; j++) {
+          final int a2update = fmap[bbStart + j];
+          final char qVal = (char) (j >> shifts);
+          quadrant[a2update] = qVal;
+          if (a2update < NUM_OVERSHOOT_BYTES) {
+            quadrant[a2update + lastShadow + 1] = qVal;
+          }
+        }
+      }
+
+    }
+  }
+
+  private void randomiseBlock() {
+    final boolean[] inUse = this.data.inUse;
+    final byte[] block = this.data.block;
+    final int lastShadow = this.last;
+
+    for (int i = 256; --i >= 0;)
+      inUse[i] = false;
+
+    int rNToGo = 0;
+    int rTPos = 0;
+    for (int i = 0, j = 1; i <= lastShadow; i = j, j++) {
+      if (rNToGo == 0) {
+        rNToGo = (char) BZip2Constants.rNums[rTPos];
+        if (++rTPos == 512) {
+          rTPos = 0;
+        }
+      }
+
+      rNToGo--;
+      block[j] ^= ((rNToGo == 1) ? 1 : 0);
+
+      // handle 16 bit signed numbers
+      inUse[block[j] & 0xff] = true;
+    }
+
+    this.blockRandomised = true;
+  }
+
+  private void generateMTFValues() {
+    final int lastShadow = this.last;
+    final Data dataShadow = this.data;
+    final boolean[] inUse = dataShadow.inUse;
+    final byte[] block = dataShadow.block;
+    final int[] fmap = dataShadow.fmap;
+    final char[] sfmap = dataShadow.sfmap;
+    final int[] mtfFreq = dataShadow.mtfFreq;
+    final byte[] unseqToSeq = dataShadow.unseqToSeq;
+    final byte[] yy = dataShadow.generateMTFValues_yy;
+
+    // make maps
+    int nInUseShadow = 0;
+    for (int i = 0; i < 256; i++) {
+      if (inUse[i]) {
+        unseqToSeq[i] = (byte) nInUseShadow;
+        nInUseShadow++;
+      }
+    }
+    this.nInUse = nInUseShadow;
+
+    final int eob = nInUseShadow + 1;
+
+    for (int i = eob; i >= 0; i--) {
+      mtfFreq[i] = 0;
+    }
+
+    for (int i = nInUseShadow; --i >= 0;) {
+      yy[i] = (byte) i;
+    }
+
+    int wr = 0;
+    int zPend = 0;
+
+    for (int i = 0; i <= lastShadow; i++) {
+      final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
+      byte tmp = yy[0];
+      int j = 0;
+
+      while (ll_i != tmp) {
+        j++;
+        byte tmp2 = tmp;
+        tmp = yy[j];
+        yy[j] = tmp2;
+      }
+      yy[0] = tmp;
+
+      if (j == 0) {
+        zPend++;
+      } else {
+        if (zPend > 0) {
+          zPend--;
+          while (true) {
+            if ((zPend & 1) == 0) {
+              sfmap[wr] = RUNA;
+              wr++;
+              mtfFreq[RUNA]++;
+            } else {
+              sfmap[wr] = RUNB;
+              wr++;
+              mtfFreq[RUNB]++;
+            }
+
+            if (zPend >= 2) {
+              zPend = (zPend - 2) >> 1;
+            } else {
+              break;
+            }
+          }
+          zPend = 0;
+        }
+        sfmap[wr] = (char) (j + 1);
+        wr++;
+        mtfFreq[j + 1]++;
+      }
+    }
+
+    if (zPend > 0) {
+      zPend--;
+      while (true) {
+        if ((zPend & 1) == 0) {
+          sfmap[wr] = RUNA;
+          wr++;
+          mtfFreq[RUNA]++;
+        } else {
+          sfmap[wr] = RUNB;
+          wr++;
+          mtfFreq[RUNB]++;
+        }
+
+        if (zPend >= 2) {
+          zPend = (zPend - 2) >> 1;
+        } else {
+          break;
+        }
+      }
+    }
+
+    sfmap[wr] = (char) eob;
+    mtfFreq[eob]++;
+    this.nMTF = wr + 1;
+  }
+
+  private static final class Data extends Object {
+
+    // with blockSize 900k
+    final boolean[] inUse = new boolean[256]; // 256 byte
+    final byte[] unseqToSeq = new byte[256]; // 256 byte
+    final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
+    final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
+    final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
+
+    final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
+    final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
+    // byte
+    final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
+    // byte
+    final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
+    final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
+    final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
+    // byte
+    final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
+    final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
+
+    final int[] stack_ll = new int[QSORT_STACK_SIZE]; // 4000 byte
+    final int[] stack_hh = new int[QSORT_STACK_SIZE]; // 4000 byte
+    final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
+
+    final int[] mainSort_runningOrder = new int[256]; // 1024 byte
+    final int[] mainSort_copy = new int[256]; // 1024 byte
+    final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte
+
+    final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
+    final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
+    final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
+
+    final int[] ftab = new int[65537]; // 262148 byte
+    // ------------
+    // 333408 byte
+
+    final byte[] block; // 900021 byte
+    final int[] fmap; // 3600000 byte
+    final char[] sfmap; // 3600000 byte
+    // ------------
+    // 8433529 byte
+    // ============
+
+    /**
+    * Array instance identical to sfmap, both are used only temporarily and
+    * indepently, so we do not need to allocate additional memory.
+    */
+    final char[] quadrant;
+
+    Data(int blockSize100k) {
+      super();
+
+      final int n = blockSize100k * BZip2Constants.baseBlockSize;
+      this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
+      this.fmap = new int[n];
+      this.sfmap = new char[2 * n];
+      this.quadrant = this.sfmap;
+    }
+
+  }
+
+}

+ 125 - 0
src/core/org/apache/hadoop/io/compress/bzip2/CRC.java

@@ -0,0 +1,125 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+/*
+ * This package is based on the work done by Keiron Liddle, Aftex Software
+ * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
+ * great code.
+ */
+
+package org.apache.hadoop.io.compress.bzip2;
+
+/**
+ * A simple class the hold and calculate the CRC for sanity checking of the
+ * data.
+ *
+ */
+final class CRC {
+  static final int crc32Table[] = { 0x00000000, 0x04c11db7, 0x09823b6e,
+      0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
+      0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 0x350c9b64,
+      0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7,
+      0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2,
+      0x52568b75, 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011,
+      0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd, 0x9823b6e0,
+      0x9ce2ab57, 0x91a18d8e, 0x95609039, 0x8b27c03c, 0x8fe6dd8b,
+      0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef, 0xb7a96036,
+      0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
+      0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c,
+      0xc3f706fb, 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f,
+      0xfbb8bb46, 0xff79a6f1, 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a,
+      0xec7dd02d, 0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae,
+      0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, 0x128e9dcf,
+      0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4,
+      0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069,
+      0x75d48dde, 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
+      0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066, 0x4d9b3063,
+      0x495a2dd4, 0x44190b0d, 0x40d816ba, 0xaca5c697, 0xa864db20,
+      0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 0xb6238b25,
+      0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
+      0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7,
+      0xe4750050, 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c,
+      0xfa325055, 0xfef34de2, 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31,
+      0xcbffd686, 0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
+      0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 0x7a089632,
+      0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1,
+      0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4,
+      0x51435d53, 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
+      0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b, 0x0315d626,
+      0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 0x1011a0fa, 0x14d0bd4d,
+      0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60,
+      0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
+      0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a,
+      0xc0e2d0dd, 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9,
+      0xb4bcb610, 0xb07daba7, 0xae3afba2, 0xaafbe615, 0xa7b8c0cc,
+      0xa379dd7b, 0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f,
+      0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 0x5d8a9099,
+      0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2,
+      0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f,
+      0x76c15bf8, 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
+      0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30, 0x029f3d35,
+      0x065e2082, 0x0b1d065b, 0x0fdc1bec, 0x3793a651, 0x3352bbe6,
+      0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 0x2d15ebe3,
+      0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
+      0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1,
+      0xe760d676, 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa,
+      0xf9278673, 0xfde69bc4, 0x89b8fd09, 0x8d79e0be, 0x803ac667,
+      0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
+      0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 0xbcb4666d,
+      0xb8757bda, 0xb5365d03, 0xb1f740b4 };
+
+  CRC() {
+    initialiseCRC();
+  }
+
+  void initialiseCRC() {
+    globalCrc = 0xffffffff;
+  }
+
+  int getFinalCRC() {
+    return ~globalCrc;
+  }
+
+  int getGlobalCRC() {
+    return globalCrc;
+  }
+
+  void setGlobalCRC(int newCrc) {
+    globalCrc = newCrc;
+  }
+
+  void updateCRC(int inCh) {
+    int temp = (globalCrc >> 24) ^ inCh;
+    if (temp < 0) {
+      temp = 256 + temp;
+    }
+    globalCrc = (globalCrc << 8) ^ CRC.crc32Table[temp];
+  }
+
+  void updateCRC(int inCh, int repeat) {
+    int globalCrcShadow = this.globalCrc;
+    while (repeat-- > 0) {
+      int temp = (globalCrcShadow >> 24) ^ inCh;
+      globalCrcShadow = (globalCrcShadow << 8)
+          ^ crc32Table[(temp >= 0) ? temp : (temp + 256)];
+    }
+    this.globalCrc = globalCrcShadow;
+  }
+
+  int globalCrc;
+}

+ 6 - 0
src/test/org/apache/hadoop/io/compress/TestCodec.java

@@ -63,6 +63,10 @@ public class TestCodec extends TestCase {
       codecTest(conf, seed, count, "org.apache.hadoop.io.compress.LzopCodec");
       codecTest(conf, seed, count, "org.apache.hadoop.io.compress.LzopCodec");
     }
     }
   }
   }
+  
+  public void testBZip2Codec() throws IOException {    
+      codecTest(conf, seed, count, "org.apache.hadoop.io.compress.BZip2Codec");    
+  }
 
 
   private static void codecTest(Configuration conf, int seed, int count, 
   private static void codecTest(Configuration conf, int seed, int count, 
                                 String codecClass) 
                                 String codecClass) 
@@ -104,6 +108,8 @@ public class TestCodec extends TestCase {
     deflateOut.write(data.getData(), 0, data.getLength());
     deflateOut.write(data.getData(), 0, data.getLength());
     deflateOut.flush();
     deflateOut.flush();
     deflateFilter.finish();
     deflateFilter.finish();
+    //Necessary to close the stream for BZip2 Codec to write its final output.  Flush is not enough.
+    deflateOut.close();
     LOG.info("Finished compressing data");
     LOG.info("Finished compressing data");
     
     
     // De-compress data
     // De-compress data

+ 6 - 0
src/test/org/apache/hadoop/io/compress/TestCodecFactory.java

@@ -126,15 +126,21 @@ public class TestCodecFactory extends TestCase {
     assertEquals("default factory foo codec", null, codec);
     assertEquals("default factory foo codec", null, codec);
     codec = factory.getCodec(new Path("/tmp/foo.gz"));
     codec = factory.getCodec(new Path("/tmp/foo.gz"));
     checkCodec("default factory for .gz", GzipCodec.class, codec);
     checkCodec("default factory for .gz", GzipCodec.class, codec);
+    codec = factory.getCodec(new Path("/tmp/foo.bz2"));
+    checkCodec("default factory for .bz2", BZip2Codec.class, codec);
     factory = setClasses(new Class[0]);
     factory = setClasses(new Class[0]);
     codec = factory.getCodec(new Path("/tmp/foo.bar"));
     codec = factory.getCodec(new Path("/tmp/foo.bar"));
     assertEquals("empty codec bar codec", null, codec);
     assertEquals("empty codec bar codec", null, codec);
     codec = factory.getCodec(new Path("/tmp/foo.gz"));
     codec = factory.getCodec(new Path("/tmp/foo.gz"));
     assertEquals("empty codec gz codec", null, codec);
     assertEquals("empty codec gz codec", null, codec);
+    codec = factory.getCodec(new Path("/tmp/foo.bz2"));
+    assertEquals("default factory for .bz2", null, codec);
     factory = setClasses(new Class[]{BarCodec.class, FooCodec.class, 
     factory = setClasses(new Class[]{BarCodec.class, FooCodec.class, 
                                      FooBarCodec.class});
                                      FooBarCodec.class});
     codec = factory.getCodec(new Path("/tmp/.foo.bar.gz"));
     codec = factory.getCodec(new Path("/tmp/.foo.bar.gz"));
     assertEquals("full factory gz codec", null, codec);
     assertEquals("full factory gz codec", null, codec);
+    codec = factory.getCodec(new Path("/tmp/foo.bz2"));
+    assertEquals("default factory for .bz2", null, codec);
     codec = factory.getCodec(new Path("/tmp/foo.bar"));
     codec = factory.getCodec(new Path("/tmp/foo.bar"));
     checkCodec("full factory bar codec", BarCodec.class, codec);
     checkCodec("full factory bar codec", BarCodec.class, codec);
     codec = factory.getCodec(new Path("/tmp/foo/baz.foo.bar"));
     codec = factory.getCodec(new Path("/tmp/foo/baz.foo.bar"));