|
@@ -0,0 +1,570 @@
|
|
|
+/*
|
|
|
+ * Licensed to the Apache Software Foundation (ASF) under one
|
|
|
+ * or more contributor license agreements. See the NOTICE file
|
|
|
+ * distributed with this work for additional information
|
|
|
+ * regarding copyright ownership. The ASF licenses this file
|
|
|
+ * to you under the Apache License, Version 2.0 (the
|
|
|
+ * "License"); you may not use this file except in compliance
|
|
|
+ * with the License. You may obtain a copy of the License at
|
|
|
+ *
|
|
|
+ * http://www.apache.org/licenses/LICENSE-2.0
|
|
|
+ *
|
|
|
+ * Unless required by applicable law or agreed to in writing, software
|
|
|
+ * distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
+ * See the License for the specific language governing permissions and
|
|
|
+ * limitations under the License.
|
|
|
+ */
|
|
|
+
|
|
|
+package org.apache.hadoop.io.compress.zlib;
|
|
|
+
|
|
|
+import java.io.IOException;
|
|
|
+import java.util.zip.DataFormatException;
|
|
|
+import java.util.zip.Inflater;
|
|
|
+
|
|
|
+import org.apache.hadoop.util.PureJavaCrc32;
|
|
|
+import org.apache.hadoop.io.compress.Decompressor;
|
|
|
+
|
|
|
+/**
|
|
|
+ * A {@link Decompressor} based on the popular gzip compressed file format.
|
|
|
+ * http://www.gzip.org/
|
|
|
+ *
|
|
|
+ */
|
|
|
+public class BuiltInGzipDecompressor implements Decompressor {
|
|
|
+ private static final int GZIP_MAGIC_ID = 0x8b1f; // if read as LE short int
|
|
|
+ private static final int GZIP_DEFLATE_METHOD = 8;
|
|
|
+ private static final int GZIP_FLAGBIT_HEADER_CRC = 0x02;
|
|
|
+ private static final int GZIP_FLAGBIT_EXTRA_FIELD = 0x04;
|
|
|
+ private static final int GZIP_FLAGBIT_FILENAME = 0x08;
|
|
|
+ private static final int GZIP_FLAGBIT_COMMENT = 0x10;
|
|
|
+ private static final int GZIP_FLAGBITS_RESERVED = 0xe0;
|
|
|
+
|
|
|
+ // 'true' (nowrap) => Inflater will handle raw deflate stream only
|
|
|
+ private Inflater inflater = new Inflater(true);
|
|
|
+
|
|
|
+ private byte[] userBuf = null;
|
|
|
+ private int userBufOff = 0;
|
|
|
+ private int userBufLen = 0;
|
|
|
+
|
|
|
+ private byte[] localBuf = new byte[256];
|
|
|
+ private int localBufOff = 0;
|
|
|
+
|
|
|
+ private int headerBytesRead = 0;
|
|
|
+ private int trailerBytesRead = 0;
|
|
|
+ private int numExtraFieldBytesRemaining = -1;
|
|
|
+ private PureJavaCrc32 crc = new PureJavaCrc32();
|
|
|
+ private boolean hasExtraField = false;
|
|
|
+ private boolean hasFilename = false;
|
|
|
+ private boolean hasComment = false;
|
|
|
+ private boolean hasHeaderCRC = false;
|
|
|
+
|
|
|
+ private GzipStateLabel state;
|
|
|
+
|
|
|
+ /**
|
|
|
+ * The current state of the gzip decoder, external to the Inflater context.
|
|
|
+ * (Technically, the private variables localBuf through hasHeaderCRC are
|
|
|
+ * also part of the state, so this enum is merely the label for it.)
|
|
|
+ */
|
|
|
+ private static enum GzipStateLabel {
|
|
|
+ /**
|
|
|
+ * Immediately prior to or (strictly) within the 10-byte basic gzip header.
|
|
|
+ */
|
|
|
+ HEADER_BASIC,
|
|
|
+ /**
|
|
|
+ * Immediately prior to or within the optional "extra field."
|
|
|
+ */
|
|
|
+ HEADER_EXTRA_FIELD,
|
|
|
+ /**
|
|
|
+ * Immediately prior to or within the optional filename field.
|
|
|
+ */
|
|
|
+ HEADER_FILENAME,
|
|
|
+ /**
|
|
|
+ * Immediately prior to or within the optional comment field.
|
|
|
+ */
|
|
|
+ HEADER_COMMENT,
|
|
|
+ /**
|
|
|
+ * Immediately prior to or within the optional 2-byte header CRC value.
|
|
|
+ */
|
|
|
+ HEADER_CRC,
|
|
|
+ /**
|
|
|
+ * Immediately prior to or within the main compressed (deflate) data stream.
|
|
|
+ */
|
|
|
+ DEFLATE_STREAM,
|
|
|
+ /**
|
|
|
+ * Immediately prior to or (strictly) within the 4-byte uncompressed CRC.
|
|
|
+ */
|
|
|
+ TRAILER_CRC,
|
|
|
+ /**
|
|
|
+ * Immediately prior to or (strictly) within the 4-byte uncompressed size.
|
|
|
+ */
|
|
|
+ TRAILER_SIZE,
|
|
|
+ /**
|
|
|
+ * Immediately after the trailer (and potentially prior to the next gzip
|
|
|
+ * member/substream header), without reset() having been called.
|
|
|
+ */
|
|
|
+ FINISHED;
|
|
|
+ }
|
|
|
+
|
|
|
/**
 * Constructs a pure-Java gzip decompressor that expects to see a gzip
 * header first.
 */
public BuiltInGzipDecompressor() {
  // FIXME? The Inflater documentation states that in nowrap mode 'it is also
  // necessary to provide an extra "dummy" byte as input. This is required by
  // the ZLIB native library in order to support certain optimizations.'
  // That does not appear to hold in practice, and it is unclear where such a
  // byte would go or what value it should have. Having some deflated bytes
  // in the first buffer load may be enough. (How else would one do it?)
  crc.reset();
  state = GzipStateLabel.HEADER_BASIC;
}
|
|
|
+
|
|
|
+ /** {@inheritDoc} */
|
|
|
+ public synchronized boolean needsInput() {
|
|
|
+ if (state == GzipStateLabel.DEFLATE_STREAM) { // most common case
|
|
|
+ return inflater.needsInput();
|
|
|
+ }
|
|
|
+ // see userBufLen comment at top of decompress(); currently no need to
|
|
|
+ // verify userBufLen <= 0
|
|
|
+ return (state != GzipStateLabel.FINISHED);
|
|
|
+ }
|
|
|
+
|
|
|
+ /** {@inheritDoc} */
|
|
|
+ /*
|
|
|
+ * In our case, the input data includes both gzip header/trailer bytes (which
|
|
|
+ * we handle in executeState()) and deflate-stream bytes (which we hand off
|
|
|
+ * to Inflater).
|
|
|
+ *
|
|
|
+ * NOTE: This code assumes the data passed in via b[] remains unmodified
|
|
|
+ * until _we_ signal that it's safe to modify it (via needsInput()).
|
|
|
+ * The alternative would require an additional buffer-copy even for
|
|
|
+ * the bulk deflate stream, which is a performance hit we don't want
|
|
|
+ * to absorb. (Decompressor now documents this requirement.)
|
|
|
+ */
|
|
|
+ public synchronized void setInput(byte[] b, int off, int len) {
|
|
|
+ if (b == null) {
|
|
|
+ throw new NullPointerException();
|
|
|
+ }
|
|
|
+ if (off < 0 || len < 0 || off > b.length - len) {
|
|
|
+ throw new ArrayIndexOutOfBoundsException();
|
|
|
+ }
|
|
|
+
|
|
|
+ userBuf = b;
|
|
|
+ userBufOff = off;
|
|
|
+ userBufLen = len; // note: might be zero
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Decompress the data (gzip header, deflate stream, gzip trailer) in the
|
|
|
+ * provided buffer.
|
|
|
+ *
|
|
|
+ * @return the number of decompressed bytes placed into b
|
|
|
+ */
|
|
|
+ /* From the caller's perspective, this is where the state machine lives.
|
|
|
+ * The code is written such that we never return from decompress() with
|
|
|
+ * data remaining in userBuf unless we're in FINISHED state and there was
|
|
|
+ * data beyond the current gzip member (e.g., we're within a concatenated
|
|
|
+ * gzip stream). If this ever changes, {@link #needsInput()} will also
|
|
|
+ * need to be modified (i.e., uncomment the userBufLen condition).
|
|
|
+ *
|
|
|
+ * The actual deflate-stream processing (decompression) is handled by
|
|
|
+ * Java's Inflater class. Unlike the gzip header/trailer code (execute*
|
|
|
+ * methods below), the deflate stream is never copied; Inflater operates
|
|
|
+ * directly on the user's buffer.
|
|
|
+ */
|
|
|
+ public synchronized int decompress(byte[] b, int off, int len)
|
|
|
+ throws IOException {
|
|
|
+ int numAvailBytes = 0;
|
|
|
+
|
|
|
+ if (state != GzipStateLabel.DEFLATE_STREAM) {
|
|
|
+ executeHeaderState();
|
|
|
+
|
|
|
+ if (userBufLen <= 0) {
|
|
|
+ return numAvailBytes;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // "executeDeflateStreamState()"
|
|
|
+ if (state == GzipStateLabel.DEFLATE_STREAM) {
|
|
|
+ // hand off user data (or what's left of it) to Inflater--but note that
|
|
|
+ // Inflater may not have consumed all of previous bufferload (e.g., if
|
|
|
+ // data highly compressed or output buffer very small), in which case
|
|
|
+ // userBufLen will be zero
|
|
|
+ if (userBufLen > 0) {
|
|
|
+ inflater.setInput(userBuf, userBufOff, userBufLen);
|
|
|
+ userBufOff += userBufLen;
|
|
|
+ userBufLen = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ // now decompress it into b[]
|
|
|
+ try {
|
|
|
+ numAvailBytes = inflater.inflate(b, off, len);
|
|
|
+ } catch (DataFormatException dfe) {
|
|
|
+ throw new IOException(dfe.getMessage());
|
|
|
+ }
|
|
|
+ crc.update(b, off, numAvailBytes); // CRC-32 is on _uncompressed_ data
|
|
|
+ if (inflater.finished()) {
|
|
|
+ state = GzipStateLabel.TRAILER_CRC;
|
|
|
+ int bytesRemaining = inflater.getRemaining();
|
|
|
+ assert (bytesRemaining >= 0) :
|
|
|
+ "logic error: Inflater finished; byte-count is inconsistent";
|
|
|
+ // could save a copy of userBufLen at call to inflater.setInput() and
|
|
|
+ // verify that bytesRemaining <= origUserBufLen, but would have to
|
|
|
+ // be a (class) member variable...seems excessive for a sanity check
|
|
|
+ userBufOff -= bytesRemaining;
|
|
|
+ userBufLen = bytesRemaining; // or "+=", but guaranteed 0 coming in
|
|
|
+ } else {
|
|
|
+ return numAvailBytes; // minor optimization
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ executeTrailerState();
|
|
|
+
|
|
|
+ return numAvailBytes;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Parse the gzip header (assuming we're in the appropriate state).
|
|
|
+ * In order to deal with degenerate cases (e.g., user buffer is one byte
|
|
|
+ * long), we copy (some) header bytes to another buffer. (Filename,
|
|
|
+ * comment, and extra-field bytes are simply skipped.)</p>
|
|
|
+ *
|
|
|
+ * See http://www.ietf.org/rfc/rfc1952.txt for the gzip spec. Note that
|
|
|
+ * no version of gzip to date (at least through 1.4.0, 2010-01-20) supports
|
|
|
+ * the FHCRC header-CRC16 flagbit; instead, the implementation treats it
|
|
|
+ * as a multi-file continuation flag (which it also doesn't support). :-(
|
|
|
+ * Sun's JDK v6 (1.6) supports the header CRC, however, and so do we.
|
|
|
+ */
|
|
|
+ private void executeHeaderState() throws IOException {
|
|
|
+
|
|
|
+ // this can happen because DecompressorStream's decompress() is written
|
|
|
+ // to call decompress() first, setInput() second:
|
|
|
+ if (userBufLen <= 0) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // "basic"/required header: somewhere in first 10 bytes
|
|
|
+ if (state == GzipStateLabel.HEADER_BASIC) {
|
|
|
+ int n = Math.min(userBufLen, 10-localBufOff); // (or 10-headerBytesRead)
|
|
|
+ checkAndCopyBytesToLocal(n); // modifies userBufLen, etc.
|
|
|
+ if (localBufOff >= 10) { // should be strictly ==
|
|
|
+ processBasicHeader(); // sig, compression method, flagbits
|
|
|
+ localBufOff = 0; // no further need for basic header
|
|
|
+ state = GzipStateLabel.HEADER_EXTRA_FIELD;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (userBufLen <= 0) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // optional header stuff (extra field, filename, comment, header CRC)
|
|
|
+
|
|
|
+ if (state == GzipStateLabel.HEADER_EXTRA_FIELD) {
|
|
|
+ if (hasExtraField) {
|
|
|
+ // 2 substates: waiting for 2 bytes => get numExtraFieldBytesRemaining,
|
|
|
+ // or already have 2 bytes & waiting to finish skipping specified length
|
|
|
+ if (numExtraFieldBytesRemaining < 0) {
|
|
|
+ int n = Math.min(userBufLen, 2-localBufOff);
|
|
|
+ checkAndCopyBytesToLocal(n);
|
|
|
+ if (localBufOff >= 2) {
|
|
|
+ numExtraFieldBytesRemaining = readUShortLE(localBuf, 0);
|
|
|
+ localBufOff = 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (numExtraFieldBytesRemaining > 0 && userBufLen > 0) {
|
|
|
+ int n = Math.min(userBufLen, numExtraFieldBytesRemaining);
|
|
|
+ checkAndSkipBytes(n); // modifies userBufLen, etc.
|
|
|
+ numExtraFieldBytesRemaining -= n;
|
|
|
+ }
|
|
|
+ if (numExtraFieldBytesRemaining == 0) {
|
|
|
+ state = GzipStateLabel.HEADER_FILENAME;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ state = GzipStateLabel.HEADER_FILENAME;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (userBufLen <= 0) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (state == GzipStateLabel.HEADER_FILENAME) {
|
|
|
+ if (hasFilename) {
|
|
|
+ boolean doneWithFilename = checkAndSkipBytesUntilNull();
|
|
|
+ if (!doneWithFilename) {
|
|
|
+ return; // exit early: used up entire buffer without hitting NULL
|
|
|
+ }
|
|
|
+ }
|
|
|
+ state = GzipStateLabel.HEADER_COMMENT;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (userBufLen <= 0) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (state == GzipStateLabel.HEADER_COMMENT) {
|
|
|
+ if (hasComment) {
|
|
|
+ boolean doneWithComment = checkAndSkipBytesUntilNull();
|
|
|
+ if (!doneWithComment) {
|
|
|
+ return; // exit early: used up entire buffer
|
|
|
+ }
|
|
|
+ }
|
|
|
+ state = GzipStateLabel.HEADER_CRC;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (userBufLen <= 0) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (state == GzipStateLabel.HEADER_CRC) {
|
|
|
+ if (hasHeaderCRC) {
|
|
|
+ assert (localBufOff < 2);
|
|
|
+ int n = Math.min(userBufLen, 2-localBufOff);
|
|
|
+ copyBytesToLocal(n);
|
|
|
+ if (localBufOff >= 2) {
|
|
|
+ long headerCRC = readUShortLE(localBuf, 0);
|
|
|
+ if (headerCRC != (crc.getValue() & 0xffff)) {
|
|
|
+ throw new IOException("gzip header CRC failure");
|
|
|
+ }
|
|
|
+ localBufOff = 0;
|
|
|
+ crc.reset();
|
|
|
+ state = GzipStateLabel.DEFLATE_STREAM;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ crc.reset(); // will reuse for CRC-32 of uncompressed data
|
|
|
+ state = GzipStateLabel.DEFLATE_STREAM; // switching to Inflater now
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Parse the gzip trailer (assuming we're in the appropriate state).
|
|
|
+ * In order to deal with degenerate cases (e.g., user buffer is one byte
|
|
|
+ * long), we copy trailer bytes (all 8 of 'em) to a local buffer.</p>
|
|
|
+ *
|
|
|
+ * See http://www.ietf.org/rfc/rfc1952.txt for the gzip spec.
|
|
|
+ */
|
|
|
+ private void executeTrailerState() throws IOException {
|
|
|
+
|
|
|
+ if (userBufLen <= 0) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // verify that the CRC-32 of the decompressed stream matches the value
|
|
|
+ // stored in the gzip trailer
|
|
|
+ if (state == GzipStateLabel.TRAILER_CRC) {
|
|
|
+ // localBuf was empty before we handed off to Inflater, so we handle this
|
|
|
+ // exactly like header fields
|
|
|
+ assert (localBufOff < 4); // initially 0, but may need multiple calls
|
|
|
+ int n = Math.min(userBufLen, 4-localBufOff);
|
|
|
+ copyBytesToLocal(n);
|
|
|
+ if (localBufOff >= 4) {
|
|
|
+ long streamCRC = readUIntLE(localBuf, 0);
|
|
|
+ if (streamCRC != crc.getValue()) {
|
|
|
+ throw new IOException("gzip stream CRC failure");
|
|
|
+ }
|
|
|
+ localBufOff = 0;
|
|
|
+ crc.reset();
|
|
|
+ state = GzipStateLabel.TRAILER_SIZE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (userBufLen <= 0) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ // verify that the mod-2^32 decompressed stream size matches the value
|
|
|
+ // stored in the gzip trailer
|
|
|
+ if (state == GzipStateLabel.TRAILER_SIZE) {
|
|
|
+ assert (localBufOff < 4); // initially 0, but may need multiple calls
|
|
|
+ int n = Math.min(userBufLen, 4-localBufOff);
|
|
|
+ copyBytesToLocal(n); // modifies userBufLen, etc.
|
|
|
+ if (localBufOff >= 4) { // should be strictly ==
|
|
|
+ long inputSize = readUIntLE(localBuf, 0);
|
|
|
+ if (inputSize != (inflater.getBytesWritten() & 0xffffffff)) {
|
|
|
+ throw new IOException(
|
|
|
+ "stored gzip size doesn't match decompressed size");
|
|
|
+ }
|
|
|
+ localBufOff = 0;
|
|
|
+ state = GzipStateLabel.FINISHED;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (state == GzipStateLabel.FINISHED) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns the total number of compressed bytes input so far, including
|
|
|
+ * gzip header/trailer bytes.</p>
|
|
|
+ *
|
|
|
+ * @return the total (non-negative) number of compressed bytes read so far
|
|
|
+ */
|
|
|
+ public synchronized long getBytesRead() {
|
|
|
+ return headerBytesRead + inflater.getBytesRead() + trailerBytesRead;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns the number of bytes remaining in the input buffer; normally
|
|
|
+ * called when finished() is true to determine amount of post-gzip-stream
|
|
|
+ * data. Note that, other than the finished state with concatenated data
|
|
|
+ * after the end of the current gzip stream, this will never return a
|
|
|
+ * non-zero value unless called after {@link #setInput(byte[] b, int off,
|
|
|
+ * int len)} and before {@link #decompress(byte[] b, int off, int len)}.
|
|
|
+ * (That is, after {@link #decompress(byte[] b, int off, int len)} it
|
|
|
+ * always returns zero, except in finished state with concatenated data.)</p>
|
|
|
+ *
|
|
|
+ * @return the total (non-negative) number of unprocessed bytes in input
|
|
|
+ */
|
|
|
+ public synchronized int getRemaining() {
|
|
|
+ return userBufLen;
|
|
|
+ }
|
|
|
+
|
|
|
+ /** {@inheritDoc} */
|
|
|
+ public synchronized boolean needsDictionary() {
|
|
|
+ return inflater.needsDictionary();
|
|
|
+ }
|
|
|
+
|
|
|
+ /** {@inheritDoc} */
|
|
|
+ public synchronized void setDictionary(byte[] b, int off, int len) {
|
|
|
+ inflater.setDictionary(b, off, len);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Returns true if the end of the gzip substream (single "member") has been
|
|
|
+ * reached.</p>
|
|
|
+ */
|
|
|
+ public synchronized boolean finished() {
|
|
|
+ return (state == GzipStateLabel.FINISHED);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Resets everything, including the input buffer, regardless of whether the
|
|
|
+ * current gzip substream is finished.</p>
|
|
|
+ */
|
|
|
+ public synchronized void reset() {
|
|
|
+ // could optionally emit INFO message if state != GzipStateLabel.FINISHED
|
|
|
+ inflater.reset();
|
|
|
+ state = GzipStateLabel.HEADER_BASIC;
|
|
|
+ crc.reset();
|
|
|
+ userBufOff = userBufLen = 0;
|
|
|
+ localBufOff = 0;
|
|
|
+ headerBytesRead = 0;
|
|
|
+ trailerBytesRead = 0;
|
|
|
+ numExtraFieldBytesRemaining = -1;
|
|
|
+ hasExtraField = false;
|
|
|
+ hasFilename = false;
|
|
|
+ hasComment = false;
|
|
|
+ hasHeaderCRC = false;
|
|
|
+ }
|
|
|
+
|
|
|
+ /** {@inheritDoc} */
|
|
|
+ public synchronized void end() {
|
|
|
+ inflater.end();
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Check ID bytes (throw if necessary), compression method (throw if not 8),
|
|
|
+ * and flag bits (set hasExtraField, hasFilename, hasComment, hasHeaderCRC).
|
|
|
+ * Ignore MTIME, XFL, OS. Caller must ensure we have at least 10 bytes (at
|
|
|
+ * the start of localBuf).</p>
|
|
|
+ */
|
|
|
+ /*
|
|
|
+ * Flag bits (remainder are reserved and must be zero):
|
|
|
+ * bit 0 FTEXT
|
|
|
+ * bit 1 FHCRC (never implemented in gzip, at least through version
|
|
|
+ * 1.4.0; instead interpreted as "continuation of multi-
|
|
|
+ * part gzip file," which is unsupported through 1.4.0)
|
|
|
+ * bit 2 FEXTRA
|
|
|
+ * bit 3 FNAME
|
|
|
+ * bit 4 FCOMMENT
|
|
|
+ * [bit 5 encrypted]
|
|
|
+ */
|
|
|
+ private void processBasicHeader() throws IOException {
|
|
|
+ if (readUShortLE(localBuf, 0) != GZIP_MAGIC_ID) {
|
|
|
+ throw new IOException("not a gzip file");
|
|
|
+ }
|
|
|
+ if (readUByte(localBuf, 2) != GZIP_DEFLATE_METHOD) {
|
|
|
+ throw new IOException("gzip data not compressed with deflate method");
|
|
|
+ }
|
|
|
+ int flg = readUByte(localBuf, 3);
|
|
|
+ if ((flg & GZIP_FLAGBITS_RESERVED) != 0) {
|
|
|
+ throw new IOException("unknown gzip format (reserved flagbits set)");
|
|
|
+ }
|
|
|
+ hasExtraField = ((flg & GZIP_FLAGBIT_EXTRA_FIELD) != 0);
|
|
|
+ hasFilename = ((flg & GZIP_FLAGBIT_FILENAME) != 0);
|
|
|
+ hasComment = ((flg & GZIP_FLAGBIT_COMMENT) != 0);
|
|
|
+ hasHeaderCRC = ((flg & GZIP_FLAGBIT_HEADER_CRC) != 0);
|
|
|
+ }
|
|
|
+
|
|
|
+ private void checkAndCopyBytesToLocal(int len) {
|
|
|
+ System.arraycopy(userBuf, userBufOff, localBuf, localBufOff, len);
|
|
|
+ localBufOff += len;
|
|
|
+ // alternatively, could call checkAndSkipBytes(len) for rest...
|
|
|
+ crc.update(userBuf, userBufOff, len);
|
|
|
+ userBufOff += len;
|
|
|
+ userBufLen -= len;
|
|
|
+ headerBytesRead += len;
|
|
|
+ }
|
|
|
+
|
|
|
+ private void checkAndSkipBytes(int len) {
|
|
|
+ crc.update(userBuf, userBufOff, len);
|
|
|
+ userBufOff += len;
|
|
|
+ userBufLen -= len;
|
|
|
+ headerBytesRead += len;
|
|
|
+ }
|
|
|
+
|
|
|
+ // returns true if saw NULL, false if ran out of buffer first; called _only_
|
|
|
+ // during gzip-header processing (not trailer)
|
|
|
+ // (caller can check before/after state of userBufLen to compute num bytes)
|
|
|
+ private boolean checkAndSkipBytesUntilNull() {
|
|
|
+ boolean hitNull = false;
|
|
|
+ if (userBufLen > 0) {
|
|
|
+ do {
|
|
|
+ hitNull = (userBuf[userBufOff] == 0);
|
|
|
+ crc.update(userBuf[userBufOff]);
|
|
|
+ ++userBufOff;
|
|
|
+ --userBufLen;
|
|
|
+ ++headerBytesRead;
|
|
|
+ } while (userBufLen > 0 && !hitNull);
|
|
|
+ }
|
|
|
+ return hitNull;
|
|
|
+ }
|
|
|
+
|
|
|
+ // this one doesn't update the CRC and does support trailer processing but
|
|
|
+ // otherwise is same as its "checkAnd" sibling
|
|
|
+ private void copyBytesToLocal(int len) {
|
|
|
+ System.arraycopy(userBuf, userBufOff, localBuf, localBufOff, len);
|
|
|
+ localBufOff += len;
|
|
|
+ userBufOff += len;
|
|
|
+ userBufLen -= len;
|
|
|
+ if (state == GzipStateLabel.TRAILER_CRC ||
|
|
|
+ state == GzipStateLabel.TRAILER_SIZE) {
|
|
|
+ trailerBytesRead += len;
|
|
|
+ } else {
|
|
|
+ headerBytesRead += len;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private int readUByte(byte[] b, int off) {
|
|
|
+ return ((int)b[off] & 0xff);
|
|
|
+ }
|
|
|
+
|
|
|
+ // caller is responsible for not overrunning buffer
|
|
|
+ private int readUShortLE(byte[] b, int off) {
|
|
|
+ return ((((b[off+1] & 0xff) << 8) |
|
|
|
+ ((b[off] & 0xff) )) & 0xffff);
|
|
|
+ }
|
|
|
+
|
|
|
+ // caller is responsible for not overrunning buffer
|
|
|
+ private long readUIntLE(byte[] b, int off) {
|
|
|
+ return ((((long)(b[off+3] & 0xff) << 24) |
|
|
|
+ ((long)(b[off+2] & 0xff) << 16) |
|
|
|
+ ((long)(b[off+1] & 0xff) << 8) |
|
|
|
+ ((long)(b[off] & 0xff) )) & 0xffffffff);
|
|
|
+ }
|
|
|
+
|
|
|
+}
|