|
@@ -0,0 +1,298 @@
|
|
|
|
+/*
|
|
|
|
+ * Licensed to the Apache Software Foundation (ASF) under one
|
|
|
|
+ * or more contributor license agreements. See the NOTICE file
|
|
|
|
+ * distributed with this work for additional information
|
|
|
|
+ * regarding copyright ownership. The ASF licenses this file
|
|
|
|
+ * to you under the Apache License, Version 2.0 (the
|
|
|
|
+ * "License"); you may not use this file except in compliance
|
|
|
|
+ * with the License. You may obtain a copy of the License at
|
|
|
|
+ *
|
|
|
|
+ * http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
+ *
|
|
|
|
+ * Unless required by applicable law or agreed to in writing, software
|
|
|
|
+ * distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
+ * See the License for the specific language governing permissions and
|
|
|
|
+ * limitations under the License.
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+package org.apache.hadoop.io.compress.snappy;
|
|
|
|
+
|
|
|
|
+import java.io.IOException;
|
|
|
|
+import java.nio.Buffer;
|
|
|
|
+import java.nio.ByteBuffer;
|
|
|
|
+
|
|
|
|
+import org.apache.commons.logging.Log;
|
|
|
|
+import org.apache.commons.logging.LogFactory;
|
|
|
|
+import org.apache.hadoop.conf.Configuration;
|
|
|
|
+import org.apache.hadoop.io.compress.Compressor;
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * A {@link Compressor} based on the snappy compression algorithm.
|
|
|
|
+ * http://code.google.com/p/snappy/
|
|
|
|
+ */
|
|
|
|
+public class SnappyCompressor implements Compressor {
|
|
|
|
+ private static final Log LOG =
|
|
|
|
+ LogFactory.getLog(SnappyCompressor.class.getName());
|
|
|
|
+ private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64 * 1024;
|
|
|
|
+
|
|
|
|
+ // HACK - Use this as a global lock in the JNI layer
|
|
|
|
+ @SuppressWarnings({"unchecked", "unused"})
|
|
|
|
+ private static Class clazz = SnappyCompressor.class;
|
|
|
|
+
|
|
|
|
+ private int directBufferSize;
|
|
|
|
+ private Buffer compressedDirectBuf = null;
|
|
|
|
+ private int uncompressedDirectBufLen;
|
|
|
|
+ private Buffer uncompressedDirectBuf = null;
|
|
|
|
+ private byte[] userBuf = null;
|
|
|
|
+ private int userBufOff = 0, userBufLen = 0;
|
|
|
|
+ private boolean finish, finished;
|
|
|
|
+
|
|
|
|
+ private long bytesRead = 0L;
|
|
|
|
+ private long bytesWritten = 0L;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ static {
|
|
|
|
+ if (LoadSnappy.isLoaded()) {
|
|
|
|
+ // Initialize the native library
|
|
|
|
+ try {
|
|
|
|
+ initIDs();
|
|
|
|
+ } catch (Throwable t) {
|
|
|
|
+ // Ignore failure to load/initialize snappy
|
|
|
|
+ LOG.warn(t.toString());
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ LOG.error("Cannot load " + SnappyCompressor.class.getName() +
|
|
|
|
+ " without snappy library!");
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Creates a new compressor.
|
|
|
|
+ *
|
|
|
|
+ * @param directBufferSize size of the direct buffer to be used.
|
|
|
|
+ */
|
|
|
|
+ public SnappyCompressor(int directBufferSize) {
|
|
|
|
+ this.directBufferSize = directBufferSize;
|
|
|
|
+
|
|
|
|
+ uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize);
|
|
|
|
+ compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize);
|
|
|
|
+ compressedDirectBuf.position(directBufferSize);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Creates a new compressor with the default buffer size.
|
|
|
|
+ */
|
|
|
|
+ public SnappyCompressor() {
|
|
|
|
+ this(DEFAULT_DIRECT_BUFFER_SIZE);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Sets input data for compression.
|
|
|
|
+ * This should be called whenever #needsInput() returns
|
|
|
|
+ * <code>true</code> indicating that more input data is required.
|
|
|
|
+ *
|
|
|
|
+ * @param b Input data
|
|
|
|
+ * @param off Start offset
|
|
|
|
+ * @param len Length
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized void setInput(byte[] b, int off, int len) {
|
|
|
|
+ if (b == null) {
|
|
|
|
+ throw new NullPointerException();
|
|
|
|
+ }
|
|
|
|
+ if (off < 0 || len < 0 || off > b.length - len) {
|
|
|
|
+ throw new ArrayIndexOutOfBoundsException();
|
|
|
|
+ }
|
|
|
|
+ finished = false;
|
|
|
|
+
|
|
|
|
+ if (len > uncompressedDirectBuf.remaining()) {
|
|
|
|
+ // save data; now !needsInput
|
|
|
|
+ this.userBuf = b;
|
|
|
|
+ this.userBufOff = off;
|
|
|
|
+ this.userBufLen = len;
|
|
|
|
+ } else {
|
|
|
|
+ ((ByteBuffer) uncompressedDirectBuf).put(b, off, len);
|
|
|
|
+ uncompressedDirectBufLen = uncompressedDirectBuf.position();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ bytesRead += len;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * If a write would exceed the capacity of the direct buffers, it is set
|
|
|
|
+ * aside to be loaded by this function while the compressed data are
|
|
|
|
+ * consumed.
|
|
|
|
+ */
|
|
|
|
+ synchronized void setInputFromSavedData() {
|
|
|
|
+ if (0 >= userBufLen) {
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+ finished = false;
|
|
|
|
+
|
|
|
|
+ uncompressedDirectBufLen = Math.min(userBufLen, directBufferSize);
|
|
|
|
+ ((ByteBuffer) uncompressedDirectBuf).put(userBuf, userBufOff,
|
|
|
|
+ uncompressedDirectBufLen);
|
|
|
|
+
|
|
|
|
+ // Note how much data is being fed to snappy
|
|
|
|
+ userBufOff += uncompressedDirectBufLen;
|
|
|
|
+ userBufLen -= uncompressedDirectBufLen;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Does nothing.
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized void setDictionary(byte[] b, int off, int len) {
|
|
|
|
+ // do nothing
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Returns true if the input data buffer is empty and
|
|
|
|
+ * #setInput() should be called to provide more input.
|
|
|
|
+ *
|
|
|
|
+ * @return <code>true</code> if the input data buffer is empty and
|
|
|
|
+ * #setInput() should be called in order to provide more input.
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized boolean needsInput() {
|
|
|
|
+ return !(compressedDirectBuf.remaining() > 0
|
|
|
|
+ || uncompressedDirectBuf.remaining() == 0 || userBufLen > 0);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * When called, indicates that compression should end
|
|
|
|
+ * with the current contents of the input buffer.
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized void finish() {
|
|
|
|
+ finish = true;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Returns true if the end of the compressed
|
|
|
|
+ * data output stream has been reached.
|
|
|
|
+ *
|
|
|
|
+ * @return <code>true</code> if the end of the compressed
|
|
|
|
+ * data output stream has been reached.
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized boolean finished() {
|
|
|
|
+ // Check if all uncompressed data has been consumed
|
|
|
|
+ return (finish && finished && compressedDirectBuf.remaining() == 0);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Fills specified buffer with compressed data. Returns actual number
|
|
|
|
+ * of bytes of compressed data. A return value of 0 indicates that
|
|
|
|
+ * needsInput() should be called in order to determine if more input
|
|
|
|
+ * data is required.
|
|
|
|
+ *
|
|
|
|
+ * @param b Buffer for the compressed data
|
|
|
|
+ * @param off Start offset of the data
|
|
|
|
+ * @param len Size of the buffer
|
|
|
|
+ * @return The actual number of bytes of compressed data.
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized int compress(byte[] b, int off, int len)
|
|
|
|
+ throws IOException {
|
|
|
|
+ if (b == null) {
|
|
|
|
+ throw new NullPointerException();
|
|
|
|
+ }
|
|
|
|
+ if (off < 0 || len < 0 || off > b.length - len) {
|
|
|
|
+ throw new ArrayIndexOutOfBoundsException();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Check if there is compressed data
|
|
|
|
+ int n = compressedDirectBuf.remaining();
|
|
|
|
+ if (n > 0) {
|
|
|
|
+ n = Math.min(n, len);
|
|
|
|
+ ((ByteBuffer) compressedDirectBuf).get(b, off, n);
|
|
|
|
+ bytesWritten += n;
|
|
|
|
+ return n;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Re-initialize the snappy's output direct-buffer
|
|
|
|
+ compressedDirectBuf.clear();
|
|
|
|
+ compressedDirectBuf.limit(0);
|
|
|
|
+ if (0 == uncompressedDirectBuf.position()) {
|
|
|
|
+ // No compressed data, so we should have !needsInput or !finished
|
|
|
|
+ setInputFromSavedData();
|
|
|
|
+ if (0 == uncompressedDirectBuf.position()) {
|
|
|
|
+ // Called without data; write nothing
|
|
|
|
+ finished = true;
|
|
|
|
+ return 0;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Compress data
|
|
|
|
+ n = compressBytesDirect();
|
|
|
|
+ compressedDirectBuf.limit(n);
|
|
|
|
+ uncompressedDirectBuf.clear(); // snappy consumes all buffer input
|
|
|
|
+
|
|
|
|
+ // Set 'finished' if snapy has consumed all user-data
|
|
|
|
+ if (0 == userBufLen) {
|
|
|
|
+ finished = true;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Get atmost 'len' bytes
|
|
|
|
+ n = Math.min(n, len);
|
|
|
|
+ bytesWritten += n;
|
|
|
|
+ ((ByteBuffer) compressedDirectBuf).get(b, off, n);
|
|
|
|
+
|
|
|
|
+ return n;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Resets compressor so that a new set of input data can be processed.
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized void reset() {
|
|
|
|
+ finish = false;
|
|
|
|
+ finished = false;
|
|
|
|
+ uncompressedDirectBuf.clear();
|
|
|
|
+ uncompressedDirectBufLen = 0;
|
|
|
|
+ compressedDirectBuf.clear();
|
|
|
|
+ compressedDirectBuf.limit(0);
|
|
|
|
+ userBufOff = userBufLen = 0;
|
|
|
|
+ bytesRead = bytesWritten = 0L;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Prepare the compressor to be used in a new stream with settings defined in
|
|
|
|
+ * the given Configuration
|
|
|
|
+ *
|
|
|
|
+ * @param conf Configuration from which new setting are fetched
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized void reinit(Configuration conf) {
|
|
|
|
+ reset();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Return number of bytes given to this compressor since last reset.
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized long getBytesRead() {
|
|
|
|
+ return bytesRead;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Return number of bytes consumed by callers of compress since last reset.
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized long getBytesWritten() {
|
|
|
|
+ return bytesWritten;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Closes the compressor and discards any unprocessed input.
|
|
|
|
+ */
|
|
|
|
+ @Override
|
|
|
|
+ public synchronized void end() {
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private native static void initIDs();
|
|
|
|
+
|
|
|
|
+ private native int compressBytesDirect();
|
|
|
|
+}
|