浏览代码

Revert "HADOOP-12572. Update Hadoop's lz4 to r131. Contributed by Kevin Bowling."

This reverts commit 4c061e607ef62f81000aebdb04efbd1aa403c76d.
Haohui Mai 9 年之前
父节点
当前提交
4bff073b4d

+ 3 - 3
LICENSE.txt

@@ -257,8 +257,7 @@ For src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,lz4hc.h,l
 /*
    LZ4 - Fast LZ compression algorithm
    Header File
-   Copyright (C) 2011-2015, Yann Collet.
-
+   Copyright (C) 2011-2014, Yann Collet.
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -285,10 +284,11 @@ For src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,lz4hc.h,l
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 source repository : http://code.google.com/p/lz4/
    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 
+
 For hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/gtest
 ---------------------------------------------------------------------
 Copyright 2008, Google Inc.

+ 0 - 2
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -971,8 +971,6 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12575. Add build instruction for docker toolbox instead of
     boot2docker(Kai Sasaki via ozawa)
 
-    HADOOP-12572. Update Hadoop's lz4 to r131. (Kevin Bowling via wheat9)
-
     HADOOP-10035. Cleanup TestFilterFileSystem. (Suresh Srinivas via wheat9)
 
     HADOOP-10555. Add offset support to MurmurHash.

+ 2 - 2
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c

@@ -73,7 +73,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_comp
     return (jint)0;
   }
 
-  compressed_direct_buf_len = LZ4_compress_default(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len, LZ4_compressBound(uncompressed_direct_buf_len));
+  compressed_direct_buf_len = LZ4_compress(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len);
   if (compressed_direct_buf_len < 0){
     THROW(env, "java/lang/InternalError", "LZ4_compress failed");
   }
@@ -115,7 +115,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_comp
     return (jint)0;
   }
 
-  compressed_direct_buf_len = LZ4_compress_HC(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len, LZ4_compressBound(uncompressed_direct_buf_len), 0);
+  compressed_direct_buf_len = LZ4_compressHC(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len);
   if (compressed_direct_buf_len < 0){
     THROW(env, "java/lang/InternalError", "LZ4_compressHC failed");
   }

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.c

@@ -72,7 +72,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Decompressor_de
 
   uncompressed_direct_buf_len = LZ4_decompress_safe(compressed_bytes, uncompressed_bytes, compressed_direct_buf_len, uncompressed_direct_buf_len);
   if (uncompressed_direct_buf_len < 0) {
-    THROW(env, "java/lang/InternalError", "LZ4_decompress_safe failed.");
+    THROW(env, "java/lang/InternalError", "LZ4_uncompress_unknownOutputSize failed.");
   }
 
   (*env)->SetIntField(env, thisj, Lz4Decompressor_compressedDirectBufLen, 0);

文件差异内容过多而无法显示
+ 316 - 559
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c


+ 122 - 159
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.h

@@ -1,8 +1,7 @@
 /*
    LZ4 - Fast LZ compression algorithm
    Header File
-   Copyright (C) 2011-2015, Yann Collet.
-
+   Copyright (C) 2011-2014, Yann Collet.
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -29,7 +28,7 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 source repository : http://code.google.com/p/lz4/
    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 #pragma once
@@ -39,22 +38,22 @@ extern "C" {
 #endif
 
 /*
- * lz4.h provides block compression functions, and gives full buffer control to programmer.
- * If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
- * and can let the library handle its own memory, please use lz4frame.h instead.
+ * lz4.h provides raw compression format functions, for optimal performance and integration into programs.
+ * If you need to generate data using an inter-operable format (respecting the framing specification),
+ * please use lz4frame.h instead.
 */
 
 /**************************************
-*  Version
+   Version
 **************************************/
-#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
-#define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_MAJOR    1    /* for major interface/format changes  */
+#define LZ4_VERSION_MINOR    3    /* for minor interface/format changes  */
 #define LZ4_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 int LZ4_versionNumber (void);
 
 /**************************************
-*  Tuning parameter
+   Tuning parameter
 **************************************/
 /*
  * LZ4_MEMORY_USAGE :
@@ -67,90 +66,77 @@ int LZ4_versionNumber (void);
 
 
 /**************************************
-*  Simple Functions
+   Simple Functions
 **************************************/
 
-int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
+int LZ4_compress        (const char* source, char* dest, int sourceSize);
 int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
 
 /*
-LZ4_compress_default() :
-    Compresses 'sourceSize' bytes from buffer 'source'
-    into already allocated 'dest' buffer of size 'maxDestSize'.
-    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
-    It also runs faster, so it's a recommended setting.
-    If the function cannot compress 'source' into a more limited 'dest' budget,
-    compression stops *immediately*, and the function result is zero.
-    As a consequence, 'dest' content is not valid.
-    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
-        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
-        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
-        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
-              or 0 if compression fails
+LZ4_compress() :
+    Compresses 'sourceSize' bytes from 'source' into 'dest'.
+    Destination buffer must be already allocated,
+    and must be sized to handle worst cases situations (input data not compressible)
+    Worst case size evaluation is provided by function LZ4_compressBound()
+    sourceSize : Max supported value is LZ4_MAX_INPUT_SIZE
+    return : the number of bytes written in buffer dest
+             or 0 if the compression fails
 
 LZ4_decompress_safe() :
-    compressedSize : is the precise full size of the compressed block.
-    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
-    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
-             If destination buffer is not large enough, decoding will stop and output an error code (<0).
+    compressedSize : is obviously the source size
+    maxDecompressedSize : is the size of the destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into the destination buffer (necessarily <= maxDecompressedSize)
+             If the destination buffer is not large enough, decoding will stop and output an error code (<0).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against buffer overflow exploits, including malicious data packets.
-             It never writes outside output buffer, nor reads outside input buffer.
+             This function is protected against buffer overflow exploits,
+             and never writes outside of output buffer, nor reads outside of input buffer.
+             It is also protected against malicious data packets.
 */
 
 
 /**************************************
-*  Advanced Functions
+   Advanced Functions
 **************************************/
 #define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
-#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
 
 /*
 LZ4_compressBound() :
     Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
-    This function is primarily useful for memory allocation purposes (destination buffer size).
+    This function is primarily useful for memory allocation purposes (output buffer size).
     Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
-    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
-        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
-        return : maximum output size in a "worst case" scenario
-              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
-*/
-int LZ4_compressBound(int inputSize);
 
-/*
-LZ4_compress_fast() :
-    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
-    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
-    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
-    An acceleration value of "1" is the same as regular LZ4_compress_default()
-    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
+    isize  : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE
+    return : maximum output size in a "worst case" scenario
+             or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
 */
-int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
+int LZ4_compressBound(int isize);
 
 
 /*
-LZ4_compress_fast_extState() :
-    Same compression function, just using an externally allocated memory space to store compression state.
-    Use LZ4_sizeofState() to know how much memory must be allocated,
-    and allocate it on 8-bytes boundaries (using malloc() typically).
-    Then, provide it as 'void* state' to compression function.
+LZ4_compress_limitedOutput() :
+    Compress 'sourceSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+    If it cannot achieve it, compression will stop, and result of the function will be zero.
+    This saves time and memory on detecting non-compressible (or barely compressible) data.
+    This function never writes outside of provided output buffer.
+
+    sourceSize  : Max supported value is LZ4_MAX_INPUT_SIZE
+    maxOutputSize : is the size of the destination buffer (which must be already allocated)
+    return : the number of bytes written in buffer 'dest'
+             or 0 if compression fails
 */
-int LZ4_sizeofState(void);
-int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
+int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
 
 
 /*
-LZ4_compress_destSize() :
-    Reverse the logic, by compressing as much data as possible from 'source' buffer
-    into already allocated buffer 'dest' of size 'targetDestSize'.
-    This function either compresses the entire 'source' content into 'dest' if it's large enough,
-    or fill 'dest' buffer completely with as much data as possible from 'source'.
-        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
-                         New value is necessarily <= old value.
-        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
-              or 0 if compression fails
+LZ4_compress_withState() :
+    Same compression functions, but using an externally allocated memory space to store compression state.
+    Use LZ4_sizeofState() to know how much memory must be allocated,
+    and then, provide it as 'void* state' to compression functions.
 */
-int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
+int LZ4_sizeofState(void);
+int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
 
 
 /*
@@ -166,6 +152,7 @@ LZ4_decompress_fast() :
 */
 int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
 
+
 /*
 LZ4_decompress_safe_partial() :
     This function decompress a compressed block of size 'compressedSize' at position 'source'
@@ -182,108 +169,104 @@ int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedS
 
 
 /***********************************************
-*  Streaming Compression Functions
+   Experimental Streaming Compression Functions
 ***********************************************/
-#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
-#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
+
+#define LZ4_STREAMSIZE_U32 ((1 << (LZ4_MEMORY_USAGE-2)) + 8)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U32 * sizeof(unsigned int))
 /*
  * LZ4_stream_t
  * information structure to track an LZ4 stream.
  * important : init this structure content before first use !
- * note : only allocated directly the structure if you are statically linking LZ4
- *        If you are using liblz4 as a DLL, please use below construction methods instead.
  */
-typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
+typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t;
 
 /*
  * LZ4_resetStream
  * Use this function to init an allocated LZ4_stream_t structure
  */
-void LZ4_resetStream (LZ4_stream_t* streamPtr);
+void LZ4_resetStream (LZ4_stream_t* LZ4_streamPtr);
 
 /*
+ * If you prefer dynamic allocation methods,
  * LZ4_createStream will allocate and initialize an LZ4_stream_t structure
  * LZ4_freeStream releases its memory.
- * In the context of a DLL (liblz4), please use these methods rather than the static struct.
- * They are more future proof, in case of a change of LZ4_stream_t size.
  */
 LZ4_stream_t* LZ4_createStream(void);
-int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+int           LZ4_freeStream (LZ4_stream_t* LZ4_stream);
 
 /*
  * LZ4_loadDict
  * Use this function to load a static dictionary into LZ4_stream.
  * Any previous data will be forgotten, only 'dictionary' will remain in memory.
  * Loading a size of 0 is allowed.
- * Return : dictionary size, in bytes (necessarily <= 64 KB)
+ * Return : 1 if OK, 0 if error
  */
-int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+int LZ4_loadDict (LZ4_stream_t* LZ4_stream, const char* dictionary, int dictSize);
 
 /*
- * LZ4_compress_fast_continue
- * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
- * Important : Previous data blocks are assumed to still be present and unmodified !
- * 'dst' buffer must be already allocated.
- * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
- * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
+ * LZ4_compress_continue
+ * Compress data block 'source', using blocks compressed before as dictionary to improve compression ratio
+ * Previous data blocks are assumed to still be present at their previous location.
  */
-int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
+int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize);
+
+/*
+ * LZ4_compress_limitedOutput_continue
+ * Same as before, but also specify a maximum target compressed size (maxOutputSize)
+ * If objective cannot be met, compression exits, and returns a zero.
+ */
+int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize);
 
 /*
  * LZ4_saveDict
  * If previously compressed data block is not guaranteed to remain available at its memory location
  * save it into a safer place (char* safeBuffer)
  * Note : you don't need to call LZ4_loadDict() afterwards,
- *        dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue()
- * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error
+ *        dictionary is immediately usable, you can therefore call again LZ4_compress_continue()
+ * Return : dictionary size in bytes, or 0 if error
+ * Note : any dictSize > 64 KB will be interpreted as 64KB.
  */
-int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
+int LZ4_saveDict (LZ4_stream_t* LZ4_stream, char* safeBuffer, int dictSize);
 
 
 /************************************************
-*  Streaming Decompression Functions
+  Experimental Streaming Decompression Functions
 ************************************************/
 
-#define LZ4_STREAMDECODESIZE_U64  4
-#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
-typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
+#define LZ4_STREAMDECODESIZE_U32 4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U32 * sizeof(unsigned int))
 /*
  * LZ4_streamDecode_t
  * information structure to track an LZ4 stream.
- * init this structure content using LZ4_setStreamDecode or memset() before first use !
- *
- * In the context of a DLL (liblz4) please prefer usage of construction methods below.
- * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
- * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
- * LZ4_freeStreamDecode releases its memory.
+ * important : init this structure content using LZ4_setStreamDecode or memset() before first use !
  */
-LZ4_streamDecode_t* LZ4_createStreamDecode(void);
-int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+typedef struct { unsigned int table[LZ4_STREAMDECODESIZE_U32]; } LZ4_streamDecode_t;
 
 /*
  * LZ4_setStreamDecode
  * Use this function to instruct where to find the dictionary.
- * Setting a size of 0 is allowed (same effect as reset).
+ * This function can be used to specify a static dictionary,
+ * or to instruct where to find some previously decoded data saved into a different memory space.
+ * Setting a size of 0 is allowed (same effect as no dictionary).
  * Return : 1 if OK, 0 if error
  */
 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
 
+/*
+ * If you prefer dynamic allocation methods,
+ * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
+ * LZ4_freeStreamDecode releases its memory.
+ */
+LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+
 /*
 *_continue() :
     These decoding functions allow decompression of multiple blocks in "streaming" mode.
-    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
-    In the case of a ring buffers, decoding buffer must be either :
-    - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
-      In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
-    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
-      maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
-      In which case, encoding and decoding buffers do not need to be synchronized,
-      and encoding ring buffer can have any size, including small ones ( < 64 KB).
-    - _At least_ 64 KB + 8 bytes + maxBlockSize.
-      In which case, encoding and decoding buffers do not need to be synchronized,
-      and encoding ring buffer can have any size, including larger than decoding buffer.
-    Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
-    and indicate where it is saved using LZ4_setStreamDecode()
+    Previously decoded blocks must still be available at the memory position where they were decoded.
+    If it's not possible, save the relevant part of decoded data into a safe buffer,
+    and indicate its new address using LZ4_setStreamDecode()
 */
 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
@@ -293,8 +276,8 @@ int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
 Advanced decoding functions :
 *_usingDict() :
     These decoding functions work the same as
-    a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
-    They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure.
+    a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
+    They don't use nor update an LZ4_streamDecode_t structure.
 */
 int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
 int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
@@ -302,57 +285,37 @@ int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalS
 
 
 /**************************************
-*  Obsolete Functions
+   Obsolete Functions
 **************************************/
-/* Deprecate Warnings */
-/* Should these warnings messages be a problem,
-   it is generally possible to disable them,
-   with -Wno-deprecated-declarations for gcc
-   or _CRT_SECURE_NO_WARNINGS in Visual for example.
-   You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
-#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
-#  define LZ4_DEPRECATE_WARNING_DEFBLOCK
-#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#  elif (LZ4_GCC_VERSION >= 301)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
-#  elif defined(_MSC_VER)
-#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
-#  else
-#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
-#    define LZ4_DEPRECATED(message)
-#  endif
-#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */
-
-/* Obsolete compression functions */
-/* These functions are planned to start generate warnings by r131 approximately */
-int LZ4_compress               (const char* source, char* dest, int sourceSize);
-int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
-int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
-int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
-int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
-
-/* Obsolete decompression functions */
-/* These function names are completely deprecated and must no longer be used.
-   They are only provided here for compatibility with older programs.
-    - LZ4_uncompress is the same as LZ4_decompress_fast
-    - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
-   These function prototypes are now disabled; uncomment them only if you really need them.
-   It is highly recommended to stop using these prototypes and migrate to maintained ones */
+/*
+Obsolete decompression functions
+These function names are deprecated and should no longer be used.
+They are only provided here for compatibility with older user programs.
+- LZ4_uncompress is the same as LZ4_decompress_fast
+- LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
+These function prototypes are now disabled; uncomment them if you really need them.
+It is highly recommended to stop using these functions and migrate to newer ones */
 /* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
 /* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */
 
+/*
+ * If you prefer dynamic allocation methods,
+ * LZ4_createStreamDecode()
+ * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
+ * LZ4_free just frees it.
+ */
+/* void* LZ4_createStreamDecode(void); */
+/*int   LZ4_free (void* LZ4_stream);    yes, it's the same one as for compression */
+
 /* Obsolete streaming functions; use new streaming interface whenever possible */
-LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
-LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
-LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
-LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
+void* LZ4_create (const char* inputBuffer);
+int   LZ4_sizeofStreamState(void);
+int   LZ4_resetStreamState(void* state, const char* inputBuffer);
+char* LZ4_slideInputBuffer (void* state);
 
 /* Obsolete streaming decoding functions */
-LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
-LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
+int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int compressedSize, int maxOutputSize);
+int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int originalSize);
 
 
 #if defined (__cplusplus)

文件差异内容过多而无法显示
+ 532 - 364
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c


+ 102 - 118
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h

@@ -1,7 +1,7 @@
 /*
    LZ4 HC - High Compression Mode of LZ4
    Header File
-   Copyright (C) 2011-2015, Yann Collet.
+   Copyright (C) 2011-2014, Yann Collet.
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -28,8 +28,8 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 source repository : https://github.com/Cyan4973/lz4
-   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+   - LZ4 source repository : http://code.google.com/p/lz4/
 */
 #pragma once
 
@@ -38,150 +38,134 @@
 extern "C" {
 #endif
 
-/*****************************
-*  Includes
-*****************************/
-#include <stddef.h>   /* size_t */
 
+int LZ4_compressHC (const char* source, char* dest, int inputSize);
+/*
+LZ4_compressHC :
+    return : the number of bytes in compressed buffer dest
+             or 0 if compression fails.
+    note : destination buffer must be already allocated.
+        To avoid any problem, size it to handle worst cases situations (input data not compressible)
+        Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
+*/
 
-/**************************************
-*  Block Compression
-**************************************/
-int LZ4_compress_HC (const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
 /*
-LZ4_compress_HC :
-    Destination buffer 'dst' must be already allocated.
-    Compression completion is guaranteed if 'dst' buffer is sized to handle worst circumstances (data not compressible)
-    Worst size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
-      srcSize  : Max supported value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
-      compressionLevel : Recommended values are between 4 and 9, although any value between 0 and 16 will work.
-                         0 means "use default value" (see lz4hc.c).
-                         Values >16 behave the same as 16.
-      return : the number of bytes written into buffer 'dst'
-            or 0 if compression fails.
+LZ4_compressHC_limitedOutput() :
+    Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
+    If it cannot achieve it, compression will stop, and result of the function will be zero.
+    This function never writes outside of provided output buffer.
+
+    inputSize  : Max supported value is 1 GB
+    maxOutputSize : is maximum allowed size into the destination buffer (which must be already allocated)
+    return : the number of output bytes written in buffer 'dest'
+             or 0 if compression fails.
 */
 
 
+int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+/*
+    Same functions as above, but with programmable 'compressionLevel'.
+    Recommended values are between 4 and 9, although any value between 0 and 16 will work.
+    'compressionLevel'==0 means use default 'compressionLevel' value.
+    Values above 16 behave the same as 16.
+    Equivalent variants exist for all other compression functions below.
+*/
+
 /* Note :
-   Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
+Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
 */
 
 
+/**************************************
+   Using an external allocation
+**************************************/
 int LZ4_sizeofStateHC(void);
-int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+int LZ4_compressHC2_withStateHC              (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+
 /*
-LZ4_compress_HC_extStateHC() :
-   Use this function if you prefer to manually allocate memory for compression tables.
-   To know how much memory must be allocated for the compression tables, use :
-      int LZ4_sizeofStateHC();
+These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods.
+To know how much memory must be allocated for the compression tables, use :
+int LZ4_sizeofStateHC();
 
-   Allocated memory must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
+Note that tables must be aligned for pointer (32 or 64 bits), otherwise compression will fail (return code 0).
 
-   The allocated memory can then be provided to the compression functions using 'void* state' parameter.
-   LZ4_compress_HC_extStateHC() is equivalent to previously described function.
-   It just uses externally allocated memory for stateHC.
+The allocated memory can be provided to the compression functions using the 'void* state' parameter.
+LZ4_compressHC_withStateHC() and LZ4_compressHC_limitedOutput_withStateHC() are equivalent to previously described functions.
+They just use the externally allocated memory area instead of allocating their own (on stack, or on heap).
 */
 
 
 /**************************************
-*  Streaming Compression
+   Streaming Functions
 **************************************/
-#define LZ4_STREAMHCSIZE        262192
-#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
-typedef struct { size_t table[LZ4_STREAMHCSIZE_SIZET]; } LZ4_streamHC_t;
-/*
-  LZ4_streamHC_t
-  This structure allows static allocation of LZ4 HC streaming state.
-  State must then be initialized using LZ4_resetStreamHC() before first use.
-
-  Static allocation should only be used in combination with static linking.
-  If you want to use LZ4 as a DLL, please use construction functions below, which are future-proof.
-*/
+/* Note : these streaming functions still follow the older model */
+void* LZ4_createHC (const char* inputBuffer);
+int   LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize);
+int   LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize);
+char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+int   LZ4_freeHC (void* LZ4HC_Data);
 
+int   LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+int   LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
 
-LZ4_streamHC_t* LZ4_createStreamHC(void);
-int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
 /*
-  These functions create and release memory for LZ4 HC streaming state.
-  Newly created states are already initialized.
-  Existing state space can be re-used anytime using LZ4_resetStreamHC().
-  If you use LZ4 as a DLL, use these functions instead of static structure allocation,
-  to avoid size mismatch between different versions.
+These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks.
+In order to achieve this, it is necessary to start creating the LZ4HC Data Structure, thanks to the function :
+
+void* LZ4_createHC (const char* inputBuffer);
+The result of the function is the (void*) pointer on the LZ4HC Data Structure.
+This pointer will be needed in all other functions.
+If the pointer returned is NULL, then the allocation has failed, and compression must be aborted.
+The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer.
+The input buffer must be already allocated, and size at least 192KB.
+'inputBuffer' will also be the 'const char* source' of the first block.
+
+All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'.
+To compress each block, use either LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue().
+Their behavior is identical to that of LZ4_compressHC() or LZ4_compressHC_limitedOutput(),
+but require the LZ4HC Data Structure as their first argument, and check that each block starts right after the previous one.
+If next block does not begin immediately after the previous one, the compression will fail (return 0).
+
+When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to :
+char* LZ4_slideInputBufferHC(void* LZ4HC_Data);
+must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer.
+Note that, for this function to work properly, minimum size of an input buffer must be 192KB.
+==> The memory position where the next input data block must start is provided as the result of the function.
+
+Compression can then resume, using LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(), as usual.
+
+When compression is completed, a call to LZ4_freeHC() will release the memory used by the LZ4HC Data Structure.
 */
 
-void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
-int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
-
-int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
-
-int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
+int LZ4_sizeofStreamStateHC(void);
+int LZ4_resetStreamStateHC(void* state, const char* inputBuffer);
 
 /*
-  These functions compress data in successive blocks of any size, using previous blocks as dictionary.
-  One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
-  There is an exception for ring buffers, which can be smaller 64 KB.
-  Such case is automatically detected and correctly handled by LZ4_compress_HC_continue().
-
-  Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
-  A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
-
-  Then, use LZ4_compress_HC_continue() to compress each successive block.
-  It works like LZ4_compress_HC(), but use previous memory blocks as dictionary to improve compression.
-  Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
-  As a reminder, size 'dst' buffer to handle worst cases, using LZ4_compressBound(), to ensure success of compression operation.
-
-  If, for any reason, previous data blocks can't be preserved unmodified in memory during next compression block,
-  you must save it to a safer memory space, using LZ4_saveDictHC().
-  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
-*/
+These functions achieve the same result as :
+void* LZ4_createHC (const char* inputBuffer);
 
+They are provided here to allow the user program to allocate memory using its own routines.
 
+To know how much space must be allocated, use LZ4_sizeofStreamStateHC();
+Note also that space must be aligned for pointers (32 or 64 bits).
 
-/**************************************
-*  Deprecated Functions
-**************************************/
-/* Deprecate Warnings */
-/* Should these warnings messages be a problem,
-   it is generally possible to disable them,
-   with -Wno-deprecated-declarations for gcc
-   or _CRT_SECURE_NO_WARNINGS in Visual for example.
-   You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
-#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
-#  define LZ4_DEPRECATE_WARNING_DEFBLOCK
-#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
-#  elif (LZ4_GCC_VERSION >= 301)
-#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
-#  elif defined(_MSC_VER)
-#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
-#  else
-#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
-#    define LZ4_DEPRECATED(message)
-#  endif
-#endif // LZ4_DEPRECATE_WARNING_DEFBLOCK
-
-/* compression functions */
-/* these functions are planned to trigger warning messages by r131 approximately */
-int LZ4_compressHC                (const char* source, char* dest, int inputSize);
-int LZ4_compressHC_limitedOutput  (const char* source, char* dest, int inputSize, int maxOutputSize);
-int LZ4_compressHC2               (const char* source, char* dest, int inputSize, int compressionLevel);
-int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
-int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-int LZ4_compressHC2_withStateHC              (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
-int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
-int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
-
-/* Streaming functions following the older model; should no longer be used */
-LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
-LZ4_DEPRECATED("use LZ4_saveDictHC() instead")     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
-LZ4_DEPRECATED("use LZ4_freeStreamHC() instead")   int   LZ4_freeHC (void* LZ4HC_Data);
-LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int   LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
-LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int   LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int   LZ4_sizeofStreamStateHC(void);
-LZ4_DEPRECATED("use LZ4_resetStreamHC() instead")  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+Once space is allocated, you must initialize it using : LZ4_resetStreamStateHC(void* state, const char* inputBuffer);
+void* state is a pointer to the space allocated.
+It must be aligned for pointers (32 or 64 bits), and be large enough.
+The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer.
+The input buffer must be already allocated, and size at least 192KB.
+'inputBuffer' will also be the 'const char* source' of the first block.
+
+The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamStateHC().
+The return value of LZ4_resetStreamStateHC() is 0 if OK.
+Any other value means there was an error (typically, state is not aligned for pointers (32 or 64 bits)).
+*/
 
 
 #if defined (__cplusplus)

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/codec/Lz4Codec.cc

@@ -35,7 +35,7 @@ Lz4CompressStream::Lz4CompressStream(OutputStream * stream, uint32_t bufferSizeH
 
 void Lz4CompressStream::compressOneBlock(const void * buff, uint32_t length) {
   size_t compressedLength = _tempBufferSize - 8;
-  int ret = LZ4_compress_default((char*)buff, _tempBuffer + 8, length, LZ4_compressBound(length));
+  int ret = LZ4_compress((char*)buff, _tempBuffer + 8, length);
   if (ret > 0) {
     compressedLength = ret;
     ((uint32_t*)_tempBuffer)[0] = bswap(length);

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/test/TestCompressions.cc

@@ -185,7 +185,7 @@ void MeasureSingleFileLz4(const string & path, CompressResult & total, size_t bl
     size_t currentblocksize = std::min(data.length() - start, blockSize);
     uint64_t startTime = t.now();
     for (int i = 0; i < times; i++) {
-      int osize = LZ4_compress_default((char*)data.data() + start, outputBuffer, currentblocksize, LZ4_compressBound(currentblocksize));
+      int osize = LZ4_compress((char*)data.data() + start, outputBuffer, currentblocksize);
       result.compressedSize += osize;
       result.uncompressedSize += currentblocksize;
     }

部分文件因为文件数量过多而无法显示