Ver Fonte

HADOOP-12572. Update Hadoop's lz4 to r131. Contributed by Kevin Bowling.

Haohui Mai há 9 anos atrás
pai
commit
4c061e607e

+ 3 - 3
LICENSE.txt

@@ -257,7 +257,8 @@ For src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,lz4hc.h,l
 /*
    LZ4 - Fast LZ compression algorithm
    Header File
-   Copyright (C) 2011-2014, Yann Collet.
+   Copyright (C) 2011-2015, Yann Collet.
+
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -284,11 +285,10 @@ For src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,lz4hc.h,l
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 source repository : http://code.google.com/p/lz4/
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 
-
 For hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/gtest
 ---------------------------------------------------------------------
 Copyright 2008, Google Inc.

+ 2 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -968,6 +968,8 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12575. Add build instruction for docker toolbox instead of
     boot2docker(Kai Sasaki via ozawa)
 
+    HADOOP-12572. Update Hadoop's lz4 to r131. (Kevin Bowling via wheat9)
+
   OPTIMIZATIONS
 
     HADOOP-11785. Reduce the number of listStatus operation in distcp

+ 2 - 2
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c

@@ -73,7 +73,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_comp
     return (jint)0;
   }
 
-  compressed_direct_buf_len = LZ4_compress(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len);
+  compressed_direct_buf_len = LZ4_compress_default(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len, LZ4_compressBound(uncompressed_direct_buf_len));
   if (compressed_direct_buf_len < 0){
     THROW(env, "java/lang/InternalError", "LZ4_compress failed");
   }
@@ -115,7 +115,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_comp
     return (jint)0;
   }
 
-  compressed_direct_buf_len = LZ4_compressHC(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len);
+  compressed_direct_buf_len = LZ4_compress_HC(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len, LZ4_compressBound(uncompressed_direct_buf_len), 0);
   if (compressed_direct_buf_len < 0){
     THROW(env, "java/lang/InternalError", "LZ4_compressHC failed");
   }

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Decompressor.c

@@ -72,7 +72,7 @@ JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Decompressor_de
 
   uncompressed_direct_buf_len = LZ4_decompress_safe(compressed_bytes, uncompressed_bytes, compressed_direct_buf_len, uncompressed_direct_buf_len);
   if (uncompressed_direct_buf_len < 0) {
-    THROW(env, "java/lang/InternalError", "LZ4_uncompress_unknownOutputSize failed.");
+    THROW(env, "java/lang/InternalError", "LZ4_decompress_safe failed.");
   }
 
   (*env)->SetIntField(env, thisj, Lz4Decompressor_compressedDirectBufLen, 0);

Diff do ficheiro suprimidas por serem muito extensas
+ 559 - 316
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c


+ 159 - 122
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.h

@@ -1,7 +1,8 @@
 /*
    LZ4 - Fast LZ compression algorithm
    Header File
-   Copyright (C) 2011-2014, Yann Collet.
+   Copyright (C) 2011-2015, Yann Collet.
+
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -28,7 +29,7 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 source repository : http://code.google.com/p/lz4/
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
    - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 #pragma once
@@ -38,22 +39,22 @@ extern "C" {
 #endif
 
 /*
- * lz4.h provides raw compression format functions, for optimal performance and integration into programs.
- * If you need to generate data using an inter-operable format (respecting the framing specification),
- * please use lz4frame.h instead.
+ * lz4.h provides block compression functions, and gives full buffer control to programmer.
+ * If you need to generate inter-operable compressed data (respecting LZ4 frame specification),
+ * and can let the library handle its own memory, please use lz4frame.h instead.
 */
 
 /**************************************
-   Version
+*  Version
 **************************************/
-#define LZ4_VERSION_MAJOR    1    /* for major interface/format changes  */
-#define LZ4_VERSION_MINOR    3    /* for minor interface/format changes  */
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    7    /* for new (non-breaking) interface capabilities */
 #define LZ4_VERSION_RELEASE  1    /* for tweaks, bug-fixes, or development */
 #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
 int LZ4_versionNumber (void);
 
 /**************************************
-   Tuning parameter
+*  Tuning parameter
 **************************************/
 /*
  * LZ4_MEMORY_USAGE :
@@ -66,77 +67,90 @@ int LZ4_versionNumber (void);
 
 
 /**************************************
-   Simple Functions
+*  Simple Functions
 **************************************/
 
-int LZ4_compress        (const char* source, char* dest, int sourceSize);
+int LZ4_compress_default(const char* source, char* dest, int sourceSize, int maxDestSize);
 int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize);
 
 /*
-LZ4_compress() :
-    Compresses 'sourceSize' bytes from 'source' into 'dest'.
-    Destination buffer must be already allocated,
-    and must be sized to handle worst cases situations (input data not compressible)
-    Worst case size evaluation is provided by function LZ4_compressBound()
-    inputSize : Max supported value is LZ4_MAX_INPUT_SIZE
-    return : the number of bytes written in buffer dest
-             or 0 if the compression fails
+LZ4_compress_default() :
+    Compresses 'sourceSize' bytes from buffer 'source'
+    into already allocated 'dest' buffer of size 'maxDestSize'.
+    Compression is guaranteed to succeed if 'maxDestSize' >= LZ4_compressBound(sourceSize).
+    It also runs faster, so it's a recommended setting.
+    If the function cannot compress 'source' into a more limited 'dest' budget,
+    compression stops *immediately*, and the function result is zero.
+    As a consequence, 'dest' content is not valid.
+    This function never writes outside 'dest' buffer, nor read outside 'source' buffer.
+        sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
+        maxDestSize : full or partial size of buffer 'dest' (which must be already allocated)
+        return : the number of bytes written into buffer 'dest' (necessarily <= maxOutputSize)
+              or 0 if compression fails
 
 LZ4_decompress_safe() :
-    compressedSize : is obviously the source size
-    maxDecompressedSize : is the size of the destination buffer, which must be already allocated.
-    return : the number of bytes decompressed into the destination buffer (necessarily <= maxDecompressedSize)
-             If the destination buffer is not large enough, decoding will stop and output an error code (<0).
+    compressedSize : is the precise full size of the compressed block.
+    maxDecompressedSize : is the size of destination buffer, which must be already allocated.
+    return : the number of bytes decompressed into destination buffer (necessarily <= maxDecompressedSize)
+             If destination buffer is not large enough, decoding will stop and output an error code (<0).
              If the source stream is detected malformed, the function will stop decoding and return a negative result.
-             This function is protected against buffer overflow exploits,
-             and never writes outside of output buffer, nor reads outside of input buffer.
-             It is also protected against malicious data packets.
+             This function is protected against buffer overflow exploits, including malicious data packets.
+             It never writes outside output buffer, nor reads outside input buffer.
 */
 
 
 /**************************************
-   Advanced Functions
+*  Advanced Functions
 **************************************/
 #define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
-#define LZ4_COMPRESSBOUND(isize)  ((unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
 
 /*
 LZ4_compressBound() :
     Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
-    This function is primarily useful for memory allocation purposes (output buffer size).
+    This function is primarily useful for memory allocation purposes (destination buffer size).
     Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
-
-    isize  : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE
-    return : maximum output size in a "worst case" scenario
-             or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
+    Note that LZ4_compress_default() compress faster when dest buffer size is >= LZ4_compressBound(srcSize)
+        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
+        return : maximum output size in a "worst case" scenario
+              or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE)
 */
-int LZ4_compressBound(int isize);
-
+int LZ4_compressBound(int inputSize);
 
 /*
-LZ4_compress_limitedOutput() :
-    Compress 'sourceSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-    If it cannot achieve it, compression will stop, and result of the function will be zero.
-    This saves time and memory on detecting non-compressible (or barely compressible) data.
-    This function never writes outside of provided output buffer.
-
-    sourceSize  : Max supported value is LZ4_MAX_INPUT_VALUE
-    maxOutputSize : is the size of the destination buffer (which must be already allocated)
-    return : the number of bytes written in buffer 'dest'
-             or 0 if compression fails
+LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows to select an "acceleration" factor.
+    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
+    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by ACCELERATION_DEFAULT (see lz4.c), which is 1.
 */
-int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+int LZ4_compress_fast (const char* source, char* dest, int sourceSize, int maxDestSize, int acceleration);
 
 
 /*
-LZ4_compress_withState() :
-    Same compression functions, but using an externally allocated memory space to store compression state.
+LZ4_compress_fast_extState() :
+    Same compression function, just using an externally allocated memory space to store compression state.
     Use LZ4_sizeofState() to know how much memory must be allocated,
-    and then, provide it as 'void* state' to compression functions.
+    and allocate it on 8-bytes boundaries (using malloc() typically).
+    Then, provide it as 'void* state' to compression function.
 */
 int LZ4_sizeofState(void);
-int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
-int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compress_fast_extState (void* state, const char* source, char* dest, int inputSize, int maxDestSize, int acceleration);
+
+
+/*
+LZ4_compress_destSize() :
+    Reverse the logic, by compressing as much data as possible from 'source' buffer
+    into already allocated buffer 'dest' of size 'targetDestSize'.
+    This function either compresses the entire 'source' content into 'dest' if it's large enough,
+    or fill 'dest' buffer completely with as much data as possible from 'source'.
+        *sourceSizePtr : will be modified to indicate how many bytes where read from 'source' to fill 'dest'.
+                         New value is necessarily <= old value.
+        return : Nb bytes written into 'dest' (necessarily <= targetDestSize)
+              or 0 if compression fails
+*/
+int LZ4_compress_destSize (const char* source, char* dest, int* sourceSizePtr, int targetDestSize);
 
 
 /*
@@ -152,7 +166,6 @@ LZ4_decompress_fast() :
 */
 int LZ4_decompress_fast (const char* source, char* dest, int originalSize);
 
-
 /*
 LZ4_decompress_safe_partial() :
     This function decompress a compressed block of size 'compressedSize' at position 'source'
@@ -169,104 +182,108 @@ int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedS
 
 
 /***********************************************
-   Experimental Streaming Compression Functions
+*  Streaming Compression Functions
 ***********************************************/
-
-#define LZ4_STREAMSIZE_U32 ((1 << (LZ4_MEMORY_USAGE-2)) + 8)
-#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U32 * sizeof(unsigned int))
+#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4)
+#define LZ4_STREAMSIZE     (LZ4_STREAMSIZE_U64 * sizeof(long long))
 /*
  * LZ4_stream_t
  * information structure to track an LZ4 stream.
  * important : init this structure content before first use !
+ * note : only allocated directly the structure if you are statically linking LZ4
+ *        If you are using liblz4 as a DLL, please use below construction methods instead.
  */
-typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t;
+typedef struct { long long table[LZ4_STREAMSIZE_U64]; } LZ4_stream_t;
 
 /*
  * LZ4_resetStream
  * Use this function to init an allocated LZ4_stream_t structure
  */
-void LZ4_resetStream (LZ4_stream_t* LZ4_streamPtr);
+void LZ4_resetStream (LZ4_stream_t* streamPtr);
 
 /*
- * If you prefer dynamic allocation methods,
  * LZ4_createStream will allocate and initialize an LZ4_stream_t structure
  * LZ4_freeStream releases its memory.
+ * In the context of a DLL (liblz4), please use these methods rather than the static struct.
+ * They are more future proof, in case of a change of LZ4_stream_t size.
  */
 LZ4_stream_t* LZ4_createStream(void);
-int           LZ4_freeStream (LZ4_stream_t* LZ4_stream);
+int           LZ4_freeStream (LZ4_stream_t* streamPtr);
 
 /*
  * LZ4_loadDict
  * Use this function to load a static dictionary into LZ4_stream.
  * Any previous data will be forgotten, only 'dictionary' will remain in memory.
  * Loading a size of 0 is allowed.
- * Return : 1 if OK, 0 if error
+ * Return : dictionary size, in bytes (necessarily <= 64 KB)
  */
-int LZ4_loadDict (LZ4_stream_t* LZ4_stream, const char* dictionary, int dictSize);
+int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
 
 /*
- * LZ4_compress_continue
- * Compress data block 'source', using blocks compressed before as dictionary to improve compression ratio
- * Previous data blocks are assumed to still be present at their previous location.
+ * LZ4_compress_fast_continue
+ * Compress buffer content 'src', using data from previously compressed blocks as dictionary to improve compression ratio.
+ * Important : Previous data blocks are assumed to still be present and unmodified !
+ * 'dst' buffer must be already allocated.
+ * If maxDstSize >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ * If not, and if compressed data cannot fit into 'dst' buffer size, compression stops, and function returns a zero.
  */
-int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize);
-
-/*
- * LZ4_compress_limitedOutput_continue
- * Same as before, but also specify a maximum target compressed size (maxOutputSize)
- * If objective cannot be met, compression exits, and returns a zero.
- */
-int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int maxDstSize, int acceleration);
 
 /*
  * LZ4_saveDict
  * If previously compressed data block is not guaranteed to remain available at its memory location
  * save it into a safer place (char* safeBuffer)
  * Note : you don't need to call LZ4_loadDict() afterwards,
- *        dictionary is immediately usable, you can therefore call again LZ4_compress_continue()
- * Return : dictionary size in bytes, or 0 if error
- * Note : any dictSize > 64 KB will be interpreted as 64KB.
+ *        dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue()
+ * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error
  */
-int LZ4_saveDict (LZ4_stream_t* LZ4_stream, char* safeBuffer, int dictSize);
+int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int dictSize);
 
 
 /************************************************
-  Experimental Streaming Decompression Functions
+*  Streaming Decompression Functions
 ************************************************/
 
-#define LZ4_STREAMDECODESIZE_U32 4
-#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U32 * sizeof(unsigned int))
+#define LZ4_STREAMDECODESIZE_U64  4
+#define LZ4_STREAMDECODESIZE     (LZ4_STREAMDECODESIZE_U64 * sizeof(unsigned long long))
+typedef struct { unsigned long long table[LZ4_STREAMDECODESIZE_U64]; } LZ4_streamDecode_t;
 /*
  * LZ4_streamDecode_t
  * information structure to track an LZ4 stream.
- * important : init this structure content using LZ4_setStreamDecode or memset() before first use !
+ * init this structure content using LZ4_setStreamDecode or memset() before first use !
+ *
+ * In the context of a DLL (liblz4) please prefer usage of construction methods below.
+ * They are more future proof, in case of a change of LZ4_streamDecode_t size in the future.
+ * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
+ * LZ4_freeStreamDecode releases its memory.
  */
-typedef struct { unsigned int table[LZ4_STREAMDECODESIZE_U32]; } LZ4_streamDecode_t;
+LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
 
 /*
  * LZ4_setStreamDecode
  * Use this function to instruct where to find the dictionary.
- * This function can be used to specify a static dictionary,
- * or to instruct where to find some previously decoded data saved into a different memory space.
- * Setting a size of 0 is allowed (same effect as no dictionary).
+ * Setting a size of 0 is allowed (same effect as reset).
  * Return : 1 if OK, 0 if error
  */
 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
 
-/*
- * If you prefer dynamic allocation methods,
- * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure
- * LZ4_freeStreamDecode releases its memory.
- */
-LZ4_streamDecode_t* LZ4_createStreamDecode(void);
-int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
-
 /*
 *_continue() :
     These decoding functions allow decompression of multiple blocks in "streaming" mode.
-    Previously decoded blocks must still be available at the memory position where they were decoded.
-    If it's not possible, save the relevant part of decoded data into a safe buffer,
-    and indicate where its new address using LZ4_setStreamDecode()
+    Previously decoded blocks *must* remain available at the memory position where they were decoded (up to 64 KB)
+    In the case of a ring buffers, decoding buffer must be either :
+    - Exactly same size as encoding buffer, with same update rule (block boundaries at same positions)
+      In which case, the decoding & encoding ring buffer can have any size, including very small ones ( < 64 KB).
+    - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+      maxBlockSize is implementation dependent. It's the maximum size you intend to compress into a single block.
+      In which case, encoding and decoding buffers do not need to be synchronized,
+      and encoding ring buffer can have any size, including small ones ( < 64 KB).
+    - _At least_ 64 KB + 8 bytes + maxBlockSize.
+      In which case, encoding and decoding buffers do not need to be synchronized,
+      and encoding ring buffer can have any size, including larger than decoding buffer.
+    Whenever these conditions are not possible, save the last 64KB of decoded data into a safe buffer,
+    and indicate where it is saved using LZ4_setStreamDecode()
 */
 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxDecompressedSize);
 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize);
@@ -276,8 +293,8 @@ int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
 Advanced decoding functions :
 *_usingDict() :
     These decoding functions work the same as
-    a combination of LZ4_setDictDecode() followed by LZ4_decompress_x_continue()
-    They don't use nor update an LZ4_streamDecode_t structure.
+    a combination of LZ4_setStreamDecode() followed by LZ4_decompress_x_continue()
+    They are stand-alone. They don't need nor update an LZ4_streamDecode_t structure.
 */
 int LZ4_decompress_safe_usingDict (const char* source, char* dest, int compressedSize, int maxDecompressedSize, const char* dictStart, int dictSize);
 int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalSize, const char* dictStart, int dictSize);
@@ -285,37 +302,57 @@ int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalS
 
 
 /**************************************
-   Obsolete Functions
+*  Obsolete Functions
 **************************************/
-/*
-Obsolete decompression functions
-These function names are deprecated and should no longer be used.
-They are only provided here for compatibility with older user programs.
-- LZ4_uncompress is the same as LZ4_decompress_fast
-- LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
-These function prototypes are now disabled; uncomment them if you really need them.
-It is highly recommended to stop using these functions and migrated to newer ones */
+/* Deprecate Warnings */
+/* Should these warnings messages be a problem,
+   it is generally possible to disable them,
+   with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual for example.
+   You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
+#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif /* LZ4_DEPRECATE_WARNING_DEFBLOCK */
+
+/* Obsolete compression functions */
+/* These functions are planned to start generate warnings by r131 approximately */
+int LZ4_compress               (const char* source, char* dest, int sourceSize);
+int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
+int LZ4_compress_withState               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compress_continue                (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
+int LZ4_compress_limitedOutput_continue  (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Obsolete decompression functions */
+/* These function names are completely deprecated and must no longer be used.
+   They are only provided here for compatibility with older programs.
+    - LZ4_uncompress is the same as LZ4_decompress_fast
+    - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe
+   These function prototypes are now disabled; uncomment them only if you really need them.
+   It is highly recommended to stop using these prototypes and migrate to maintained ones */
 /* int LZ4_uncompress (const char* source, char* dest, int outputSize); */
 /* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */
 
-/*
- * If you prefer dynamic allocation methods,
- * LZ4_createStreamDecode()
- * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure.
- * LZ4_free just frees it.
- */
-/* void* LZ4_createStreamDecode(void); */
-/*int   LZ4_free (void* LZ4_stream);    yes, it's the same one as for compression */
-
 /* Obsolete streaming functions; use new streaming interface whenever possible */
-void* LZ4_create (const char* inputBuffer);
-int   LZ4_sizeofStreamState(void);
-int   LZ4_resetStreamState(void* state, const char* inputBuffer);
-char* LZ4_slideInputBuffer (void* state);
+LZ4_DEPRECATED("use LZ4_createStream() instead") void* LZ4_create (char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_createStream() instead") int   LZ4_sizeofStreamState(void);
+LZ4_DEPRECATED("use LZ4_resetStream() instead")  int   LZ4_resetStreamState(void* state, char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDict() instead")     char* LZ4_slideInputBuffer (void* state);
 
 /* Obsolete streaming decoding functions */
-int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int compressedSize, int maxOutputSize);
-int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int originalSize);
+LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize);
+LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize);
 
 
 #if defined (__cplusplus)

Diff do ficheiro suprimidas por serem muito extensas
+ 364 - 532
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c


+ 118 - 102
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h

@@ -1,7 +1,7 @@
 /*
    LZ4 HC - High Compression Mode of LZ4
    Header File
-   Copyright (C) 2011-2014, Yann Collet.
+   Copyright (C) 2011-2015, Yann Collet.
    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 
    Redistribution and use in source and binary forms, with or without
@@ -28,8 +28,8 @@
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
    You can contact the author at :
-   - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
-   - LZ4 source repository : http://code.google.com/p/lz4/
+   - LZ4 source repository : https://github.com/Cyan4973/lz4
+   - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
 */
 #pragma once
 
@@ -38,136 +38,152 @@
 extern "C" {
 #endif
 
+/*****************************
+*  Includes
+*****************************/
+#include <stddef.h>   /* size_t */
 
-int LZ4_compressHC (const char* source, char* dest, int inputSize);
-/*
-LZ4_compressHC :
-    return : the number of bytes in compressed buffer dest
-             or 0 if compression fails.
-    note : destination buffer must be already allocated.
-        To avoid any problem, size it to handle worst cases situations (input data not compressible)
-        Worst case size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
-*/
 
-int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
+/**************************************
+*  Block Compression
+**************************************/
+int LZ4_compress_HC (const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
 /*
-LZ4_compress_limitedOutput() :
-    Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-    If it cannot achieve it, compression will stop, and result of the function will be zero.
-    This function never writes outside of provided output buffer.
-
-    inputSize  : Max supported value is 1 GB
-    maxOutputSize : is maximum allowed size into the destination buffer (which must be already allocated)
-    return : the number of output bytes written in buffer 'dest'
-             or 0 if compression fails.
+LZ4_compress_HC :
+    Destination buffer 'dst' must be already allocated.
+    Compression completion is guaranteed if 'dst' buffer is sized to handle worst circumstances (data not compressible)
+    Worst size evaluation is provided by function LZ4_compressBound() (see "lz4.h")
+      srcSize  : Max supported value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+      compressionLevel : Recommended values are between 4 and 9, although any value between 0 and 16 will work.
+                         0 means "use default value" (see lz4hc.c).
+                         Values >16 behave the same as 16.
+      return : the number of bytes written into buffer 'dst'
+            or 0 if compression fails.
 */
 
 
-int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
-int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-/*
-    Same functions as above, but with programmable 'compressionLevel'.
-    Recommended values are between 4 and 9, although any value between 0 and 16 will work.
-    'compressionLevel'==0 means use default 'compressionLevel' value.
-    Values above 16 behave the same as 16.
-    Equivalent variants exist for all other compression functions below.
-*/
-
 /* Note :
-Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
+   Decompression functions are provided within LZ4 source code (see "lz4.h") (BSD license)
 */
 
 
-/**************************************
-   Using an external allocation
-**************************************/
 int LZ4_sizeofStateHC(void);
-int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
-int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
-
-int LZ4_compressHC2_withStateHC              (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
-int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-
+int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
 /*
-These functions are provided should you prefer to allocate memory for compression tables with your own allocation methods.
-To know how much memory must be allocated for the compression tables, use :
-int LZ4_sizeofStateHC();
+LZ4_compress_HC_extStateHC() :
+   Use this function if you prefer to manually allocate memory for compression tables.
+   To know how much memory must be allocated for the compression tables, use :
+      int LZ4_sizeofStateHC();
 
-Note that tables must be aligned for pointer (32 or 64 bits), otherwise compression will fail (return code 0).
+   Allocated memory must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
 
-The allocated memory can be provided to the compressions functions using 'void* state' parameter.
-LZ4_compress_withStateHC() and LZ4_compress_limitedOutput_withStateHC() are equivalent to previously described functions.
-They just use the externally allocated memory area instead of allocating their own (on stack, or on heap).
+   The allocated memory can then be provided to the compression functions using 'void* state' parameter.
+   LZ4_compress_HC_extStateHC() is equivalent to previously described function.
+   It just uses externally allocated memory for stateHC.
 */
 
 
 /**************************************
-   Streaming Functions
+*  Streaming Compression
 **************************************/
-/* Note : these streaming functions still follows the older model */
-void* LZ4_createHC (const char* inputBuffer);
-int   LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize);
-int   LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize);
-char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
-int   LZ4_freeHC (void* LZ4HC_Data);
-
-int   LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
-int   LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
-
+#define LZ4_STREAMHCSIZE        262192
+#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
+typedef struct { size_t table[LZ4_STREAMHCSIZE_SIZET]; } LZ4_streamHC_t;
 /*
-These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks.
-In order to achieve this, it is necessary to start creating the LZ4HC Data Structure, thanks to the function :
-
-void* LZ4_createHC (const char* inputBuffer);
-The result of the function is the (void*) pointer on the LZ4HC Data Structure.
-This pointer will be needed in all other functions.
-If the pointer returned is NULL, then the allocation has failed, and compression must be aborted.
-The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer.
-The input buffer must be already allocated, and size at least 192KB.
-'inputBuffer' will also be the 'const char* source' of the first block.
-
-All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'.
-To compress each block, use either LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue().
-Their behavior are identical to LZ4_compressHC() or LZ4_compressHC_limitedOutput(),
-but require the LZ4HC Data Structure as their first argument, and check that each block starts right after the previous one.
-If next block does not begin immediately after the previous one, the compression will fail (return 0).
-
-When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to :
-char* LZ4_slideInputBufferHC(void* LZ4HC_Data);
-must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer.
-Note that, for this function to work properly, minimum size of an input buffer must be 192KB.
-==> The memory position where the next input data block must start is provided as the result of the function.
-
-Compression can then resume, using LZ4_compressHC_continue() or LZ4_compressHC_limitedOutput_continue(), as usual.
-
-When compression is completed, a call to LZ4_freeHC() will release the memory used by the LZ4HC Data Structure.
+  LZ4_streamHC_t
+  This structure allows static allocation of LZ4 HC streaming state.
+  State must then be initialized using LZ4_resetStreamHC() before first use.
+
+  Static allocation should only be used in combination with static linking.
+  If you want to use LZ4 as a DLL, please use construction functions below, which are future-proof.
 */
 
-int LZ4_sizeofStreamStateHC(void);
-int LZ4_resetStreamStateHC(void* state, const char* inputBuffer);
 
+LZ4_streamHC_t* LZ4_createStreamHC(void);
+int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
 /*
-These functions achieve the same result as :
-void* LZ4_createHC (const char* inputBuffer);
+  These functions create and release memory for LZ4 HC streaming state.
+  Newly created states are already initialized.
+  Existing state space can be re-used anytime using LZ4_resetStreamHC().
+  If you use LZ4 as a DLL, use these functions instead of static structure allocation,
+  to avoid size mismatch between different versions.
+*/
 
-They are provided here to allow the user program to allocate memory using its own routines.
+void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
+int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
 
-To know how much space must be allocated, use LZ4_sizeofStreamStateHC();
-Note also that space must be aligned for pointers (32 or 64 bits).
+int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
 
-Once space is allocated, you must initialize it using : LZ4_resetStreamStateHC(void* state, const char* inputBuffer);
-void* state is a pointer to the space allocated.
-It must be aligned for pointers (32 or 64 bits), and be large enough.
-The parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer.
-The input buffer must be already allocated, and size at least 192KB.
-'inputBuffer' will also be the 'const char* source' of the first block.
+int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
 
-The same space can be re-used multiple times, just by initializing it each time with LZ4_resetStreamState().
-return value of LZ4_resetStreamStateHC() must be 0 is OK.
-Any other value means there was an error (typically, state is not aligned for pointers (32 or 64 bits)).
+/*
+  These functions compress data in successive blocks of any size, using previous blocks as dictionary.
+  One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
+  There is an exception for ring buffers, which can be smaller 64 KB.
+  Such case is automatically detected and correctly handled by LZ4_compress_HC_continue().
+
+  Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
+  A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
+
+  Then, use LZ4_compress_HC_continue() to compress each successive block.
+  It works like LZ4_compress_HC(), but use previous memory blocks as dictionary to improve compression.
+  Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
+  As a reminder, size 'dst' buffer to handle worst cases, using LZ4_compressBound(), to ensure success of compression operation.
+
+  If, for any reason, previous data blocks can't be preserved unmodified in memory during next compression block,
+  you must save it to a safer memory space, using LZ4_saveDictHC().
+  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
 */
 
 
+
+/**************************************
+*  Deprecated Functions
+**************************************/
+/* Deprecate Warnings */
+/* Should these warnings messages be a problem,
+   it is generally possible to disable them,
+   with -Wno-deprecated-declarations for gcc
+   or _CRT_SECURE_NO_WARNINGS in Visual for example.
+   You can also define LZ4_DEPRECATE_WARNING_DEFBLOCK. */
+#ifndef LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_DEPRECATE_WARNING_DEFBLOCK
+#  define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if (LZ4_GCC_VERSION >= 405) || defined(__clang__)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
+#  elif (LZ4_GCC_VERSION >= 301)
+#    define LZ4_DEPRECATED(message) __attribute__((deprecated))
+#  elif defined(_MSC_VER)
+#    define LZ4_DEPRECATED(message) __declspec(deprecated(message))
+#  else
+#    pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler")
+#    define LZ4_DEPRECATED(message)
+#  endif
+#endif // LZ4_DEPRECATE_WARNING_DEFBLOCK
+
+/* compression functions */
+/* these functions are planned to trigger warning messages by r131 approximately */
+int LZ4_compressHC                (const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput  (const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compressHC2               (const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+int LZ4_compressHC_withStateHC               (void* state, const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
+int LZ4_compressHC2_withStateHC              (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
+int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+int LZ4_compressHC_continue               (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
+int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
+
+/* Streaming functions following the older model; should no longer be used */
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
+LZ4_DEPRECATED("use LZ4_saveDictHC() instead")     char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_freeStreamHC() instead")   int   LZ4_freeHC (void* LZ4HC_Data);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int   LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int   LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
+LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int   LZ4_sizeofStreamStateHC(void);
+LZ4_DEPRECATED("use LZ4_resetStreamHC() instead")  int   LZ4_resetStreamStateHC(void* state, char* inputBuffer);
+
+
 #if defined (__cplusplus)
 }
 #endif

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/src/codec/Lz4Codec.cc

@@ -35,7 +35,7 @@ Lz4CompressStream::Lz4CompressStream(OutputStream * stream, uint32_t bufferSizeH
 
 void Lz4CompressStream::compressOneBlock(const void * buff, uint32_t length) {
   size_t compressedLength = _tempBufferSize - 8;
-  int ret = LZ4_compress((char*)buff, _tempBuffer + 8, length);
+  int ret = LZ4_compress_default((char*)buff, _tempBuffer + 8, length, LZ4_compressBound(length));
   if (ret > 0) {
     compressedLength = ret;
     ((uint32_t*)_tempBuffer)[0] = bswap(length);

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/src/main/native/test/TestCompressions.cc

@@ -185,7 +185,7 @@ void MeasureSingleFileLz4(const string & path, CompressResult & total, size_t bl
     size_t currentblocksize = std::min(data.length() - start, blockSize);
     uint64_t startTime = t.now();
     for (int i = 0; i < times; i++) {
-      int osize = LZ4_compress((char*)data.data() + start, outputBuffer, currentblocksize);
+      int osize = LZ4_compress_default((char*)data.data() + start, outputBuffer, currentblocksize, LZ4_compressBound(currentblocksize));
       result.compressedSize += osize;
       result.uncompressedSize += currentblocksize;
     }

Alguns ficheiros não foram mostrados porque muitos ficheiros mudaram neste diff