Переглянути джерело

HADOOP-11887. Introduce Intel ISA-L erasure coding library for native erasure encoding support (Kai Zheng via Colin P. McCabe)

Colin Patrick Mccabe 9 роки тому
батько
коміт
482e35c55a
18 змінених файлів з 1122 додано та 11 видалено
  1. 25 0
      BUILDING.txt
  2. 3 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  3. 19 2
      hadoop-common-project/hadoop-common/pom.xml
  4. 25 0
      hadoop-common-project/hadoop-common/src/CMakeLists.txt
  5. 1 0
      hadoop-common-project/hadoop-common/src/config.h.cmake
  6. 86 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java
  7. 9 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java
  8. 18 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java
  9. 19 3
      hadoop-common-project/hadoop-common/src/main/native/native.vcxproj
  10. 49 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/erasure_code_native.c
  11. 29 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/org_apache_hadoop_io_erasurecode_ErasureCodeNative.h
  12. 271 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_code.c
  13. 125 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/erasure_code.h
  14. 111 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/gf_util.h
  15. 10 0
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c
  16. 310 0
      hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c
  17. 11 2
      hadoop-project-dist/pom.xml
  18. 1 0
      hadoop-project/pom.xml

+ 25 - 0
BUILDING.txt

@@ -74,6 +74,8 @@ Optional packages:
 
 * Snappy compression
   $ sudo apt-get install snappy libsnappy-dev
+* Intel ISA-L library for erasure coding
+  Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
 * Bzip2
   $ sudo apt-get install bzip2 libbz2-dev
 * Jansson (C Library for JSON)
@@ -179,6 +181,29 @@ Maven build goals:
   * -Dtest.exclude=<TESTCLASSNAME>
   * -Dtest.exclude.pattern=**/<TESTCLASSNAME1>.java,**/<TESTCLASSNAME2>.java
 
+ Intel ISA-L build options:
+
+   Intel ISA-L is an erasure coding library that can be utilized by the native code.
+   It is currently an optional component, meaning that Hadoop can be built with
+   or without this dependency. Note the library is loaded as a dynamic module at
+   runtime. Please refer to the official site for the library details.
+   https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version
+
+  * Use -Drequire.isal to fail the build if libisal.so is not found.
+    If this option is not specified and the isal library is missing,
+    we silently build a version of libhadoop.so that cannot make use of ISA-L and
+    the native raw erasure coders.
+    This option is recommended if you plan on making use of native raw erasure
+    coders and want to get more repeatable builds.
+  * Use -Disal.prefix to specify a nonstandard location for the libisal
+    library files. You do not need this option if you have installed ISA-L to the
+    system library path.
+  * Use -Disal.lib to specify a nonstandard location for the libisal library
+    files.
+  * Use -Dbundle.isal to copy the contents of the isal.lib directory into
+    the final tar file. This option requires that -Disal.lib is also given,
+    and it ignores the -Disal.prefix option.
+
 ----------------------------------------------------------------------------------
 Building components separately
 

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -948,6 +948,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12040. Adjust inputs order for the decode API in raw erasure coder.
     (Kai Zheng via yliu)
 
+    HADOOP-11887. Introduce Intel ISA-L erasure coding library for native
+    erasure encoding support (Kai Zheng via Colin P. McCabe)
+
   OPTIMIZATIONS
 
     HADOOP-11785. Reduce the number of listStatus operation in distcp

+ 19 - 2
hadoop-common-project/hadoop-common/pom.xml

@@ -567,6 +567,9 @@
         <openssl.prefix></openssl.prefix>
         <openssl.lib></openssl.lib>
         <openssl.include></openssl.include>
+        <require.isal>false</require.isal>
+        <isal.prefix></isal.prefix>
+        <isal.lib></isal.lib>
         <require.openssl>false</require.openssl>
         <runningWithNative>true</runningWithNative>
         <bundle.openssl.in.bin>false</bundle.openssl.in.bin>
@@ -620,6 +623,7 @@
                     <javahClassName>org.apache.hadoop.io.compress.snappy.SnappyDecompressor</javahClassName>
                     <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Compressor</javahClassName>
                     <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName>
+                    <javahClassName>org.apache.hadoop.io.erasurecode.ErasureCodeNative</javahClassName>
                     <javahClassName>org.apache.hadoop.crypto.OpensslCipher</javahClassName>
                     <javahClassName>org.apache.hadoop.crypto.random.OpensslSecureRandom</javahClassName>
                     <javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName>
@@ -642,7 +646,7 @@
                 <configuration>
                   <target>
                     <exec executable="cmake" dir="${project.build.directory}/native" failonerror="true">
-                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model} -DREQUIRE_BZIP2=${require.bzip2} -DREQUIRE_SNAPPY=${require.snappy} -DCUSTOM_SNAPPY_PREFIX=${snappy.prefix} -DCUSTOM_SNAPPY_LIB=${snappy.lib} -DCUSTOM_SNAPPY_INCLUDE=${snappy.include} -DREQUIRE_OPENSSL=${require.openssl} -DCUSTOM_OPENSSL_PREFIX=${openssl.prefix} -DCUSTOM_OPENSSL_LIB=${openssl.lib} -DCUSTOM_OPENSSL_INCLUDE=${openssl.include} -DEXTRA_LIBHADOOP_RPATH=${extra.libhadoop.rpath}"/>
+                      <arg line="${basedir}/src/ -DGENERATED_JAVAH=${project.build.directory}/native/javah -DJVM_ARCH_DATA_MODEL=${sun.arch.data.model} -DREQUIRE_BZIP2=${require.bzip2} -DREQUIRE_SNAPPY=${require.snappy} -DCUSTOM_SNAPPY_PREFIX=${snappy.prefix} -DCUSTOM_SNAPPY_LIB=${snappy.lib} -DCUSTOM_SNAPPY_INCLUDE=${snappy.include} -DREQUIRE_ISAL=${require.isal} -DCUSTOM_ISAL_PREFIX=${isal.prefix} -DCUSTOM_ISAL_LIB=${isal.lib} -DREQUIRE_OPENSSL=${require.openssl} -DCUSTOM_OPENSSL_PREFIX=${openssl.prefix} -DCUSTOM_OPENSSL_LIB=${openssl.lib} -DCUSTOM_OPENSSL_INCLUDE=${openssl.include} -DEXTRA_LIBHADOOP_RPATH=${extra.libhadoop.rpath}"/>
                     </exec>
                     <exec executable="make" dir="${project.build.directory}/native" failonerror="true">
                       <arg line="VERBOSE=1"/>
@@ -664,7 +668,13 @@
                       <arg value="[ x$SKIPTESTS = xtrue ] || ${project.build.directory}/native/test_bulk_crc32"/>
                       <env key="SKIPTESTS" value="${skipTests}"/>
                     </exec>
-                  </target>
+                    <exec executable="${shell-executable}" failonerror="true" dir="${project.build.directory}/native">
+                      <arg value="-c"/>
+                      <arg value="[ ! -f ${project.build.directory}/native/erasure_code_test ] || ${project.build.directory}/native/erasure_code_test"/>
+                      <env key="SKIPTESTS" value="${skipTests}"/>
+                      <env key="LD_LIBRARY_PATH" value="${LD_LIBRARY_PATH}:${isal.lib}:${isal.prefix}"/>
+                    </exec>
+                </target>
                 </configuration>
               </execution>
             </executions>
@@ -684,6 +694,9 @@
         <snappy.prefix></snappy.prefix>
         <snappy.lib></snappy.lib>
         <snappy.include></snappy.include>
+        <require.isal>false</require.isal>
+        <isal.prefix></isal.prefix>
+        <isal.lib></isal.lib>
         <require.snappy>false</require.snappy>
         <bundle.snappy.in.bin>true</bundle.snappy.in.bin>
         <openssl.prefix></openssl.prefix>
@@ -737,6 +750,7 @@
                     <javahClassName>org.apache.hadoop.io.compress.snappy.SnappyDecompressor</javahClassName>
                     <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Compressor</javahClassName>
                     <javahClassName>org.apache.hadoop.io.compress.lz4.Lz4Decompressor</javahClassName>
+                    <javahClassName>org.apache.hadoop.io.erasurecode.ErasureCodeNative</javahClassName>
                     <javahClassName>org.apache.hadoop.crypto.OpensslCipher</javahClassName>
                     <javahClassName>org.apache.hadoop.crypto.random.OpensslSecureRandom</javahClassName>
                     <javahClassName>org.apache.hadoop.util.NativeCrc32</javahClassName>
@@ -790,6 +804,9 @@
                     <argument>/p:CustomOpensslLib=${openssl.lib}</argument>
                     <argument>/p:CustomOpensslInclude=${openssl.include}</argument>
                     <argument>/p:RequireOpenssl=${require.openssl}</argument>
+                    <argument>/p:RequireIsal=${require.isal}</argument>
+                    <argument>/p:CustomIsalPrefix=${isal.prefix}</argument>
+                    <argument>/p:CustomIsalLib=${isal.lib}</argument>
                   </arguments>
                 </configuration>
               </execution>

+ 25 - 0
hadoop-common-project/hadoop-common/src/CMakeLists.txt

@@ -94,6 +94,29 @@ else()
     endif()
 endif()
 
+set(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
+hadoop_set_find_shared_library_version("2")
+find_library(ISAL_LIBRARY
+    NAMES isal
+     PATHS ${CUSTOM_ISAL_PREFIX} ${CUSTOM_ISAL_PREFIX}/lib
+              ${CUSTOM_ISAL_PREFIX}/lib64 ${CUSTOM_ISAL_LIB})
+set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES})
+if (ISAL_LIBRARY)
+    GET_FILENAME_COMPONENT(HADOOP_ISAL_LIBRARY ${ISAL_LIBRARY} NAME)
+    set(ISAL_INCLUDE_DIR ${SRC}/io/erasurecode/include)
+    set(ISAL_SOURCE_FILES
+        ${SRC}/io/erasurecode/erasure_code.c)
+        add_executable(erasure_code_test
+             ${SRC}/io/erasurecode/erasure_code.c
+             ${TST}/io/erasurecode/erasure_code_test.c
+        )
+        target_link_libraries(erasure_code_test ${CMAKE_DL_LIBS})
+else (ISAL_LIBRARY)
+    IF(REQUIRE_ISAL)
+        MESSAGE(FATAL_ERROR "Required ISA-L library could not be found.  ISAL_LIBRARY=${ISAL_LIBRARY}, CUSTOM_ISAL_PREFIX=${CUSTOM_ISAL_PREFIX}")
+    ENDIF(REQUIRE_ISAL)
+endif (ISAL_LIBRARY)
+
 # Build hardware CRC32 acceleration, if supported on the platform.
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
   set(BULK_CRC_ARCH_SOURCE_FIlE "${SRC}/util/bulk_crc32_x86.c")
@@ -169,6 +192,7 @@ include_directories(
     ${ZLIB_INCLUDE_DIRS}
     ${BZIP2_INCLUDE_DIR}
     ${SNAPPY_INCLUDE_DIR}
+    ${ISAL_INCLUDE_DIR}
     ${OPENSSL_INCLUDE_DIR}
     ${SRC}/util
 )
@@ -181,6 +205,7 @@ hadoop_add_dual_library(hadoop
     ${SRC}/io/compress/lz4/Lz4Decompressor.c
     ${SRC}/io/compress/lz4/lz4.c
     ${SRC}/io/compress/lz4/lz4hc.c
+    ${ISAL_SOURCE_FILES}
     ${SNAPPY_SOURCE_FILES}
     ${OPENSSL_SOURCE_FILES}
     ${SRC}/io/compress/zlib/ZlibCompressor.c

+ 1 - 0
hadoop-common-project/hadoop-common/src/config.h.cmake

@@ -22,6 +22,7 @@
 #cmakedefine HADOOP_BZIP2_LIBRARY "@HADOOP_BZIP2_LIBRARY@"
 #cmakedefine HADOOP_SNAPPY_LIBRARY "@HADOOP_SNAPPY_LIBRARY@"
 #cmakedefine HADOOP_OPENSSL_LIBRARY "@HADOOP_OPENSSL_LIBRARY@"
+#cmakedefine HADOOP_ISAL_LIBRARY "@HADOOP_ISAL_LIBRARY@"
 #cmakedefine HAVE_SYNC_FILE_RANGE
 #cmakedefine HAVE_POSIX_FADVISE
 

+ 86 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/ErasureCodeNative.java

@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.io.erasurecode;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.util.NativeCodeLoader;
+
+/**
+ * Erasure code native libraries (for now, Intel ISA-L) related utilities.
+ */
+public final class ErasureCodeNative {
+
+  private static final Log LOG =
+      LogFactory.getLog(ErasureCodeNative.class.getName());
+
+  /**
+   * The reason why ISA-L library is not available, or null if it is available.
+   */
+  private static final String LOADING_FAILURE_REASON;
+
+  static {
+    if (!NativeCodeLoader.isNativeCodeLoaded()) {
+      LOADING_FAILURE_REASON = "hadoop native library cannot be loaded.";
+    } else if (!NativeCodeLoader.buildSupportsIsal()) {
+      LOADING_FAILURE_REASON = "libhadoop was built without ISA-L support";
+    } else {
+      String problem = null;
+      try {
+        loadLibrary();
+      } catch (Throwable t) {
+        problem = "Loading ISA-L failed: " + t.getMessage();
+        LOG.error("Loading ISA-L failed", t);
+      }
+      LOADING_FAILURE_REASON = problem;
+    }
+  }
+
+  private ErasureCodeNative() {}
+
+  /**
+   * Are native libraries loaded?
+   */
+  public static boolean isNativeCodeLoaded() {
+    return LOADING_FAILURE_REASON == null;
+  }
+
+  /**
+   * Is the native ISA-L library loaded and initialized? Throw exception if not.
+   */
+  public static void checkNativeCodeLoaded() {
+    if (LOADING_FAILURE_REASON != null) {
+      throw new RuntimeException(LOADING_FAILURE_REASON);
+    }
+  }
+
+  /**
+   * Load the native library if it is available and supported.
+   */
+  public static native void loadLibrary();
+
+  /**
+   * Get the native library name that's available or supported.
+   */
+  public static native String getLibraryName();
+
+  public static String getLoadingFailureReason() {
+    return LOADING_FAILURE_REASON;
+  }
+}

+ 9 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java

@@ -31,7 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability;
  */
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
-public class NativeCodeLoader {
+public final class NativeCodeLoader {
 
   private static final Log LOG =
     LogFactory.getLog(NativeCodeLoader.class);
@@ -62,6 +62,8 @@ public class NativeCodeLoader {
     }
   }
 
+  private NativeCodeLoader() {}
+
   /**
    * Check if native-hadoop code is loaded for this platform.
    * 
@@ -76,7 +78,12 @@ public class NativeCodeLoader {
    * Returns true only if this build was compiled with support for snappy.
    */
   public static native boolean buildSupportsSnappy();
-  
+
+  /**
+   * Returns true only if this build was compiled with support for ISA-L.
+   */
+  public static native boolean buildSupportsIsal();
+
   /**
    * Returns true only if this build was compiled with support for openssl.
    */

+ 18 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java

@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.util;
 
+import org.apache.hadoop.io.erasurecode.ErasureCodeNative;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.crypto.OpensslCipher;
 import org.apache.hadoop.io.compress.Lz4Codec;
@@ -65,6 +66,7 @@ public class NativeLibraryChecker {
     boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded();
     boolean zlibLoaded = false;
     boolean snappyLoaded = false;
+    boolean isalLoaded = false;
     // lz4 is linked within libhadoop
     boolean lz4Loaded = nativeHadoopLoaded;
     boolean bzip2Loaded = Bzip2Factory.isNativeBzip2Loaded(conf);
@@ -75,6 +77,7 @@ public class NativeLibraryChecker {
     String hadoopLibraryName = "";
     String zlibLibraryName = "";
     String snappyLibraryName = "";
+    String isalDetail = "";
     String lz4LibraryName = "";
     String bzip2LibraryName = "";
     String winutilsPath = null;
@@ -85,18 +88,29 @@ public class NativeLibraryChecker {
       if (zlibLoaded) {
         zlibLibraryName = ZlibFactory.getLibraryName();
       }
+
       snappyLoaded = NativeCodeLoader.buildSupportsSnappy() &&
           SnappyCodec.isNativeCodeLoaded();
       if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) {
         snappyLibraryName = SnappyCodec.getLibraryName();
       }
-      if (OpensslCipher.getLoadingFailureReason() != null) {
-        openSslDetail = OpensslCipher.getLoadingFailureReason();
+
+      isalDetail = ErasureCodeNative.getLoadingFailureReason();
+      if (isalDetail != null) {
+        isalLoaded = false;
+      } else {
+        isalDetail = ErasureCodeNative.getLibraryName();
+        isalLoaded = true;
+      }
+
+      openSslDetail = OpensslCipher.getLoadingFailureReason();
+      if (openSslDetail != null) {
         openSslLoaded = false;
       } else {
         openSslDetail = OpensslCipher.getLibraryName();
         openSslLoaded = true;
       }
+
       if (lz4Loaded) {
         lz4LibraryName = Lz4Codec.getLibraryName();
       }
@@ -125,6 +139,8 @@ public class NativeLibraryChecker {
     System.out.printf("lz4:     %b %s%n", lz4Loaded, lz4LibraryName);
     System.out.printf("bzip2:   %b %s%n", bzip2Loaded, bzip2LibraryName);
     System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail);
+    System.out.printf("ISA-L:   %b %s%n", isalLoaded, isalDetail);
+
     if (Shell.WINDOWS) {
       System.out.printf("winutils: %b %s%n", winutilsExists, winutilsPath);
     }

+ 19 - 3
hadoop-common-project/hadoop-common/src/main/native/native.vcxproj

@@ -17,7 +17,7 @@
    limitations under the License.
 -->
 
-<Project DefaultTargets="CheckRequireSnappy;Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+<Project DefaultTargets="CheckRequireSnappy;CheckRequireIsal;Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
   <ItemGroup Label="ProjectConfigurations">
     <ProjectConfiguration Include="Release|Win32">
       <Configuration>Release</Configuration>
@@ -79,11 +79,22 @@
     <IncludePath Condition="'$(SnappyEnabled)' == 'true'">$(SnappyInclude);$(IncludePath)</IncludePath>
     <IncludePath Condition="Exists('$(ZLIB_HOME)')">$(ZLIB_HOME);$(IncludePath)</IncludePath>
   </PropertyGroup>
+  <PropertyGroup>
+    <IsalLib Condition="Exists('$(CustomIsalPrefix)\isa-l.dll')">$(CustomIsalPrefix)</IsalLib>
+    <IsalLib Condition="Exists('$(CustomIsalPrefix)\lib\isa-l.dll') And '$(IsalLib)' == ''">$(CustomIsalPrefix)\lib</IsalLib>
+    <IsalLib Condition="Exists('$(CustomIsalLib)') And '$(IsalLib)' == ''">$(CustomIsalLib)</IsalLib>
+    <IsalEnabled Condition="'$(IsalLib)' != ''">true</IsalEnabled>
+  </PropertyGroup>
   <Target Name="CheckRequireSnappy">
     <Error
       Text="Required snappy library could not be found.  SnappyLibrary=$(SnappyLibrary), SnappyInclude=$(SnappyInclude), CustomSnappyLib=$(CustomSnappyLib), CustomSnappyInclude=$(CustomSnappyInclude), CustomSnappyPrefix=$(CustomSnappyPrefix)"
       Condition="'$(RequireSnappy)' == 'true' And '$(SnappyEnabled)' != 'true'" />
   </Target>
+  <Target Name="CheckRequireIsal">
+    <Error
+      Text="Required ISA-L library could not be found. CustomIsalLib=$(CustomIsalLib), CustomIsalPrefix=$(CustomIsalPrefix)"
+      Condition="'$(RequireIsal)' == 'true' And '$(IsalEnabled)' != 'true'" />
+  </Target>
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
@@ -92,7 +103,7 @@
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;NATIVE_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\winutils\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\winutils\include;..\native\src\org\apache\hadoop\io\erasurecode\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <CompileAs>CompileAsC</CompileAs>
       <DisableSpecificWarnings>4244</DisableSpecificWarnings>
     </ClCompile>
@@ -113,7 +124,7 @@
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
       <PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;NATIVE_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
-      <AdditionalIncludeDirectories>..\winutils\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\winutils\include;..\native\src\org\apache\hadoop\io\erasurecode\include;..\..\..\target\native\javah;%JAVA_HOME%\include;%JAVA_HOME%\include\win32;.\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
       <CompileAs>CompileAsC</CompileAs>
       <DisableSpecificWarnings>4244</DisableSpecificWarnings>
     </ClCompile>
@@ -145,11 +156,16 @@
     <ClCompile Include="src\org\apache\hadoop\util\bulk_crc32.c" />
     <ClCompile Include="src\org\apache\hadoop\util\NativeCodeLoader.c">
       <AdditionalOptions Condition="'$(SnappyEnabled)' == 'true'">/D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\"</AdditionalOptions>
+      <AdditionalOptions Condition="'$(IsalEnabled)' == 'true'">/D HADOOP_ISAL_LIBRARY=\"isa-l.dll\"</AdditionalOptions>
     </ClCompile>
     <ClCompile Include="src\org\apache\hadoop\util\NativeCrc32.c" />
     <ClCompile Include="src\org\apache\hadoop\yarn\server\nodemanager\windows_secure_container_executor.c">
       <AdditionalIncludeDirectories>src\org\apache\hadoop\io\nativeio;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
+    <ClCompile Include="src\org\apache\hadoop\io\erasurecode\erasure_code.c" Condition="'$(IsalEnabled)' == 'true'">
+      <AdditionalOptions>/D HADOOP_ISAL_LIBRARY=\"isa-l.dll\"</AdditionalOptions>
+    </ClCompile>
+    <ClCompile Include="src\org\apache\hadoop\io\erasurecode\coder\erasure_code_native.c" Condition="'$(IsalEnabled)' == 'true'"/>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\src\org\apache\hadoop\util\crc32c_tables.h" />

+ 49 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/erasure_code_native.c

@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "org_apache_hadoop.h"
+#include "../include/erasure_code.h"
+#include "org_apache_hadoop_io_erasurecode_ErasureCodeNative.h"
+
+#ifdef UNIX
+#include "config.h"
+#endif
+
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_loadLibrary
+(JNIEnv *env, jclass myclass) {
+  char errMsg[1024];
+  load_erasurecode_lib(errMsg, sizeof(errMsg));
+  if (strlen(errMsg) > 0) {
+    THROW(env, "java/lang/UnsatisfiedLinkError", errMsg);
+  }
+}
+
+JNIEXPORT jstring JNICALL
+Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_getLibraryName
+(JNIEnv *env, jclass myclass) {
+  char* libName = get_library_name();
+  if (libName == NULL) {
+    libName = "Unavailable";
+  }
+  return (*env)->NewStringUTF(env, libName);
+}

+ 29 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/coder/org_apache_hadoop_io_erasurecode_ErasureCodeNative.h

@@ -0,0 +1,29 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_apache_hadoop_io_erasurecode_ErasureCodeNative */
+
+#ifndef _Included_org_apache_hadoop_io_erasurecode_ErasureCodeNative
+#define _Included_org_apache_hadoop_io_erasurecode_ErasureCodeNative
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_apache_hadoop_io_erasurecode_ErasureCodeNative
+ * Method:    loadLibrary
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_loadLibrary
+  (JNIEnv *, jclass);
+
+/*
+ * Class:     org_apache_hadoop_io_erasurecode_ErasureCodeNative
+ * Method:    getLibraryName
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_org_apache_hadoop_io_erasurecode_ErasureCodeNative_getLibraryName
+  (JNIEnv *, jclass);
+
+#ifdef __cplusplus
+}
+#endif
+#endif

+ 271 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_code.c

@@ -0,0 +1,271 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "org_apache_hadoop.h"
+#include "../include/gf_util.h"
+#include "../include/erasure_code.h"
+
+#ifdef UNIX
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dlfcn.h>
+
+#include "config.h"
+#endif
+
+#ifdef WINDOWS
+#include <Windows.h>
+#endif
+
+/**
+ *  erasure_code.c
+ *  Implementation of erasure code utilities backed by the dynamically loaded
+ *  ISA-L library (erasure_code.so). Building this code does not rely on any
+ *  ISA-L source code, but running it relies on successfully loading the
+ *  dynamic library.
+ *
+ */
+
+/**
+ * The loaded library handle.
+ */
+static void* libec = NULL;
+
+/**
+ * A helper function to dlsym a 'symbol' from a given library-handle.
+ */
+
+#ifdef UNIX
+
+static __attribute__ ((unused))
+void *my_dlsym(void *handle, const char *symbol) {
+  void *func_ptr = dlsym(handle, symbol);
+  return func_ptr;
+}
+
+/* A helper macro to dlsym the requisite dynamic symbol in NON-JNI env. */
+#define EC_LOAD_DYNAMIC_SYMBOL(func_ptr, handle, symbol) \
+  if ((func_ptr = my_dlsym(handle, symbol)) == NULL) { \
+    return "Failed to load symbol" symbol; \
+  }
+
+#endif
+
+#ifdef WINDOWS
+
+
+
+static FARPROC WINAPI my_dlsym(HMODULE handle, LPCSTR symbol) {
+  FARPROC func_ptr = GetProcAddress(handle, symbol);
+  return func_ptr;
+}
+
+/* A helper macro to dlsym the requisite dynamic symbol in NON-JNI env. */
+#define EC_LOAD_DYNAMIC_SYMBOL(func_type, func_ptr, handle, symbol) \
+  if ((func_ptr = (func_type)my_dlsym(handle, symbol)) == NULL) { \
+    return "Failed to load symbol" symbol; \
+  }
+
+#endif
+
+
+#ifdef UNIX
+// For gf_util.h
+static unsigned char (*d_gf_mul)(unsigned char, unsigned char);
+static unsigned char (*d_gf_inv)(unsigned char);
+static void (*d_gf_gen_rs_matrix)(unsigned char *, int, int);
+static void (*d_gf_gen_cauchy_matrix)(unsigned char *, int, int);
+static int (*d_gf_invert_matrix)(unsigned char *, unsigned char *, const int);
+static int (*d_gf_vect_mul)(int, unsigned char *, void *, void *);
+
+// For erasure_code.h
+static void (*d_ec_init_tables)(int, int, unsigned char*, unsigned char*);
+static void (*d_ec_encode_data)(int, int, int, unsigned char*,
+                                          unsigned char**, unsigned char**);
+static void (*d_ec_encode_data_update)(int, int, int, int, unsigned char*,
+                                             unsigned char*, unsigned char**);
+#endif
+
+#ifdef WINDOWS
+// For erasure_code.h
+typedef unsigned char (__cdecl *__d_gf_mul)(unsigned char, unsigned char);
+static __d_gf_mul d_gf_mul;
+typedef unsigned char (__cdecl *__d_gf_inv)(unsigned char);
+static __d_gf_inv d_gf_inv;
+typedef void (__cdecl *__d_gf_gen_rs_matrix)(unsigned char *, int, int);
+static __d_gf_gen_rs_matrix d_gf_gen_rs_matrix;
+typedef void (__cdecl *__d_gf_gen_cauchy_matrix)(unsigned char *, int, int);
+static __d_gf_gen_cauchy_matrix d_gf_gen_cauchy_matrix;
+typedef int (__cdecl *__d_gf_invert_matrix)(unsigned char *,
+                                                   unsigned char *, const int);
+static __d_gf_invert_matrix d_gf_invert_matrix;
+typedef int (__cdecl *__d_gf_vect_mul)(int, unsigned char *, void *, void *);
+static __d_gf_vect_mul d_gf_vect_mul;
+
+// For erasure_code.h
+typedef void (__cdecl *__d_ec_init_tables)(int, int,
+                                                unsigned char*, unsigned char*);
+static __d_ec_init_tables d_ec_init_tables;
+typedef void (__cdecl *__d_ec_encode_data)(int, int, int, unsigned char*,
+                                             unsigned char**, unsigned char**);
+static __d_ec_encode_data d_ec_encode_data;
+typedef void (__cdecl *__d_ec_encode_data_update)(int, int, int, int, unsigned char*,
+                                             unsigned char*, unsigned char**);
+static __d_ec_encode_data_update d_ec_encode_data_update;
+#endif
+
+static const char* load_functions(void* libec) {
+#ifdef UNIX
+  EC_LOAD_DYNAMIC_SYMBOL(d_gf_mul, libec, "gf_mul");
+  EC_LOAD_DYNAMIC_SYMBOL(d_gf_inv, libec, "gf_inv");
+  EC_LOAD_DYNAMIC_SYMBOL(d_gf_gen_rs_matrix, libec, "gf_gen_rs_matrix");
+  EC_LOAD_DYNAMIC_SYMBOL(d_gf_gen_cauchy_matrix, libec, "gf_gen_cauchy1_matrix");
+  EC_LOAD_DYNAMIC_SYMBOL(d_gf_invert_matrix, libec, "gf_invert_matrix");
+  EC_LOAD_DYNAMIC_SYMBOL(d_gf_vect_mul, libec, "gf_vect_mul");
+
+  EC_LOAD_DYNAMIC_SYMBOL(d_ec_init_tables, libec, "ec_init_tables");
+  EC_LOAD_DYNAMIC_SYMBOL(d_ec_encode_data, libec, "ec_encode_data");
+  EC_LOAD_DYNAMIC_SYMBOL(d_ec_encode_data_update, libec, "ec_encode_data_update");
+#endif
+
+#ifdef WINDOWS
+  EC_LOAD_DYNAMIC_SYMBOL(__d_gf_mul, d_gf_mul, libec, "gf_mul");
+  EC_LOAD_DYNAMIC_SYMBOL(__d_gf_inv, d_gf_inv, libec, "gf_inv");
+  EC_LOAD_DYNAMIC_SYMBOL(__d_gf_gen_rs_matrix, d_gf_gen_rs_matrix, libec, "gf_gen_rs_matrix");
+  EC_LOAD_DYNAMIC_SYMBOL(__d_gf_gen_cauchy_matrix, d_gf_gen_cauchy_matrix, libec, "gf_gen_cauchy1_matrix");
+  EC_LOAD_DYNAMIC_SYMBOL(__d_gf_invert_matrix, d_gf_invert_matrix, libec, "gf_invert_matrix");
+  EC_LOAD_DYNAMIC_SYMBOL(__d_gf_vect_mul, d_gf_vect_mul, libec, "gf_vect_mul");
+
+  EC_LOAD_DYNAMIC_SYMBOL(__d_ec_init_tables, d_ec_init_tables, libec, "ec_init_tables");
+  EC_LOAD_DYNAMIC_SYMBOL(__d_ec_encode_data, d_ec_encode_data, libec, "ec_encode_data");
+  EC_LOAD_DYNAMIC_SYMBOL(__d_ec_encode_data_update, d_ec_encode_data_update, libec, "ec_encode_data_update");
+#endif
+
+  return NULL;
+}
+
+void load_erasurecode_lib(char* err, size_t err_len) {
+  const char* errMsg;
+
+  err[0] = '\0';
+
+  if (libec != NULL) {
+    return;
+  }
+
+  // Load Intel ISA-L
+  #ifdef UNIX
+  libec = dlopen(HADOOP_ISAL_LIBRARY, RTLD_LAZY | RTLD_GLOBAL);
+  if (libec == NULL) {
+    snprintf(err, err_len, "Failed to load %s (%s)",
+                             HADOOP_ISAL_LIBRARY, dlerror());
+    return;
+  }
+  // Clear any existing error
+  dlerror();
+  #endif
+
+  #ifdef WINDOWS
+  libec = LoadLibrary(HADOOP_ISAL_LIBRARY);
+  if (libec == NULL) {
+    snprintf(err, err_len, "Failed to load %s", HADOOP_ISAL_LIBRARY);
+    return;
+  }
+  #endif
+
+  errMsg = load_functions(libec);
+  if (errMsg != NULL) {
+    snprintf(err, err_len, "Loading functions from ISA-L failed: %s", errMsg);
+  }
+}
+
+int build_support_erasurecode() {
+#ifdef HADOOP_ISAL_LIBRARY
+  return 1;
+#else
+  return 0;
+#endif
+}
+
+const char* get_library_name() {
+#ifdef UNIX
+  Dl_info dl_info;
+
+  if (d_ec_encode_data == NULL) {
+    return HADOOP_ISAL_LIBRARY;
+  }
+
+  if(dladdr(d_ec_encode_data, &dl_info)) {
+    return dl_info.dli_fname;
+  }
+#else
+  LPTSTR filename = NULL;
+
+  if (libec == NULL) {
+    return HADOOP_ISAL_LIBRARY;
+  }
+
+  if (GetModuleFileName(libec, filename, 256) > 0) {
+    return filename;
+  }
+#endif
+
+  return NULL;
+}
+
+unsigned char h_gf_mul(unsigned char a, unsigned char b) {
+  return d_gf_mul(a, b);
+}
+
+unsigned char h_gf_inv(unsigned char a) {
+  return d_gf_inv(a);
+}
+
+void h_gf_gen_rs_matrix(unsigned char *a, int m, int k) {
+  d_gf_gen_rs_matrix(a, m, k);
+}
+
+void h_gf_gen_cauchy_matrix(unsigned char *a, int m, int k) {
+  d_gf_gen_cauchy_matrix(a, m, k);
+}
+
+int h_gf_invert_matrix(unsigned char *in, unsigned char *out, const int n) {
+  return d_gf_invert_matrix(in, out, n);
+}
+
+int h_gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest) {
+  return d_gf_vect_mul(len, gftbl, src, dest);
+}
+
+void h_ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls) {
+  d_ec_init_tables(k, rows, a, gftbls);
+}
+
+void h_ec_encode_data(int len, int k, int rows, unsigned char *gftbls,
+    unsigned char **data, unsigned char **coding) {
+  d_ec_encode_data(len, k, rows, gftbls, data, coding);
+}
+
+void h_ec_encode_data_update(int len, int k, int rows, int vec_i,
+         unsigned char *gftbls, unsigned char *data, unsigned char **coding) {
+  d_ec_encode_data_update(len, k, rows, vec_i, gftbls, data, coding);
+}

+ 125 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/erasure_code.h

@@ -0,0 +1,125 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#ifndef _ERASURE_CODE_H_
+#define _ERASURE_CODE_H_
+
+#include <stddef.h>
+
+/**
+ *  Interface to functions supporting erasure code encode and decode.
+ *
+ *  This file defines the interface to optimized functions used in erasure
+ *  codes.  Encode and decode of erasures in GF(2^8) are made by calculating the
+ *  dot product of the symbols (bytes in GF(2^8)) across a set of buffers and a
+ *  set of coefficients.  Values for the coefficients are determined by the type
+ *  of erasure code.  Using a general dot product means that any sequence of
+ *  coefficients may be used including erasure codes based on random
+ *  coefficients.
+ *  Multiple versions of dot product are supplied to calculate 1-6 output
+ *  vectors in one pass.
+ *  Base GF multiply and divide functions can be sped up by defining
+ *  GF_LARGE_TABLES at the expense of memory size.
+ *
+ */
+
+/**
+ * Return 0 if not supported, 1 otherwise.
+ */
+int build_support_erasurecode();
+
+/**
+ * Get the library name possibly of full path.
+ */
+const char* get_library_name();
+
+/**
+ * Initialize and load erasure code library, returning error message if any.
+ *
+ * @param err     The err message buffer.
+ * @param err_len The length of the message buffer.
+ */
+void load_erasurecode_lib(char* err, size_t err_len);
+
+/**
+ * Initialize tables for fast Erasure Code encode and decode.
+ *
+ * Generates the expanded tables needed for fast encode or decode for erasure
+ * codes on blocks of data.  32bytes is generated for each input coefficient.
+ *
+ * @param k      The number of vector sources or rows in the generator matrix
+ *               for coding.
+ * @param rows   The number of output vectors to concurrently encode/decode.
+ * @param a      Pointer to sets of arrays of input coefficients used to encode
+ *               or decode data.
+ * @param gftbls Pointer to start of space for concatenated output tables
+ *               generated from input coefficients.  Must be of size 32*k*rows.
+ * @returns none
+ */
+void h_ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
+
+/**
+ * Generate or decode erasure codes on blocks of data, runs appropriate version.
+ *
+ * Given a list of source data blocks, generate one or multiple blocks of
+ * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
+ * suitable set of coefficients, this function will perform the fast generation
+ * or decoding of Reed-Solomon type erasure codes.
+ *
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
+ * @param len    Length of each block of data (vector) of source or dest data.
+ * @param k      The number of vector sources or rows in the generator matrix
+ *        for coding.
+ * @param rows   The number of output vectors to concurrently encode/decode.
+ * @param gftbls Pointer to array of input tables generated from coding
+ *        coefficients in ec_init_tables(). Must be of size 32*k*rows
+ * @param data   Array of pointers to source input buffers.
+ * @param coding Array of pointers to coded output buffers.
+ * @returns none
+ */
+void h_ec_encode_data(int len, int k, int rows, unsigned char *gftbls,
+                                 unsigned char **data, unsigned char **coding);
+
+/**
+ * @brief Generate update for encode or decode of erasure codes from single
+ *        source, runs appropriate version.
+ *
+ * Given one source data block, update one or multiple blocks of encoded data as
+ * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
+ * coefficients, this function will perform the fast generation or decoding of
+ * Reed-Solomon type erasure codes from one input source at a time.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param len    Length of each block of data (vector) of source or dest data.
+ * @param k      The number of vector sources or rows in the generator matrix
+ *               for coding.
+ * @param rows   The number of output vectors to concurrently encode/decode.
+ * @param vec_i  The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ *               coefficients in ec_init_tables(). Must be of size 32*k*rows
+ * @param data   Pointer to single input source used to update output parity.
+ * @param coding Array of pointers to coded output buffers.
+ * @returns none
+ */
+void h_ec_encode_data_update(int len, int k, int rows, int vec_i,
+           unsigned char *gftbls, unsigned char *data, unsigned char **coding);
+
+#endif //_ERASURE_CODE_H_

+ 111 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/include/gf_util.h

@@ -0,0 +1,111 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#ifndef _GF_UTIL_H
+#define _GF_UTIL_H
+
+/**
+ *  gf_util.h
+ *  Interface to functions for vector (block) multiplication in GF(2^8).
+ *
+ *  This file defines the interface to routines used in fast RAID rebuild and
+ *  erasure codes.
+ */
+
+
+/**
+ * Single element GF(2^8) multiply.
+ *
+ * @param a  Multiplicand a
+ * @param b  Multiplicand b
+ * @returns  Product of a and b in GF(2^8)
+ */
+unsigned char h_gf_mul(unsigned char a, unsigned char b);
+
+/**
+ * Single element GF(2^8) inverse.
+ *
+ * @param a  Input element
+ * @returns  Field element b such that a x b = {1}
+ */
+unsigned char h_gf_inv(unsigned char a);
+
+/**
+ * Generate a matrix of coefficients to be used for encoding.
+ *
+ * Vandermonde matrix example of encoding coefficients where high portion of
+ * matrix is identity matrix I and lower portion is constructed as 2^{i*(j-k+1)}
+ * i:{0,k-1} j:{k,m-1}. Commonly used method for choosing coefficients in
+ * erasure encoding but does not guarantee invertibility of every sub matrix.  For
+ * large k it is possible to find cases where the decode matrix chosen from
+ * sources and parity not in erasure are not invertible. Users may want to
+ * adjust for k > 5.
+ *
+ * @param a  [mxk] array to hold coefficients
+ * @param m  number of rows in matrix corresponding to srcs + parity.
+ * @param k  number of columns in matrix corresponding to srcs.
+ * @returns  none
+ */
+void h_gf_gen_rs_matrix(unsigned char *a, int m, int k);
+
+/**
+ * Generate a Cauchy matrix of coefficients to be used for encoding.
+ *
+ * Cauchy matrix example of encoding coefficients where high portion of matrix
+ * is identity matrix I and lower portion is constructed as 1/(i + j) | i != j,
+ * i:{0,k-1} j:{k,m-1}.  Any sub-matrix of a Cauchy matrix should be invertible.
+ *
+ * @param a  [mxk] array to hold coefficients
+ * @param m  number of rows in matrix corresponding to srcs + parity.
+ * @param k  number of columns in matrix corresponding to srcs.
+ * @returns  none
+ */
+void h_gf_gen_cauchy_matrix(unsigned char *a, int m, int k);
+
+/**
+ * Invert a matrix in GF(2^8)
+ *
+ * @param in  input matrix
+ * @param out output matrix such that [in] x [out] = [I] - identity matrix
+ * @param n   size of matrix [nxn]
+ * @returns 0 successful, other fail on singular input matrix
+ */
+int h_gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
+
+/**
+ * GF(2^8) vector multiply by constant, runs appropriate version.
+ *
+ * Does a GF(2^8) vector multiply b = Ca where a and b are arrays and C
+ * is a single field element in GF(2^8). Can be used for RAID6 rebuild
+ * and partial write functions. Function requires pre-calculation of a
+ * 32-element constant array based on constant C. gftbl(C) = {C{00},
+ * C{01}, C{02}, ... , C{0f} }, {C{00}, C{10}, C{20}, ... , C{f0} }.
+ * Len and src must be aligned to 32B.
+ *
+ * This function determines what instruction sets are enabled
+ * and selects the appropriate version at runtime.
+ *
+ * @param len   Length of vector in bytes. Must be aligned to 32B.
+ * @param gftbl Pointer to 32-byte array of pre-calculated constants based on C.
+ * @param src   Pointer to src data array. Must be aligned to 32B.
+ * @param dest  Pointer to destination data array. Must be aligned to 32B.
+ * @returns 0 pass, other fail
+ */
+int h_gf_vect_mul(int len, unsigned char *gftbl, void *src, void *dest);
+
+
+#endif //_GF_UTIL_H

+ 10 - 0
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/util/NativeCodeLoader.c

@@ -49,6 +49,16 @@ JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSup
 #endif
 }
 
+/*
+ * JNI hook for NativeCodeLoader.buildSupportsIsal(): reports whether
+ * this libhadoop build was compiled with ISA-L support, mirroring the
+ * sibling buildSupports* probes in this file.
+ */
+JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_buildSupportsIsal
+  (JNIEnv *env, jclass clazz)
+{
+#ifdef HADOOP_ISAL_LIBRARY
+  return JNI_TRUE;
+#else
+  return JNI_FALSE;
+#endif
+}
+
 JNIEXPORT jstring JNICALL Java_org_apache_hadoop_util_NativeCodeLoader_getLibraryName
   (JNIEnv *env, jclass clazz)
 {

+ 310 - 0
hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c

@@ -0,0 +1,310 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This is a lightweight version of the same file in Intel ISA-L library to test
+ * and verify the basic functions of ISA-L integration. Note it's not serving as
+ * a complete ISA-L library test nor as any sample to write an erasure coder
+ * using the library. A sample is to be written and provided separately.
+ */
+
+#include "org_apache_hadoop.h"
+#include "erasure_code.h"
+#include "gf_util.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define TEST_LEN 8192
+#define TEST_SOURCES  127
+#define MMAX TEST_SOURCES
+#define KMAX TEST_SOURCES
+#define TEST_SEED 11
+
+/* Hex-dump the first len bytes of buf, 32 bytes per output line. */
+static void dump(unsigned char *buf, int len)
+{
+  int i;
+  for (i = 0; i < len;) {
+    printf(" %2x", 0xff & buf[i++]);
+    if (i % 32 == 0)
+      printf("\n");
+  }
+  printf("\n");
+}
+
+/* Hex-dump a matrix given as k row pointers of m bytes each. */
+static void dump_matrix(unsigned char **s, int k, int m)
+{
+  int i, j;
+  for (i = 0; i < k; i++) {
+    for (j = 0; j < m; j++) {
+      printf(" %2x", s[i][j]);
+    }
+    printf("\n");
+  }
+  printf("\n");
+}
+
+/* Hex-dump a contiguous k x m byte matrix stored row-major in s. */
+static void dump_u8xu8(unsigned char *s, int k, int m)
+{
+  int i, j;
+  for (i = 0; i < k; i++) {
+    for (j = 0; j < m; j++) {
+      printf(" %2x", 0xff & s[j + (i * m)]);
+    }
+    printf("\n");
+  }
+  printf("\n");
+}
+
+// Generate Random errors
+// Flips a coin for each of the m blocks (stopping once m - k erasures are
+// chosen, the most the code can recover).  Erased block indices go into
+// src_err_list; src_in_err is a 0/1 marker array indexed by block.  Totals
+// are returned through pnerrs (all erasures) and pnsrcerrs (erasures among
+// the first k data blocks only).  Guarantees at least one erasure.
+static void gen_err_list(unsigned char *src_err_list,
+       unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
+{
+  int i, err;
+  int nerrs = 0, nsrcerrs = 0;
+
+  for (i = 0, nerrs = 0, nsrcerrs = 0; i < m && nerrs < m - k; i++) {
+    err = 1 & rand();
+    src_in_err[i] = err;
+    if (err) {
+      src_err_list[nerrs++] = i;
+      if (i < k) {
+        nsrcerrs++;
+      }
+    }
+  }
+  if (nerrs == 0) { // should have at least one error
+    // Draw until we hit a valid block index < m.
+    while ((err = (rand() % KMAX)) >= m) ;
+    src_err_list[nerrs++] = err;
+    src_in_err[err] = 1;
+    if (err < k)
+      nsrcerrs = 1;
+  }
+  *pnerrs = nerrs;
+  *pnsrcerrs = nsrcerrs;
+  return;
+}
+
+#define NO_INVERT_MATRIX -2
+// Generate decode matrix from encode matrix
+// Builds matrix b from the k surviving rows of encode_matrix, inverts it,
+// and derives decode_matrix rows for every erased block (data rows come
+// straight from the inverse; parity rows are recomputed via GF dot
+// products).  decode_index records which surviving rows were used, in the
+// exact order main() must pack the recovery sources.
+// Returns 0 on success, -1 on allocation failure, NO_INVERT_MATRIX if no
+// invertible sub-matrix can be found.
+static int gf_gen_decode_matrix(unsigned char *encode_matrix,
+        unsigned char *decode_matrix,
+        unsigned char *invert_matrix,
+        unsigned int *decode_index,
+        unsigned char *src_err_list,
+        unsigned char *src_in_err,
+        int nerrs, int nsrcerrs, int k, int m)
+{
+  int i, j, p;
+  int r;
+  unsigned char *backup, *b, s;
+  int incr = 0;
+
+  b = malloc(MMAX * KMAX);
+  backup = malloc(MMAX * KMAX);
+
+  if (b == NULL || backup == NULL) {
+    printf("Test failure! Error with malloc\n");
+    free(b);
+    free(backup);
+    return -1;
+  }
+  // Construct matrix b by removing error rows
+  for (i = 0, r = 0; i < k; i++, r++) {
+    while (src_in_err[r])
+      r++;
+    for (j = 0; j < k; j++) {
+      b[k * i + j] = encode_matrix[k * r + j];
+      backup[k * i + j] = encode_matrix[k * r + j];
+    }
+    decode_index[i] = r;
+  }
+  incr = 0;
+  // If b is singular, retry with the last row swapped for the next
+  // surviving row until an invertible sub-matrix is found or we run out.
+  while (h_gf_invert_matrix(b, invert_matrix, k) < 0) {
+    if (nerrs == (m - k)) {
+      free(b);
+      free(backup);
+      printf("BAD MATRIX\n");
+      return NO_INVERT_MATRIX;
+    }
+    incr++;
+    memcpy(b, backup, MMAX * KMAX);
+    // NOTE(review): the bounds of this scan (nsrcerrs .. nerrs - nsrcerrs)
+    // look asymmetric; it is copied from the ISA-L sample — confirm it
+    // really visits all erased parity entries for nsrcerrs > 0.
+    for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
+      if (src_err_list[i] == (decode_index[k - 1] + incr)) {
+        // skip the erased parity line
+        incr++;
+        continue;
+      }
+    }
+    if (decode_index[k - 1] + incr >= m) {
+      free(b);
+      free(backup);
+      printf("BAD MATRIX\n");
+      return NO_INVERT_MATRIX;
+    }
+    decode_index[k - 1] += incr;
+    for (j = 0; j < k; j++)
+      b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
+
+  };
+
+  // Erased data rows: copy the matching rows of the inverse directly.
+  for (i = 0; i < nsrcerrs; i++) {
+    for (j = 0; j < k; j++) {
+      decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
+    }
+  }
+  /* src_err_list from encode_matrix * invert of b for parity decoding */
+  for (p = nsrcerrs; p < nerrs; p++) {
+    for (i = 0; i < k; i++) {
+      s = 0;
+      for (j = 0; j < k; j++)
+        s ^= h_gf_mul(invert_matrix[j * k + i],
+              encode_matrix[k * src_err_list[p] + j]);
+
+      decode_matrix[k * p + i] = s;
+    }
+  }
+  free(b);
+  free(backup);
+  return 0;
+}
+
+/**
+ * Smoke test: encode m-k parities over k random data blocks with the
+ * loaded ISA-L library, erase a random subset, rebuild, and verify the
+ * rebuilt blocks match the originals.  Returns 0 on pass/skip, -1 on
+ * any failure.
+ */
+int main(int argc, char *argv[])
+{
+  // load_erasurecode_lib() only writes err on failure and we probe it
+  // with strlen() below, so it must start as an empty string (the old
+  // code read uninitialized stack memory here).
+  char err[256] = { 0 };
+  size_t err_len = sizeof(err);
+  int re, i, j, p, m, k;
+  int nerrs, nsrcerrs;
+  unsigned int decode_index[MMAX];
+  unsigned char *temp_buffs[TEST_SOURCES], *buffs[TEST_SOURCES];
+  unsigned char *encode_matrix, *decode_matrix, *invert_matrix, *g_tbls;
+  unsigned char src_in_err[TEST_SOURCES], src_err_list[TEST_SOURCES];
+  unsigned char *recov[TEST_SOURCES];
+
+  if (0 == build_support_erasurecode()) {
+    printf("The native library isn't available, skipping this test\n");
+    return 0; // Normal, not an error
+  }
+
+  load_erasurecode_lib(err, err_len);
+  if (strlen(err) > 0) {
+    printf("Loading erasurecode library failed: %s\n", err);
+    return -1;
+  }
+
+  printf("Performing erasure code test\n");
+  srand(TEST_SEED);
+
+  // Allocate the data/parity buffers, failing fast on OOM instead of
+  // dereferencing a NULL pointer later.
+  for (i = 0; i < TEST_SOURCES; i++) {
+    buffs[i] = malloc(TEST_LEN);
+    temp_buffs[i] = malloc(TEST_LEN);
+    if (buffs[i] == NULL || temp_buffs[i] == NULL) {
+      fprintf(stderr, "allocating test buffers error\n");
+      return -1;
+    }
+  }
+
+  // Test erasure code by encode and recovery
+
+  encode_matrix = malloc(MMAX * KMAX);
+  decode_matrix = malloc(MMAX * KMAX);
+  invert_matrix = malloc(MMAX * KMAX);
+  g_tbls = malloc(KMAX * TEST_SOURCES * 32);
+  if (encode_matrix == NULL || decode_matrix == NULL
+      || invert_matrix == NULL || g_tbls == NULL) {
+    // Print the diagnostic; the old code snprintf'ed it into a local
+    // buffer that was never displayed.
+    fprintf(stderr, "allocating test matrix buffers error\n");
+    return -1;
+  }
+
+  m = 9;
+  k = 5;
+  if (m > MMAX || k > KMAX)
+    return -1;
+
+  // Make random data
+  for (i = 0; i < k; i++)
+    for (j = 0; j < TEST_LEN; j++)
+      buffs[i][j] = rand();
+
+  // The matrix generated by gf_gen_cauchy1_matrix
+  // is always invertible.
+  h_gf_gen_cauchy_matrix(encode_matrix, m, k);
+
+  // Generate g_tbls from encode matrix encode_matrix
+  h_ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);
+
+  // Perform matrix dot_prod for EC encoding
+  // using g_tbls from encode matrix encode_matrix
+  h_ec_encode_data(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);
+
+  // Choose random buffers to be in erasure
+  memset(src_in_err, 0, TEST_SOURCES);
+  gen_err_list(src_err_list, src_in_err, &nerrs, &nsrcerrs, k, m);
+
+  // Generate decode matrix
+  re = gf_gen_decode_matrix(encode_matrix, decode_matrix,
+          invert_matrix, decode_index, src_err_list, src_in_err,
+          nerrs, nsrcerrs, k, m);
+  if (re != 0) {
+    fprintf(stderr, "gf_gen_decode_matrix failed\n");
+    return -1;
+  }
+  // Pack recovery array as list of valid sources
+  // Its order must be the same as the order
+  // to generate matrix b in gf_gen_decode_matrix
+  for (i = 0; i < k; i++) {
+    recov[i] = buffs[decode_index[i]];
+  }
+
+  // Recover data
+  h_ec_init_tables(k, nerrs, decode_matrix, g_tbls);
+  h_ec_encode_data(TEST_LEN, k, nerrs, g_tbls, recov, &temp_buffs[k]);
+  for (i = 0; i < nerrs; i++) {
+    if (0 != memcmp(temp_buffs[k + i], buffs[src_err_list[i]], TEST_LEN)) {
+      printf("Fail error recovery (%d, %d, %d)\n", m, k, nerrs);
+
+      printf(" - erase list = ");
+      for (j = 0; j < nerrs; j++) {
+        printf(" %d", src_err_list[j]);
+      }
+
+      printf(" - Index = ");
+      for (p = 0; p < k; p++) {
+        printf(" %d", decode_index[p]);
+      }
+
+      printf("\nencode_matrix:\n");
+      dump_u8xu8((unsigned char *) encode_matrix, m, k);
+      printf("inv b:\n");
+      dump_u8xu8((unsigned char *) invert_matrix, k, k);
+      printf("\ndecode_matrix:\n");
+      dump_u8xu8((unsigned char *) decode_matrix, m, k);
+      printf("recov %d:", src_err_list[i]);
+      dump(temp_buffs[k + i], 25);
+      printf("orig   :");
+      dump(buffs[src_err_list[i]], 25);
+
+      return -1;
+    }
+  }
+
+  printf("done EC tests: Pass\n");
+  return 0;
+}

+ 11 - 2
hadoop-project-dist/pom.xml

@@ -41,6 +41,7 @@
     <hadoop.component>UNDEF</hadoop.component>
     <bundle.snappy>false</bundle.snappy>
     <bundle.snappy.in.bin>false</bundle.snappy.in.bin>
+    <bundle.isal>true</bundle.isal>
     <bundle.openssl>false</bundle.openssl>
     <bundle.openssl.in.bin>false</bundle.openssl.in.bin>
   </properties>
@@ -332,14 +333,22 @@
                         mkdir -p $${TARGET_DIR}
                         cd $${LIB_DIR}
                         $$TAR lib* | (cd $${TARGET_DIR}/; $$UNTAR)
-                        if [ "${bundle.snappy}" = "true" ] ; then
+                        if [ "X${bundle.snappy}" = "Xtrue" ] ; then
                           cd "${snappy.lib}"
                           $$TAR *snappy* | (cd $${TARGET_DIR}/; $$UNTAR)
                         fi
-                        if [ "${bundle.openssl}" = "true" ] ; then
+                        if [ "X${bundle.openssl}" = "Xtrue" ] ; then
                           cd "${openssl.lib}"
                           $$TAR *crypto* | (cd $${TARGET_DIR}/; $$UNTAR)
                         fi
+                        if [ "X${bundle.isal}" = "Xtrue" ] ; then
+                          if [ "X${isal.lib}" != "X" ]; then
+                            cd "${isal.lib}"
+                            $$TAR *isa* | (cd $${TARGET_DIR}/; $$UNTAR)
+                          else
+                            echo "The required option isal.lib isn't given, bundling ISA-L skipped"
+                          fi
+                        fi
                       fi
                       BIN_DIR="${BUILD_DIR}/bin"
                       if [ -d $${BIN_DIR} ] ; then

+ 1 - 0
hadoop-project/pom.xml

@@ -1245,6 +1245,7 @@
                 <!-- Specify where to look for the native DLL on Windows -->
                 <PATH>${env.PATH};${hadoop.common.build.dir}/bin;${snappy.lib}</PATH>
                 <PATH>${env.PATH};${hadoop.common.build.dir}/bin;${openssl.lib}</PATH>
+                <PATH>${env.PATH};${hadoop.common.build.dir}/bin;${isal.lib}</PATH>
               </environmentVariables>
             </configuration>
           </plugin>