
HADOOP-12041. Implement another Reed-Solomon coder in pure Java. Contributed by Kai Zheng.

Change-Id: I35ff2e498d4f988c9a064f74374f7c7258b7a6b7
Committed by zhezhang, 9 years ago
Commit: c89a14a8a4
16 changed files with 1030 additions and 162 deletions
  1. +3 -0      hadoop-common-project/hadoop-common/CHANGES.txt
  2. +3 -36     hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/AbstractRawErasureDecoder.java
  3. +7 -4      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java
  4. +176 -0    hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder2.java
  5. +76 -0     hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawEncoder2.java
  6. +37 -0     hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawErasureCoderFactory2.java
  7. +1 -1      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoder.java
  8. +83 -0     hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/CoderUtil.java
  9. +15 -3     hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java
  10. +339 -0   hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GF256.java
  11. +1 -1     hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java
  12. +172 -0   hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil2.java
  13. +0 -91    hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java
  14. +33 -0    hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder2.java
  15. +84 -25   hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java
  16. +0 -1     hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -647,6 +647,9 @@ Trunk (Unreleased)
       HADOOP-12757. Findbug compilation fails for 'Kafka Library support'.
       (aajisaka)
 
+      HADOOP-12041. Implement another Reed-Solomon coder in pure Java.
+      (Kai Zheng via zhz)
+
 Release 2.9.0 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 3 - 36
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/AbstractRawErasureDecoder.java

@@ -20,9 +20,9 @@ package org.apache.hadoop.io.erasurecode.rawcoder;
 import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.io.erasurecode.ECChunk;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.CoderUtil;
 
 import java.nio.ByteBuffer;
-import java.util.Arrays;
 
 /**
  * An abstract raw erasure decoder that's to be inherited by new decoders.
@@ -42,7 +42,7 @@ public abstract class AbstractRawErasureDecoder extends AbstractRawErasureCoder
                      ByteBuffer[] outputs) {
     checkParameters(inputs, erasedIndexes, outputs);
 
-    ByteBuffer validInput = findFirstValidInput(inputs);
+    ByteBuffer validInput = CoderUtil.findFirstValidInput(inputs);
     boolean usingDirectBuffer = validInput.isDirect();
     int dataLen = validInput.remaining();
     if (dataLen == 0) {
@@ -106,7 +106,7 @@ public abstract class AbstractRawErasureDecoder extends AbstractRawErasureCoder
   public void decode(byte[][] inputs, int[] erasedIndexes, byte[][] outputs) {
     checkParameters(inputs, erasedIndexes, outputs);
 
-    byte[] validInput = findFirstValidInput(inputs);
+    byte[] validInput = CoderUtil.findFirstValidInput(inputs);
     int dataLen = validInput.length;
     if (dataLen == 0) {
       return;
@@ -178,37 +178,4 @@ public abstract class AbstractRawErasureDecoder extends AbstractRawErasureCoder
           "No enough valid inputs are provided, not recoverable");
     }
   }
-
-  /**
-   * Get indexes into inputs array for items marked as null, either erased or
-   * not to read.
-   * @return indexes into inputs array
-   */
-  protected <T> int[] getErasedOrNotToReadIndexes(T[] inputs) {
-    int[] invalidIndexes = new int[inputs.length];
-    int idx = 0;
-    for (int i = 0; i < inputs.length; i++) {
-      if (inputs[i] == null) {
-        invalidIndexes[idx++] = i;
-      }
-    }
-
-    return Arrays.copyOf(invalidIndexes, idx);
-  }
-
-  /**
-   * Find the valid input from all the inputs.
-   * @param inputs input buffers to look for valid input
-   * @return the first valid input
-   */
-  protected static <T> T findFirstValidInput(T[] inputs) {
-    for (T input : inputs) {
-      if (input != null) {
-        return input;
-      }
-    }
-
-    throw new HadoopIllegalArgumentException(
-        "Invalid inputs are found, all being null");
-  }
 }

+ 7 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.io.erasurecode.rawcoder;
 
 import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.CoderUtil;
 import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil;
 
 import java.nio.ByteBuffer;
@@ -103,7 +104,7 @@ public class RSRawDecoder extends AbstractRawErasureDecoder {
 
   private void doDecodeImpl(ByteBuffer[] inputs, int[] erasedIndexes,
                           ByteBuffer[] outputs) {
-    ByteBuffer valid = findFirstValidInput(inputs);
+    ByteBuffer valid = CoderUtil.findFirstValidInput(inputs);
     int dataLen = valid.remaining();
     for (int i = 0; i < erasedIndexes.length; i++) {
       errSignature[i] = primitivePower[erasedIndexes[i]];
@@ -136,7 +137,8 @@ public class RSRawDecoder extends AbstractRawErasureDecoder {
      * implementations, so we have to adjust them before calling doDecodeImpl.
      */
 
-    int[] erasedOrNotToReadIndexes = getErasedOrNotToReadIndexes(inputs);
+    int[] erasedOrNotToReadIndexes =
+        CoderUtil.getErasedOrNotToReadIndexes(inputs);
 
     // Prepare for adjustedOutputsParameter
 
@@ -181,7 +183,7 @@ public class RSRawDecoder extends AbstractRawErasureDecoder {
   @Override
   protected void doDecode(ByteBuffer[] inputs, int[] erasedIndexes,
                           ByteBuffer[] outputs) {
-    ByteBuffer validInput = findFirstValidInput(inputs);
+    ByteBuffer validInput = CoderUtil.findFirstValidInput(inputs);
     int dataLen = validInput.remaining();
 
     /**
@@ -189,7 +191,8 @@ public class RSRawDecoder extends AbstractRawErasureDecoder {
      * implementations, so we have to adjust them before calling doDecodeImpl.
      */
 
-    int[] erasedOrNotToReadIndexes = getErasedOrNotToReadIndexes(inputs);
+    int[] erasedOrNotToReadIndexes =
+        CoderUtil.getErasedOrNotToReadIndexes(inputs);
 
     // Prepare for adjustedDirectBufferOutputsParameter
 

+ 176 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder2.java

@@ -0,0 +1,176 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.rawcoder;
+
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.CoderUtil;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.DumpUtil;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.GF256;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil2;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+/**
+ * A raw erasure decoder in RS code scheme in pure Java, for use when a native
+ * one isn't available in the environment. Please always use native implementations
+ * when possible. This new Java coder is about 5X faster than the one originating
+ * from HDFS-RAID, and also compatible with the native/ISA-L coder.
+ */
+@InterfaceAudience.Private
+public class RSRawDecoder2 extends AbstractRawErasureDecoder {
+  //relevant to schema and won't change during decode calls
+  private byte[] encodeMatrix;
+
+  /**
+   * Below are relevant to schema and erased indexes, thus may change during
+   * decode calls.
+   */
+  private byte[] decodeMatrix;
+  private byte[] invertMatrix;
+  /**
+   * Array of input tables generated from coding coefficients previously.
+   * Must be of size 32*k*rows
+   */
+  private byte[] gfTables;
+  private int[] cachedErasedIndexes;
+  private int[] validIndexes;
+  private int numErasedDataUnits;
+  private boolean[] erasureFlags;
+
+  public RSRawDecoder2(int numDataUnits, int numParityUnits) {
+    super(numDataUnits, numParityUnits);
+    if (numDataUnits + numParityUnits >= RSUtil.GF.getFieldSize()) {
+      throw new HadoopIllegalArgumentException(
+              "Invalid getNumDataUnits() and numParityUnits");
+    }
+
+    int numAllUnits = getNumDataUnits() + numParityUnits;
+    encodeMatrix = new byte[numAllUnits * getNumDataUnits()];
+    RSUtil2.genCauchyMatrix(encodeMatrix, numAllUnits, getNumDataUnits());
+    if (isAllowingVerboseDump()) {
+      DumpUtil.dumpMatrix(encodeMatrix, numDataUnits, numAllUnits);
+    }
+  }
+
+  @Override
+  protected void doDecode(ByteBuffer[] inputs, int[] erasedIndexes,
+                          ByteBuffer[] outputs) {
+    prepareDecoding(inputs, erasedIndexes);
+
+    ByteBuffer[] realInputs = new ByteBuffer[getNumDataUnits()];
+    for (int i = 0; i < getNumDataUnits(); i++) {
+      realInputs[i] = inputs[validIndexes[i]];
+    }
+    RSUtil2.encodeData(gfTables, realInputs, outputs);
+  }
+
+  @Override
+  protected void doDecode(byte[][] inputs, int[] inputOffsets,
+                          int dataLen, int[] erasedIndexes,
+                          byte[][] outputs, int[] outputOffsets) {
+    prepareDecoding(inputs, erasedIndexes);
+
+    byte[][] realInputs = new byte[getNumDataUnits()][];
+    int[] realInputOffsets = new int[getNumDataUnits()];
+    for (int i = 0; i < getNumDataUnits(); i++) {
+      realInputs[i] = inputs[validIndexes[i]];
+      realInputOffsets[i] = inputOffsets[validIndexes[i]];
+    }
+    RSUtil2.encodeData(gfTables, dataLen, realInputs, realInputOffsets,
+            outputs, outputOffsets);
+  }
+
+  private <T> void prepareDecoding(T[] inputs, int[] erasedIndexes) {
+    int[] tmpValidIndexes = new int[getNumDataUnits()];
+    CoderUtil.makeValidIndexes(inputs, tmpValidIndexes);
+    if (Arrays.equals(this.cachedErasedIndexes, erasedIndexes) &&
+        Arrays.equals(this.validIndexes, tmpValidIndexes)) {
+      return; // Optimization. Nothing to do
+    }
+    this.cachedErasedIndexes =
+            Arrays.copyOf(erasedIndexes, erasedIndexes.length);
+    this.validIndexes =
+            Arrays.copyOf(tmpValidIndexes, tmpValidIndexes.length);
+
+    processErasures(erasedIndexes);
+  }
+
+  private void processErasures(int[] erasedIndexes) {
+    this.decodeMatrix = new byte[getNumAllUnits() * getNumDataUnits()];
+    this.invertMatrix = new byte[getNumAllUnits() * getNumDataUnits()];
+    this.gfTables = new byte[getNumAllUnits() * getNumDataUnits() * 32];
+
+    this.erasureFlags = new boolean[getNumAllUnits()];
+    this.numErasedDataUnits = 0;
+
+    for (int i = 0; i < erasedIndexes.length; i++) {
+      int index = erasedIndexes[i];
+      erasureFlags[index] = true;
+      if (index < getNumDataUnits()) {
+        numErasedDataUnits++;
+      }
+    }
+
+    generateDecodeMatrix(erasedIndexes);
+
+    RSUtil2.initTables(getNumDataUnits(), erasedIndexes.length,
+        decodeMatrix, 0, gfTables);
+    if (isAllowingVerboseDump()) {
+      System.out.println(DumpUtil.bytesToHex(gfTables, -1));
+    }
+  }
+
+  // Generate decode matrix from encode matrix
+  private void generateDecodeMatrix(int[] erasedIndexes) {
+    int i, j, r, p;
+    byte s;
+    byte[] tmpMatrix = new byte[getNumAllUnits() * getNumDataUnits()];
+
+    // Construct matrix tmpMatrix by removing error rows
+    for (i = 0; i < getNumDataUnits(); i++) {
+      r = validIndexes[i];
+      for (j = 0; j < getNumDataUnits(); j++) {
+        tmpMatrix[getNumDataUnits() * i + j] =
+                encodeMatrix[getNumDataUnits() * r + j];
+      }
+    }
+
+    GF256.gfInvertMatrix(tmpMatrix, invertMatrix, getNumDataUnits());
+
+    for (i = 0; i < numErasedDataUnits; i++) {
+      for (j = 0; j < getNumDataUnits(); j++) {
+        decodeMatrix[getNumDataUnits() * i + j] =
+                invertMatrix[getNumDataUnits() * erasedIndexes[i] + j];
+      }
+    }
+
+    for (p = numErasedDataUnits; p < erasedIndexes.length; p++) {
+      for (i = 0; i < getNumDataUnits(); i++) {
+        s = 0;
+        for (j = 0; j < getNumDataUnits(); j++) {
+          s ^= GF256.gfMul(invertMatrix[j * getNumDataUnits() + i],
+                  encodeMatrix[getNumDataUnits() * erasedIndexes[p] + j]);
+        }
+        decodeMatrix[getNumDataUnits() * p + i] = s;
+      }
+    }
+  }
+}
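
For orientation, a minimal decoding sketch against the new class (a usage sketch, not code from this change); it assumes the public decode(byte[][], int[], byte[][]) entry point inherited from AbstractRawErasureDecoder shown above, zero offsets, equal-length chunks, and the relevant rawcoder imports:

    // Hypothetical caller code; 6 data units + 3 parity units.
    RSRawDecoder2 decoder = new RSRawDecoder2(6, 3);

    // Units are passed in fixed order: data units first, then parity units.
    // Erased (or deliberately unread) units are passed as null entries.
    byte[][] inputs = new byte[9][];
    for (int i = 0; i < inputs.length; i++) {
      inputs[i] = new byte[1024];            // surviving unit data goes here
    }
    int[] erasedIndexes = new int[]{0, 2};   // data units 0 and 2 were lost
    inputs[0] = null;
    inputs[2] = null;

    byte[][] outputs = new byte[erasedIndexes.length][1024];
    decoder.decode(inputs, erasedIndexes, outputs);  // recovers units 0 and 2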

+ 76 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawEncoder2.java

@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.rawcoder;
+
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.DumpUtil;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil;
+import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil2;
+
+import java.nio.ByteBuffer;
+
+/**
+ * A raw erasure encoder in RS code scheme in pure Java, for use when a native
+ * one isn't available in the environment. Please always use native implementations
+ * when possible. This new Java coder is about 5X faster than the one originating
+ * from HDFS-RAID, and also compatible with the native/ISA-L coder.
+ */
+@InterfaceAudience.Private
+public class RSRawEncoder2 extends AbstractRawErasureEncoder {
+  // relevant to schema and won't change during encode calls.
+  private byte[] encodeMatrix;
+  /**
+   * Array of input tables generated from coding coefficients previously.
+   * Must be of size 32*k*rows
+   */
+  private byte[] gfTables;
+
+  public RSRawEncoder2(int numDataUnits, int numParityUnits) {
+    super(numDataUnits, numParityUnits);
+
+    if (numDataUnits + numParityUnits >= RSUtil.GF.getFieldSize()) {
+      throw new HadoopIllegalArgumentException(
+          "Invalid numDataUnits and numParityUnits");
+    }
+
+    encodeMatrix = new byte[getNumAllUnits() * numDataUnits];
+    RSUtil2.genCauchyMatrix(encodeMatrix, getNumAllUnits(), numDataUnits);
+    if (isAllowingVerboseDump()) {
+      DumpUtil.dumpMatrix(encodeMatrix, numDataUnits, getNumAllUnits());
+    }
+    gfTables = new byte[getNumAllUnits() * numDataUnits * 32];
+    RSUtil2.initTables(numDataUnits, numParityUnits, encodeMatrix,
+        numDataUnits * numDataUnits, gfTables);
+    if (isAllowingVerboseDump()) {
+      System.out.println(DumpUtil.bytesToHex(gfTables, -1));
+    }
+  }
+
+  @Override
+  protected void doEncode(ByteBuffer[] inputs, ByteBuffer[] outputs) {
+    RSUtil2.encodeData(gfTables, inputs, outputs);
+  }
+
+  @Override
+  protected void doEncode(byte[][] inputs, int[] inputOffsets,
+                          int dataLen, byte[][] outputs, int[] outputOffsets) {
+    RSUtil2.encodeData(gfTables, dataLen, inputs, inputOffsets, outputs,
+        outputOffsets);
+  }
+}
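
A matching encoding sketch (again illustrative only); it assumes an encode(byte[][] inputs, byte[][] outputs) entry point provided by AbstractRawErasureEncoder, which is not shown in this diff:

    RSRawEncoder2 encoder = new RSRawEncoder2(6, 3);
    byte[][] dataUnits = new byte[6][1024];    // application data, one chunk per unit
    byte[][] parityUnits = new byte[3][1024];  // filled in by the coder
    encoder.encode(dataUnits, parityUnits);    // parity rows of the Cauchy matrix applied to the data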

+ 37 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawErasureCoderFactory2.java

@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.rawcoder;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * A raw coder factory for raw Reed-Solomon coder in Java.
+ */
+@InterfaceAudience.Private
+public class RSRawErasureCoderFactory2 implements RawErasureCoderFactory {
+
+  @Override
+  public RawErasureEncoder createEncoder(int numDataUnits, int numParityUnits) {
+    return new RSRawEncoder2(numDataUnits, numParityUnits);
+  }
+
+  @Override
+  public RawErasureDecoder createDecoder(int numDataUnits, int numParityUnits) {
+    return new RSRawDecoder2(numDataUnits, numParityUnits);
+  }
+}
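
Callers would typically obtain the coders through this factory rather than instantiate them directly; a rough sketch (the configuration plumbing that selects the factory is omitted):

    RawErasureCoderFactory factory = new RSRawErasureCoderFactory2();
    RawErasureEncoder encoder = factory.createEncoder(6, 3);
    RawErasureDecoder decoder = factory.createDecoder(6, 3);
    // encoder and decoder are then driven as in the sketches above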

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RawErasureCoder.java

@@ -40,7 +40,7 @@ public interface RawErasureCoder extends Configurable {
   /**
    * Get a coder option value.
    * @param option
-   * @return
+   * @return option value
    */
   public Object getCoderOption(CoderOption option);
 

+ 83 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/CoderUtil.java

@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.rawcoder.util;
+
+import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.classification.InterfaceAudience;
+
+import java.util.Arrays;
+
+/**
+ * Helpful utilities for implementing some raw erasure coders.
+ */
+@InterfaceAudience.Private
+public final class CoderUtil {
+
+  private CoderUtil() {
+    // Not called
+  }
+
+
+  /**
+   * Get indexes into inputs array for items marked as null, either erased or
+   * not to read.
+   * @return indexes into inputs array
+   */
+  public static <T> int[] getErasedOrNotToReadIndexes(T[] inputs) {
+    int[] invalidIndexes = new int[inputs.length];
+    int idx = 0;
+    for (int i = 0; i < inputs.length; i++) {
+      if (inputs[i] == null) {
+        invalidIndexes[idx++] = i;
+      }
+    }
+
+    return Arrays.copyOf(invalidIndexes, idx);
+  }
+
+  /**
+   * Find the valid input from all the inputs.
+   * @param inputs input buffers to look for valid input
+   * @return the first valid input
+   */
+  public static <T> T findFirstValidInput(T[] inputs) {
+    for (T input : inputs) {
+      if (input != null) {
+        return input;
+      }
+    }
+
+    throw new HadoopIllegalArgumentException(
+        "Invalid inputs are found, all being null");
+  }
+
+  /**
+   * Pick up the indexes of valid inputs.
+   * @param inputs actually decoding input buffers
+   * @param validIndexes an array to be filled and returned
+   * @param <T>
+   */
+  public static <T> void makeValidIndexes(T[] inputs, int[] validIndexes) {
+    int idx = 0;
+    for (int i = 0; i < inputs.length && idx < validIndexes.length; i++) {
+      if (inputs[i] != null) {
+        validIndexes[idx++] = i;
+      }
+    }
+  }
+}
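
To illustrate the three helpers, a small sketch using the same null-marks-erased convention the decoders rely on (illustrative values, not test code from this change):

    ByteBuffer[] inputs = new ByteBuffer[9];
    inputs[3] = ByteBuffer.allocate(1024);   // only unit 3 was read
    inputs[7] = ByteBuffer.allocate(1024);   // ... and parity unit 7

    int[] erased = CoderUtil.getErasedOrNotToReadIndexes(inputs);
    // erased == {0, 1, 2, 4, 5, 6, 8}

    ByteBuffer first = CoderUtil.findFirstValidInput(inputs);
    // first == inputs[3]; throws HadoopIllegalArgumentException if all entries are null

    int[] validIndexes = new int[2];
    CoderUtil.makeValidIndexes(inputs, validIndexes);
    // validIndexes == {3, 7}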

+ 15 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/DumpUtil.java

@@ -21,8 +21,8 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.io.erasurecode.ECChunk;
 
 /**
- * A dump utility class for debugging data erasure coding/decoding issues. Don't
- * suggest they are used in runtime production codes.
+ * A dump utility class for debugging data erasure coding/decoding issues.
+ * Don't suggest they are used in runtime production codes.
  */
 @InterfaceAudience.Private
 public final class DumpUtil {
@@ -35,9 +35,10 @@ public final class DumpUtil {
 
   /**
    * Convert bytes into format like 0x02 02 00 80.
+   * If limit is negative or too large, then all bytes will be converted.
    */
   public static String bytesToHex(byte[] bytes, int limit) {
-    if (limit > bytes.length) {
+    if (limit <= 0 || limit > bytes.length) {
       limit = bytes.length;
     }
     int len = limit * 2;
@@ -56,6 +57,17 @@ public final class DumpUtil {
     return new String(hexChars);
   }
 
+  public static void dumpMatrix(byte[] matrix,
+                                int numDataUnits, int numAllUnits) {
+    for (int i = 0; i < numDataUnits; i++) {
+      for (int j = 0; j < numAllUnits; j++) {
+        System.out.print(" ");
+        System.out.print(0xff & matrix[i * numAllUnits + j]);
+      }
+      System.out.println();
+    }
+  }
+
   /**
    * Print data in hex format in an array of chunks.
    * @param header
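
With the relaxed limit handling above, for example (a sketch, not part of the committed tests):

    byte[] bytes = new byte[]{0x02, 0x02, 0x00, (byte) 0x80, 0x7f};
    String head = DumpUtil.bytesToHex(bytes, 4);   // dumps only the first 4 bytes
    String all = DumpUtil.bytesToHex(bytes, -1);   // a negative limit now dumps them all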

+ 339 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/GF256.java

@@ -0,0 +1,339 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.rawcoder.util;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * A GaloisField utility class that only handles GF(256), for efficiency. Some
+ * of the code is borrowed from the ISA-L implementation (C or ASM code).
+ */
+@InterfaceAudience.Private
+public final class GF256 {
+
+  private GF256() { }
+
+  public static byte[] gfBase() {
+    return GF_BASE;
+  }
+
+  private static final byte[] GF_BASE = new byte[] {
+      (byte) 0x01, (byte) 0x02, (byte) 0x04, (byte) 0x08, (byte) 0x10,
+      (byte) 0x20, (byte) 0x40, (byte) 0x80, (byte) 0x1d, (byte) 0x3a,
+      (byte) 0x74, (byte) 0xe8, (byte) 0xcd, (byte) 0x87, (byte) 0x13,
+      (byte) 0x26, (byte) 0x4c, (byte) 0x98, (byte) 0x2d, (byte) 0x5a,
+      (byte) 0xb4, (byte) 0x75, (byte) 0xea, (byte) 0xc9, (byte) 0x8f,
+      (byte) 0x03, (byte) 0x06, (byte) 0x0c, (byte) 0x18, (byte) 0x30,
+      (byte) 0x60, (byte) 0xc0, (byte) 0x9d, (byte) 0x27, (byte) 0x4e,
+      (byte) 0x9c, (byte) 0x25, (byte) 0x4a, (byte) 0x94, (byte) 0x35,
+      (byte) 0x6a, (byte) 0xd4, (byte) 0xb5, (byte) 0x77, (byte) 0xee,
+      (byte) 0xc1, (byte) 0x9f, (byte) 0x23, (byte) 0x46, (byte) 0x8c,
+      (byte) 0x05, (byte) 0x0a, (byte) 0x14, (byte) 0x28, (byte) 0x50,
+      (byte) 0xa0, (byte) 0x5d, (byte) 0xba, (byte) 0x69, (byte) 0xd2,
+      (byte) 0xb9, (byte) 0x6f, (byte) 0xde, (byte) 0xa1, (byte) 0x5f,
+      (byte) 0xbe, (byte) 0x61, (byte) 0xc2, (byte) 0x99, (byte) 0x2f,
+      (byte) 0x5e, (byte) 0xbc, (byte) 0x65, (byte) 0xca, (byte) 0x89,
+      (byte) 0x0f, (byte) 0x1e, (byte) 0x3c, (byte) 0x78, (byte) 0xf0,
+      (byte) 0xfd, (byte) 0xe7, (byte) 0xd3, (byte) 0xbb, (byte) 0x6b,
+      (byte) 0xd6, (byte) 0xb1, (byte) 0x7f, (byte) 0xfe, (byte) 0xe1,
+      (byte) 0xdf, (byte) 0xa3, (byte) 0x5b, (byte) 0xb6, (byte) 0x71,
+      (byte) 0xe2, (byte) 0xd9, (byte) 0xaf, (byte) 0x43, (byte) 0x86,
+      (byte) 0x11, (byte) 0x22, (byte) 0x44, (byte) 0x88, (byte) 0x0d,
+      (byte) 0x1a, (byte) 0x34, (byte) 0x68, (byte) 0xd0, (byte) 0xbd,
+      (byte) 0x67, (byte) 0xce, (byte) 0x81, (byte) 0x1f, (byte) 0x3e,
+      (byte) 0x7c, (byte) 0xf8, (byte) 0xed, (byte) 0xc7, (byte) 0x93,
+      (byte) 0x3b, (byte) 0x76, (byte) 0xec, (byte) 0xc5, (byte) 0x97,
+      (byte) 0x33, (byte) 0x66, (byte) 0xcc, (byte) 0x85, (byte) 0x17,
+      (byte) 0x2e, (byte) 0x5c, (byte) 0xb8, (byte) 0x6d, (byte) 0xda,
+      (byte) 0xa9, (byte) 0x4f, (byte) 0x9e, (byte) 0x21, (byte) 0x42,
+      (byte) 0x84, (byte) 0x15, (byte) 0x2a, (byte) 0x54, (byte) 0xa8,
+      (byte) 0x4d, (byte) 0x9a, (byte) 0x29, (byte) 0x52, (byte) 0xa4,
+      (byte) 0x55, (byte) 0xaa, (byte) 0x49, (byte) 0x92, (byte) 0x39,
+      (byte) 0x72, (byte) 0xe4, (byte) 0xd5, (byte) 0xb7, (byte) 0x73,
+      (byte) 0xe6, (byte) 0xd1, (byte) 0xbf, (byte) 0x63, (byte) 0xc6,
+      (byte) 0x91, (byte) 0x3f, (byte) 0x7e, (byte) 0xfc, (byte) 0xe5,
+      (byte) 0xd7, (byte) 0xb3, (byte) 0x7b, (byte) 0xf6, (byte) 0xf1,
+      (byte) 0xff, (byte) 0xe3, (byte) 0xdb, (byte) 0xab, (byte) 0x4b,
+      (byte) 0x96, (byte) 0x31, (byte) 0x62, (byte) 0xc4, (byte) 0x95,
+      (byte) 0x37, (byte) 0x6e, (byte) 0xdc, (byte) 0xa5, (byte) 0x57,
+      (byte) 0xae, (byte) 0x41, (byte) 0x82, (byte) 0x19, (byte) 0x32,
+      (byte) 0x64, (byte) 0xc8, (byte) 0x8d, (byte) 0x07, (byte) 0x0e,
+      (byte) 0x1c, (byte) 0x38, (byte) 0x70, (byte) 0xe0, (byte) 0xdd,
+      (byte) 0xa7, (byte) 0x53, (byte) 0xa6, (byte) 0x51, (byte) 0xa2,
+      (byte) 0x59, (byte) 0xb2, (byte) 0x79, (byte) 0xf2, (byte) 0xf9,
+      (byte) 0xef, (byte) 0xc3, (byte) 0x9b, (byte) 0x2b, (byte) 0x56,
+      (byte) 0xac, (byte) 0x45, (byte) 0x8a, (byte) 0x09, (byte) 0x12,
+      (byte) 0x24, (byte) 0x48, (byte) 0x90, (byte) 0x3d, (byte) 0x7a,
+      (byte) 0xf4, (byte) 0xf5, (byte) 0xf7, (byte) 0xf3, (byte) 0xfb,
+      (byte) 0xeb, (byte) 0xcb, (byte) 0x8b, (byte) 0x0b, (byte) 0x16,
+      (byte) 0x2c, (byte) 0x58, (byte) 0xb0, (byte) 0x7d, (byte) 0xfa,
+      (byte) 0xe9, (byte) 0xcf, (byte) 0x83, (byte) 0x1b, (byte) 0x36,
+      (byte) 0x6c, (byte) 0xd8, (byte) 0xad, (byte) 0x47, (byte) 0x8e,
+      (byte) 0x01
+  };
+
+  public static byte[] gfLogBase() {
+    return GF_LOG_BASE;
+  }
+
+  private static final byte[] GF_LOG_BASE = new byte[] {
+      (byte) 0x00, (byte) 0xff, (byte) 0x01, (byte) 0x19, (byte) 0x02,
+      (byte) 0x32, (byte) 0x1a, (byte) 0xc6, (byte) 0x03, (byte) 0xdf,
+      (byte) 0x33, (byte) 0xee, (byte) 0x1b, (byte) 0x68, (byte) 0xc7,
+      (byte) 0x4b, (byte) 0x04, (byte) 0x64, (byte) 0xe0, (byte) 0x0e,
+      (byte) 0x34, (byte) 0x8d, (byte) 0xef, (byte) 0x81, (byte) 0x1c,
+      (byte) 0xc1, (byte) 0x69, (byte) 0xf8, (byte) 0xc8, (byte) 0x08,
+      (byte) 0x4c, (byte) 0x71, (byte) 0x05, (byte) 0x8a, (byte) 0x65,
+      (byte) 0x2f, (byte) 0xe1, (byte) 0x24, (byte) 0x0f, (byte) 0x21,
+      (byte) 0x35, (byte) 0x93, (byte) 0x8e, (byte) 0xda, (byte) 0xf0,
+      (byte) 0x12, (byte) 0x82, (byte) 0x45, (byte) 0x1d, (byte) 0xb5,
+      (byte) 0xc2, (byte) 0x7d, (byte) 0x6a, (byte) 0x27, (byte) 0xf9,
+      (byte) 0xb9, (byte) 0xc9, (byte) 0x9a, (byte) 0x09, (byte) 0x78,
+      (byte) 0x4d, (byte) 0xe4, (byte) 0x72, (byte) 0xa6, (byte) 0x06,
+      (byte) 0xbf, (byte) 0x8b, (byte) 0x62, (byte) 0x66, (byte) 0xdd,
+      (byte) 0x30, (byte) 0xfd, (byte) 0xe2, (byte) 0x98, (byte) 0x25,
+      (byte) 0xb3, (byte) 0x10, (byte) 0x91, (byte) 0x22, (byte) 0x88,
+      (byte) 0x36, (byte) 0xd0, (byte) 0x94, (byte) 0xce, (byte) 0x8f,
+      (byte) 0x96, (byte) 0xdb, (byte) 0xbd, (byte) 0xf1, (byte) 0xd2,
+      (byte) 0x13, (byte) 0x5c, (byte) 0x83, (byte) 0x38, (byte) 0x46,
+      (byte) 0x40, (byte) 0x1e, (byte) 0x42, (byte) 0xb6, (byte) 0xa3,
+      (byte) 0xc3, (byte) 0x48, (byte) 0x7e, (byte) 0x6e, (byte) 0x6b,
+      (byte) 0x3a, (byte) 0x28, (byte) 0x54, (byte) 0xfa, (byte) 0x85,
+      (byte) 0xba, (byte) 0x3d, (byte) 0xca, (byte) 0x5e, (byte) 0x9b,
+      (byte) 0x9f, (byte) 0x0a, (byte) 0x15, (byte) 0x79, (byte) 0x2b,
+      (byte) 0x4e, (byte) 0xd4, (byte) 0xe5, (byte) 0xac, (byte) 0x73,
+      (byte) 0xf3, (byte) 0xa7, (byte) 0x57, (byte) 0x07, (byte) 0x70,
+      (byte) 0xc0, (byte) 0xf7, (byte) 0x8c, (byte) 0x80, (byte) 0x63,
+      (byte) 0x0d, (byte) 0x67, (byte) 0x4a, (byte) 0xde, (byte) 0xed,
+      (byte) 0x31, (byte) 0xc5, (byte) 0xfe, (byte) 0x18, (byte) 0xe3,
+      (byte) 0xa5, (byte) 0x99, (byte) 0x77, (byte) 0x26, (byte) 0xb8,
+      (byte) 0xb4, (byte) 0x7c, (byte) 0x11, (byte) 0x44, (byte) 0x92,
+      (byte) 0xd9, (byte) 0x23, (byte) 0x20, (byte) 0x89, (byte) 0x2e,
+      (byte) 0x37, (byte) 0x3f, (byte) 0xd1, (byte) 0x5b, (byte) 0x95,
+      (byte) 0xbc, (byte) 0xcf, (byte) 0xcd, (byte) 0x90, (byte) 0x87,
+      (byte) 0x97, (byte) 0xb2, (byte) 0xdc, (byte) 0xfc, (byte) 0xbe,
+      (byte) 0x61, (byte) 0xf2, (byte) 0x56, (byte) 0xd3, (byte) 0xab,
+      (byte) 0x14, (byte) 0x2a, (byte) 0x5d, (byte) 0x9e, (byte) 0x84,
+      (byte) 0x3c, (byte) 0x39, (byte) 0x53, (byte) 0x47, (byte) 0x6d,
+      (byte) 0x41, (byte) 0xa2, (byte) 0x1f, (byte) 0x2d, (byte) 0x43,
+      (byte) 0xd8, (byte) 0xb7, (byte) 0x7b, (byte) 0xa4, (byte) 0x76,
+      (byte) 0xc4, (byte) 0x17, (byte) 0x49, (byte) 0xec, (byte) 0x7f,
+      (byte) 0x0c, (byte) 0x6f, (byte) 0xf6, (byte) 0x6c, (byte) 0xa1,
+      (byte) 0x3b, (byte) 0x52, (byte) 0x29, (byte) 0x9d, (byte) 0x55,
+      (byte) 0xaa, (byte) 0xfb, (byte) 0x60, (byte) 0x86, (byte) 0xb1,
+      (byte) 0xbb, (byte) 0xcc, (byte) 0x3e, (byte) 0x5a, (byte) 0xcb,
+      (byte) 0x59, (byte) 0x5f, (byte) 0xb0, (byte) 0x9c, (byte) 0xa9,
+      (byte) 0xa0, (byte) 0x51, (byte) 0x0b, (byte) 0xf5, (byte) 0x16,
+      (byte) 0xeb, (byte) 0x7a, (byte) 0x75, (byte) 0x2c, (byte) 0xd7,
+      (byte) 0x4f, (byte) 0xae, (byte) 0xd5, (byte) 0xe9, (byte) 0xe6,
+      (byte) 0xe7, (byte) 0xad, (byte) 0xe8, (byte) 0x74, (byte) 0xd6,
+      (byte) 0xf4, (byte) 0xea, (byte) 0xa8, (byte) 0x50, (byte) 0x58,
+      (byte) 0xaf
+  };
+
+  private static byte[][] theGfMulTab; // multiply result table in GF 256 space
+
+  /**
+   * Initialize the GF multiply table for performance. Just compute once, and
+   * avoid repeatedly doing the multiply during encoding/decoding.
+   */
+  static {
+    theGfMulTab = new byte[256][256];
+    for (int i = 0; i < 256; i++) {
+      for (int j = 0; j < 256; j++) {
+        theGfMulTab[i][j] = gfMul((byte) i, (byte) j);
+      }
+    }
+  }
+
+  /**
+   * Get the big GF multiply table so it can be utilized efficiently.
+   * @return the big GF multiply table
+   */
+  public static byte[][] gfMulTab() {
+    return theGfMulTab;
+  }
+
+  public static byte gfMul(byte a, byte b) {
+    if ((a == 0) || (b == 0)) {
+      return 0;
+    }
+
+    int tmp = (GF_LOG_BASE[a & 0xff] & 0xff) +
+        (GF_LOG_BASE[b & 0xff] & 0xff);
+    if (tmp > 254) {
+      tmp -= 255;
+    }
+
+    return GF_BASE[tmp];
+  }
+
+  public static byte gfInv(byte a) {
+    if (a == 0) {
+      return 0;
+    }
+
+    return GF_BASE[255 - GF_LOG_BASE[a & 0xff] & 0xff];
+  }
+
+  /**
+   * Invert a matrix assuming it's invertible.
+   *
+   * Ported from Intel ISA-L library.
+   */
+  public static void gfInvertMatrix(byte[] inMatrix, byte[] outMatrix, int n) {
+    byte temp;
+
+    // Set outMatrix[] to the identity matrix
+    for (int i = 0; i < n * n; i++) {
+      // memset(outMatrix, 0, n*n)
+      outMatrix[i] = 0;
+    }
+
+    for (int i = 0; i < n; i++) {
+      outMatrix[i * n + i] = 1;
+    }
+
+    // Inverse
+    for (int j, i = 0; i < n; i++) {
+      // Check for 0 in pivot element
+      if (inMatrix[i * n + i] == 0) {
+        // Find a row with non-zero in current column and swap
+        for (j = i + 1; j < n; j++) {
+          if (inMatrix[j * n + i] != 0) {
+            break;
+          }
+        }
+        if (j == n) {
+          // Couldn't find means it's singular
+          throw new RuntimeException("Not invertible");
+        }
+
+        for (int k = 0; k < n; k++) {
+          // Swap rows i,j
+          temp = inMatrix[i * n + k];
+          inMatrix[i * n + k] = inMatrix[j * n + k];
+          inMatrix[j * n + k] = temp;
+
+          temp = outMatrix[i * n + k];
+          outMatrix[i * n + k] = outMatrix[j * n + k];
+          outMatrix[j * n + k] = temp;
+        }
+      }
+
+      temp = gfInv(inMatrix[i * n + i]); // 1/pivot
+      for (j = 0; j < n; j++) {
+        // Scale row i by 1/pivot
+        inMatrix[i * n + j] = gfMul(inMatrix[i * n + j], temp);
+        outMatrix[i * n + j] = gfMul(outMatrix[i * n + j], temp);
+      }
+
+      for (j = 0; j < n; j++) {
+        if (j == i) {
+          continue;
+        }
+
+        temp = inMatrix[j * n + i];
+        for (int k = 0; k < n; k++) {
+          outMatrix[j * n + k] ^= gfMul(temp, outMatrix[i * n + k]);
+          inMatrix[j * n + k] ^= gfMul(temp, inMatrix[i * n + k]);
+        }
+      }
+    }
+  }
+
+  /**
+   * Ported from Intel ISA-L library.
+   *
+   * Calculates const table gftbl in GF(2^8) from single input A
+   * gftbl(A) = {A{00}, A{01}, A{02}, ... , A{0f} }, {A{00}, A{10}, A{20},
+   * ... , A{f0} } -- from ISA-L implementation
+   */
+  public static void gfVectMulInit(byte c, byte[] tbl, int offset) {
+    byte c2 = (byte) ((c << 1) ^ ((c & 0x80) != 0 ? 0x1d : 0));
+    byte c4 = (byte) ((c2 << 1) ^ ((c2 & 0x80) != 0 ? 0x1d : 0));
+    byte c8 = (byte) ((c4 << 1) ^ ((c4 & 0x80) != 0 ? 0x1d : 0));
+
+    byte c3, c5, c6, c7, c9, c10, c11, c12, c13, c14, c15;
+    byte c17, c18, c19, c20, c21, c22, c23, c24, c25, c26,
+        c27, c28, c29, c30, c31;
+
+    c3 = (byte) (c2 ^ c);
+    c5 = (byte) (c4 ^ c);
+    c6 = (byte) (c4 ^ c2);
+    c7 = (byte) (c4 ^ c3);
+
+    c9 = (byte) (c8 ^ c);
+    c10 = (byte) (c8 ^ c2);
+    c11 = (byte) (c8 ^ c3);
+    c12 = (byte) (c8 ^ c4);
+    c13 = (byte) (c8 ^ c5);
+    c14 = (byte) (c8 ^ c6);
+    c15 = (byte) (c8 ^ c7);
+
+    tbl[offset + 0] = 0;
+    tbl[offset + 1] = c;
+    tbl[offset + 2] = c2;
+    tbl[offset + 3] = c3;
+    tbl[offset + 4] = c4;
+    tbl[offset + 5] = c5;
+    tbl[offset + 6] = c6;
+    tbl[offset + 7] = c7;
+    tbl[offset + 8] = c8;
+    tbl[offset + 9] = c9;
+    tbl[offset + 10] = c10;
+    tbl[offset + 11] = c11;
+    tbl[offset + 12] = c12;
+    tbl[offset + 13] = c13;
+    tbl[offset + 14] = c14;
+    tbl[offset + 15] = c15;
+
+    c17 = (byte) ((c8 << 1) ^ ((c8 & 0x80) != 0 ? 0x1d : 0));
+    c18 = (byte) ((c17 << 1) ^ ((c17 & 0x80) != 0 ? 0x1d : 0));
+    c19 = (byte) (c18 ^ c17);
+    c20 = (byte) ((c18 << 1) ^ ((c18 & 0x80) != 0 ? 0x1d : 0));
+    c21 = (byte) (c20 ^ c17);
+    c22 = (byte) (c20 ^ c18);
+    c23 = (byte) (c20 ^ c19);
+    c24 = (byte) ((c20 << 1) ^ ((c20 & 0x80) != 0 ? 0x1d : 0));
+    c25 = (byte) (c24 ^ c17);
+    c26 = (byte) (c24 ^ c18);
+    c27 = (byte) (c24 ^ c19);
+    c28 = (byte) (c24 ^ c20);
+    c29 = (byte) (c24 ^ c21);
+    c30 = (byte) (c24 ^ c22);
+    c31 = (byte) (c24 ^ c23);
+
+    tbl[offset + 16] = 0;
+    tbl[offset + 17] = c17;
+    tbl[offset + 18] = c18;
+    tbl[offset + 19] = c19;
+    tbl[offset + 20] = c20;
+    tbl[offset + 21] = c21;
+    tbl[offset + 22] = c22;
+    tbl[offset + 23] = c23;
+    tbl[offset + 24] = c24;
+    tbl[offset + 25] = c25;
+    tbl[offset + 26] = c26;
+    tbl[offset + 27] = c27;
+    tbl[offset + 28] = c28;
+    tbl[offset + 29] = c29;
+    tbl[offset + 30] = c30;
+    tbl[offset + 31] = c31;
+  }
+}
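
A quick sanity sketch of the arithmetic this class provides (not part of the committed tests); it uses only gfMul, gfInv and gfMulTab as defined above:

    // Every non-zero element of GF(2^8) should have a multiplicative inverse.
    for (int i = 1; i < 256; i++) {
      byte a = (byte) i;
      if (GF256.gfMul(a, GF256.gfInv(a)) != 1) {
        throw new AssertionError("inverse check failed for " + i);
      }
    }

    // The precomputed table agrees with the scalar multiply.
    byte[][] mulTab = GF256.gfMulTab();
    byte p = mulTab[0x57][0x13];                     // table lookup
    byte q = GF256.gfMul((byte) 0x57, (byte) 0x13);  // direct computation, p == q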

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil.java

@@ -20,7 +20,7 @@ package org.apache.hadoop.io.erasurecode.rawcoder.util;
 import org.apache.hadoop.classification.InterfaceAudience;
 
 /**
- * Some utilities for Reed-Solomon coding.
+ * Utilities for implementing Reed-Solomon code, used by RS coder.
  */
 @InterfaceAudience.Private
 public class RSUtil {

+ 172 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/util/RSUtil2.java

@@ -0,0 +1,172 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.rawcoder.util;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Utilities for implementing Reed-Solomon code, used by the RS2 coder. Some of
+ * the code is borrowed from the ISA-L implementation (C or ASM code).
+ */
+@InterfaceAudience.Private
+public final class RSUtil2 {
+
+  private RSUtil2() { }
+
+  public static void initTables(int k, int rows, byte[] codingMatrix,
+                                int matrixOffset, byte[] gfTables) {
+    int i, j;
+
+    int offset = 0, idx = matrixOffset;
+    for (i = 0; i < rows; i++) {
+      for (j = 0; j < k; j++) {
+        GF256.gfVectMulInit(codingMatrix[idx++], gfTables, offset);
+        offset += 32;
+      }
+    }
+  }
+
+  /**
+   * Ported from Intel ISA-L library.
+   */
+  public static void genCauchyMatrix(byte[] a, int m, int k) {
+    // Identity matrix in high position
+    for (int i = 0; i < k; i++) {
+      a[k * i + i] = 1;
+    }
+
+    // For the rest choose 1/(i + j) | i != j
+    int pos = k * k;
+    for (int i = k; i < m; i++) {
+      for (int j = 0; j < k; j++) {
+        a[pos++] = GF256.gfInv((byte) (i ^ j));
+      }
+    }
+  }
+
+  /**
+   * Encode a group of input data units and generate the outputs. It's also used for
+   * decoding because, in this implementation, encoding and decoding are
+   * unified.
+   *
+   * The algorithm is ported from the Intel ISA-L library for compatibility. It
+   * leverages Java auto-vectorization support for performance.
+   */
+  public static void encodeData(byte[] gfTables, int dataLen, byte[][] inputs,
+                                int[] inputOffsets, byte[][] outputs,
+                                int[] outputOffsets) {
+    int numInputs = inputs.length;
+    int numOutputs = outputs.length;
+    int l, i, j, iPos, oPos;
+    byte[] input, output;
+    byte s;
+    final int times = dataLen / 8;
+    final int extra = dataLen - dataLen % 8;
+    byte[] tableLine;
+
+    for (l = 0; l < numOutputs; l++) {
+      output = outputs[l];
+
+      for (j = 0; j < numInputs; j++) {
+        input = inputs[j];
+        iPos = inputOffsets[j];
+        oPos = outputOffsets[l];
+
+        s = gfTables[j * 32 + l * numInputs * 32 + 1];
+        tableLine = GF256.gfMulTab()[s & 0xff];
+
+        /**
+         * Purely for performance, assuming we can use 8 bytes in the SIMD
+         * instruction. Subject to be improved.
+         */
+        for (i = 0; i < times; i++, iPos += 8, oPos += 8) {
+          output[oPos + 0] ^= tableLine[0xff & input[iPos + 0]];
+          output[oPos + 1] ^= tableLine[0xff & input[iPos + 1]];
+          output[oPos + 2] ^= tableLine[0xff & input[iPos + 2]];
+          output[oPos + 3] ^= tableLine[0xff & input[iPos + 3]];
+          output[oPos + 4] ^= tableLine[0xff & input[iPos + 4]];
+          output[oPos + 5] ^= tableLine[0xff & input[iPos + 5]];
+          output[oPos + 6] ^= tableLine[0xff & input[iPos + 6]];
+          output[oPos + 7] ^= tableLine[0xff & input[iPos + 7]];
+        }
+
+        /**
+         * For the left bytes, do it one by one.
+         */
+        for (i = extra; i < dataLen; i++, iPos++, oPos++) {
+          output[oPos] ^= tableLine[0xff & input[iPos]];
+        }
+      }
+    }
+  }
+
+  /**
+   * See above. Try to use the byte[] version when possible.
+   */
+  public static void encodeData(byte[] gfTables, ByteBuffer[] inputs,
+                                ByteBuffer[] outputs) {
+    int numInputs = inputs.length;
+    int numOutputs = outputs.length;
+    int dataLen = inputs[0].remaining();
+    int l, i, j, iPos, oPos;
+    ByteBuffer input, output;
+    byte s;
+    final int times = dataLen / 8;
+    final int extra = dataLen - dataLen % 8;
+    byte[] tableLine;
+
+    for (l = 0; l < numOutputs; l++) {
+      output = outputs[l];
+
+      for (j = 0; j < numInputs; j++) {
+        input = inputs[j];
+        iPos = input.position();
+        oPos = output.position();
+
+        s = gfTables[j * 32 + l * numInputs * 32 + 1];
+        tableLine = GF256.gfMulTab()[s & 0xff];
+
+        for (i = 0; i < times; i++, iPos += 8, oPos += 8) {
+          output.put(oPos + 0, (byte) (output.get(oPos + 0) ^
+              tableLine[0xff & input.get(iPos + 0)]));
+          output.put(oPos + 1, (byte) (output.get(oPos + 1) ^
+              tableLine[0xff & input.get(iPos + 1)]));
+          output.put(oPos + 2, (byte) (output.get(oPos + 2) ^
+              tableLine[0xff & input.get(iPos + 2)]));
+          output.put(oPos + 3, (byte) (output.get(oPos + 3) ^
+              tableLine[0xff & input.get(iPos + 3)]));
+          output.put(oPos + 4, (byte) (output.get(oPos + 4) ^
+              tableLine[0xff & input.get(iPos + 4)]));
+          output.put(oPos + 5, (byte) (output.get(oPos + 5) ^
+              tableLine[0xff & input.get(iPos + 5)]));
+          output.put(oPos + 6, (byte) (output.get(oPos + 6) ^
+              tableLine[0xff & input.get(iPos + 6)]));
+          output.put(oPos + 7, (byte) (output.get(oPos + 7) ^
+              tableLine[0xff & input.get(iPos + 7)]));
+        }
+
+        for (i = extra; i < dataLen; i++, iPos++, oPos++) {
+          output.put(oPos, (byte) (output.get(oPos) ^
+              tableLine[0xff & input.get(iPos)]));
+        }
+      }
+    }
+  }
+}
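
Pulling the pieces together, a rough sketch of how RSRawEncoder2 above drives these utilities for a 6+3 schema (sizing and call order mirror the encoder's constructor; offsets are all zero here):

    int numDataUnits = 6, numAllUnits = 9, dataLen = 4096;

    byte[] encodeMatrix = new byte[numAllUnits * numDataUnits];
    RSUtil2.genCauchyMatrix(encodeMatrix, numAllUnits, numDataUnits);

    byte[] gfTables = new byte[numAllUnits * numDataUnits * 32];
    // Skip the k*k identity block; only the parity rows drive encoding.
    RSUtil2.initTables(numDataUnits, numAllUnits - numDataUnits,
        encodeMatrix, numDataUnits * numDataUnits, gfTables);

    byte[][] inputs = new byte[numDataUnits][dataLen];                  // data chunks
    byte[][] outputs = new byte[numAllUnits - numDataUnits][dataLen];   // parity chunks
    RSUtil2.encodeData(gfTables, dataLen, inputs, new int[numDataUnits],
        outputs, new int[numAllUnits - numDataUnits]);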

+ 0 - 91
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder.java

@@ -31,95 +31,4 @@ public class TestRSRawCoder extends TestRSRawCoderBase {
     this.decoderClass = RSRawDecoder.class;
     setAllowDump(false); // Change to true to allow verbose dump for debugging
   }
-
-  @Test
-  public void testCoding_6x3_erasing_all_d() {
-    prepare(null, 6, 3, new int[]{0, 1, 2}, new int[0], true);
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasing_d0_d2() {
-    prepare(null, 6, 3, new int[] {0, 2}, new int[]{});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasing_d0() {
-    prepare(null, 6, 3, new int[]{0}, new int[0]);
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasing_d2() {
-    prepare(null, 6, 3, new int[]{2}, new int[]{});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasing_d0_p0() {
-    prepare(null, 6, 3, new int[]{0}, new int[]{0});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasing_all_p() {
-    prepare(null, 6, 3, new int[0], new int[]{0, 1, 2});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasing_p0() {
-    prepare(null, 6, 3, new int[0], new int[]{0});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasing_p2() {
-    prepare(null, 6, 3, new int[0], new int[]{2});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasure_p0_p2() {
-    prepare(null, 6, 3, new int[0], new int[]{0, 2});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasing_d0_p0_p1() {
-    prepare(null, 6, 3, new int[]{0}, new int[]{0, 1});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCoding_6x3_erasing_d0_d2_p2() {
-    prepare(null, 6, 3, new int[]{0, 2}, new int[]{2});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCodingNegative_6x3_erasing_d2_d4() {
-    prepare(null, 6, 3, new int[]{2, 4}, new int[0]);
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCodingNegative_6x3_erasing_too_many() {
-    prepare(null, 6, 3, new int[]{2, 4}, new int[]{0, 1});
-    testCodingWithErasingTooMany();
-  }
-
-  @Test
-  public void testCoding_10x4_erasing_d0_p0() {
-    prepare(null, 10, 4, new int[] {0}, new int[] {0});
-    testCodingDoMixAndTwice();
-  }
-
-  @Test
-  public void testCodingInputBufferPosition() {
-    prepare(null, 6, 3, new int[]{0}, new int[]{0});
-    testInputPosition(false);
-    testInputPosition(true);
-  }
 }

+ 33 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoder2.java

@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.erasurecode.rawcoder;
+
+import org.junit.Before;
+
+/**
+ * Test the new raw Reed-Solomon coder implemented in Java.
+ */
+public class TestRSRawCoder2 extends TestRSRawCoderBase {
+
+  @Before
+  public void setup() {
+    this.encoderClass = RSRawEncoder2.class;
+    this.decoderClass = RSRawDecoder2.class;
+    setAllowDump(false);
+  }
+}

+ 84 - 25
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestRSRawCoderBase.java

@@ -17,42 +17,101 @@
  */
 package org.apache.hadoop.io.erasurecode.rawcoder;
 
-import org.apache.hadoop.io.erasurecode.rawcoder.util.RSUtil;
+import org.junit.Test;
 
 /**
  * Test base for raw Reed-solomon coders.
  */
 public abstract class TestRSRawCoderBase extends TestRawCoderBase {
 
-  private static int symbolSize = 0;
-  private static int symbolMax = 0;
+  @Test
+  public void testCoding_6x3_erasing_all_d() {
+    prepare(null, 6, 3, new int[]{0, 1, 2}, new int[0], true);
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCoding_6x3_erasing_d0_d2() {
+    prepare(null, 6, 3, new int[] {0, 2}, new int[]{});
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCoding_6x3_erasing_d0() {
+    prepare(null, 6, 3, new int[]{0}, new int[0]);
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCoding_6x3_erasing_d2() {
+    prepare(null, 6, 3, new int[]{2}, new int[]{});
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCoding_6x3_erasing_d0_p0() {
+    prepare(null, 6, 3, new int[]{0}, new int[]{0});
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCoding_6x3_erasing_all_p() {
+    prepare(null, 6, 3, new int[0], new int[]{0, 1, 2});
+    testCodingDoMixAndTwice();
+  }
 
-  private static int RS_FIXED_DATA_GENERATOR = 0;
+  @Test
+  public void testCoding_6x3_erasing_p0() {
+    prepare(null, 6, 3, new int[0], new int[]{0});
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCoding_6x3_erasing_p2() {
+    prepare(null, 6, 3, new int[0], new int[]{2});
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCoding_6x3_erasure_p0_p2() {
+    prepare(null, 6, 3, new int[0], new int[]{0, 2});
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCoding_6x3_erasing_d0_p0_p1() {
+    prepare(null, 6, 3, new int[]{0}, new int[]{0, 1});
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCoding_6x3_erasing_d0_d2_p2() {
+    prepare(null, 6, 3, new int[]{0, 2}, new int[]{2});
+    testCodingDoMixAndTwice();
+  }
+
+  @Test
+  public void testCodingNegative_6x3_erasing_d2_d4() {
+    prepare(null, 6, 3, new int[]{2, 4}, new int[0]);
+    testCodingDoMixAndTwice();
+  }
 
-  static {
-    symbolSize = (int) Math.round(Math.log(
-        RSUtil.GF.getFieldSize()) / Math.log(2));
-    symbolMax = (int) Math.pow(2, symbolSize);
+  @Test
+  public void testCodingNegative_6x3_erasing_too_many() {
+    prepare(null, 6, 3, new int[]{2, 4}, new int[]{0, 1});
+    testCodingWithErasingTooMany();
   }
 
-  @Override
-  protected byte[] generateData(int len) {
-    byte[] buffer = new byte[len];
-    for (int i = 0; i < buffer.length; i++) {
-      buffer[i] = (byte) RAND.nextInt(symbolMax);
-    }
-    return buffer;
+  @Test
+  public void testCoding_10x4_erasing_d0_p0() {
+    prepare(null, 10, 4, new int[] {0}, new int[] {0});
+    testCodingDoMixAndTwice();
   }
 
-  @Override
-  protected byte[] generateFixedData(int len) {
-    byte[] buffer = new byte[len];
-    for (int i = 0; i < buffer.length; i++) {
-      buffer[i] = (byte) RS_FIXED_DATA_GENERATOR++;
-      if (RS_FIXED_DATA_GENERATOR == symbolMax) {
-        RS_FIXED_DATA_GENERATOR = 0;
-      }
-    }
-    return buffer;
+  @Test
+  public void testCodingInputBufferPosition() {
+    prepare(null, 6, 3, new int[]{0}, new int[]{0});
+    testInputPosition(false);
+    testInputPosition(true);
   }
 }

+ 0 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/rawcoder/TestXORRawCoder.java

@@ -29,7 +29,6 @@ public class TestXORRawCoder extends TestRawCoderBase {
   public void setup() {
     this.encoderClass = XORRawEncoder.class;
     this.decoderClass = XORRawDecoder.class;
-    setAllowDump(false);
   }
 
   @Test