
HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file checksum with CRC32C. (Kihwal Lee via szetszwo)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1375834 13f79535-47bb-0310-9956-ffa450edef68
Thomas Graves · 12 years ago · commit 9cdbebac38

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -36,6 +36,9 @@ Release 0.23.3 - UNRELEASED
     HADOOP-8240. Add a new API to allow users to specify a checksum type
     on FileSystem.create(..).  (Kihwal Lee via szetszwo)
 
+    HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file
+    checksum with CRC32C.  (Kihwal Lee via szetszwo)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 41 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java

@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.util.DataChecksum;
+
+/** For CRC32 with the Castagnoli polynomial */
+public class MD5MD5CRC32CastagnoliFileChecksum extends MD5MD5CRC32FileChecksum {
+  /** Same as this(0, 0, null) */
+  public MD5MD5CRC32CastagnoliFileChecksum() {
+    this(0, 0, null);
+  }
+
+  /** Create a MD5FileChecksum */
+  public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
+    super(bytesPerCRC, crcPerBlock, md5);
+  }
+
+  @Override
+  public DataChecksum.Type getCrcType() {
+    // this subclass always uses CRC32C (the Castagnoli polynomial).
+    return DataChecksum.Type.CRC32C;
+  }
+}
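
What the subclass buys is visible in getAlgorithmName(): the base class (changed below) now appends getCrcType().name(), so a Castagnoli checksum advertises itself distinctly from a classic CRC32 one. A hedged sketch with made-up parameters:

    import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
    import org.apache.hadoop.io.MD5Hash;

    public class AlgorithmNameDemo {
      public static void main(String[] args) {
        // 512 bytes per CRC, 128 CRCs per block; the MD5 digest is arbitrary.
        MD5Hash md5 = new MD5Hash("00112233445566778899aabbccddeeff");
        MD5MD5CRC32CastagnoliFileChecksum sum =
            new MD5MD5CRC32CastagnoliFileChecksum(512, 128, md5);
        System.out.println(sum.getAlgorithmName()); // MD5-of-128MD5-of-512CRC32C
      }
    }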

+ 58 - 6
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java

@@ -23,12 +23,17 @@ import java.io.IOException;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.util.DataChecksum;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.znerd.xmlenc.XMLOutputter;
 
+import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
+import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
+
 /** MD5 of MD5 of CRC32. */
 @InterfaceAudience.LimitedPrivate({"HDFS"})
 @InterfaceStability.Unstable
@@ -54,7 +59,19 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
 
   /** {@inheritDoc} */
   public String getAlgorithmName() {
-    return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32";
+    return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC +
+        getCrcType().name();
+  }
+
+  public static DataChecksum.Type getCrcTypeFromAlgorithmName(String algorithm)
+      throws IOException {
+    if (algorithm.endsWith(DataChecksum.Type.CRC32.name())) {
+      return DataChecksum.Type.CRC32;
+    } else if (algorithm.endsWith(DataChecksum.Type.CRC32C.name())) {
+      return DataChecksum.Type.CRC32C;
+    }
+
+    throw new IOException("Unknown checksum type in " + algorithm);
   }
 
   /** {@inheritDoc} */
@@ -65,6 +82,16 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
     return WritableUtils.toByteArray(this);
   }
 
+  /** returns the CRC type */
+  public DataChecksum.Type getCrcType() {
+    // default to the one that is understood by all releases.
+    return DataChecksum.Type.CRC32;
+  }
+
+  public ChecksumOpt getChecksumOpt() {
+    return new ChecksumOpt(getCrcType(), bytesPerCRC);
+  }
+
   /** {@inheritDoc} */
   public void readFields(DataInput in) throws IOException {
     bytesPerCRC = in.readInt();
@@ -86,6 +113,7 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
     if (that != null) {
       xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
       xml.attribute("crcPerBlock", "" + that.crcPerBlock);
+      xml.attribute("crcType", "" + that.getCrcType().name());
       xml.attribute("md5", "" + that.md5);
     }
     xml.endTag();
@@ -97,16 +125,40 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
     final String bytesPerCRC = attrs.getValue("bytesPerCRC");
     final String crcPerBlock = attrs.getValue("crcPerBlock");
     final String md5 = attrs.getValue("md5");
+    String crcType = attrs.getValue("crcType");
+    DataChecksum.Type finalCrcType;
     if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
       return null;
     }
 
     try {
-      return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC),
-          Integer.valueOf(crcPerBlock), new MD5Hash(md5));
-    } catch(Exception e) {
+      // old versions don't send crcType; assume CRC32, the only type
+      // they support.
+      if (crcType == null || crcType.isEmpty()) {
+        finalCrcType = DataChecksum.Type.CRC32;
+      } else {
+        finalCrcType = DataChecksum.Type.valueOf(crcType);
+      }
+
+      switch (finalCrcType) {
+        case CRC32:
+          return new MD5MD5CRC32GzipFileChecksum(
+              Integer.valueOf(bytesPerCRC),
+              Integer.valueOf(crcPerBlock),
+              new MD5Hash(md5));
+        case CRC32C:
+          return new MD5MD5CRC32CastagnoliFileChecksum(
+              Integer.valueOf(bytesPerCRC),
+              Integer.valueOf(crcPerBlock),
+              new MD5Hash(md5));
+        default:
+          // unreachable: finalCrcType either holds a valid type or
+          // Type.valueOf has already thrown.
+          return null;
+      }
+    } catch (Exception e) {
       throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
-          + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e);
+          + ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType
+          + ", md5=" + md5, e);
     }
   }
 
@@ -114,4 +166,4 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
   public String toString() {
     return getAlgorithmName() + ":" + md5;
   }
-}
+}
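
The new static helper dispatches on the algorithm-name suffix. endsWith is an exact suffix match, so a name ending in "CRC32C" is not mistaken for "CRC32" (its last five characters are "RC32C"). A small sketch of the round trip, with illustrative algorithm strings:

    import java.io.IOException;
    import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;

    public class CrcTypeParseDemo {
      public static void main(String[] args) throws IOException {
        // Prints CRC32, then CRC32C; an unrecognized suffix throws IOException.
        System.out.println(MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(
            "MD5-of-128MD5-of-512CRC32"));
        System.out.println(MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(
            "MD5-of-128MD5-of-512CRC32C"));
      }
    }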

+ 40 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java

@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import org.apache.hadoop.io.MD5Hash;
+import org.apache.hadoop.util.DataChecksum;
+
+/** For CRC32 with the Gzip polynomial */
+public class MD5MD5CRC32GzipFileChecksum extends MD5MD5CRC32FileChecksum {
+  /** Same as this(0, 0, null) */
+  public MD5MD5CRC32GzipFileChecksum() {
+    this(0, 0, null);
+  }
+
+  /** Create a MD5FileChecksum */
+  public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
+    super(bytesPerCRC, crcPerBlock, md5);
+  }
+  @Override
+  public DataChecksum.Type getCrcType() {
+    // CRC32 (the Gzip polynomial) is the type understood by all releases.
+    return DataChecksum.Type.CRC32;
+  }
+}

+ 3 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java

@@ -44,13 +44,15 @@ public class DataChecksum implements Checksum {
   public static final int CHECKSUM_CRC32   = 1;
   public static final int CHECKSUM_CRC32C  = 2;
   public static final int CHECKSUM_DEFAULT = 3;
+  public static final int CHECKSUM_MIXED   = 4;
 
   /** The checksum types */
   public static enum Type {
     NULL  (CHECKSUM_NULL, 0),
     CRC32 (CHECKSUM_CRC32, 4),
     CRC32C(CHECKSUM_CRC32C, 4),
-    DEFAULT(CHECKSUM_DEFAULT, 0); // This cannot be used to create DataChecksum
+    DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
+    MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
 
     public final int id;
     public final int size;
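
MIXED joins DEFAULT as a sentinel: it describes a situation (for instance, a file whose blocks were written with different CRC types) rather than a concrete algorithm, so, as the comments note, neither can back a DataChecksum instance. A sketch, assuming newDataChecksum(Type, int) returns null for sentinel types as it does for other non-concrete inputs:

    import org.apache.hadoop.util.DataChecksum;

    public class SentinelTypeDemo {
      public static void main(String[] args) {
        // A concrete type yields a working calculator (4-byte checksums).
        DataChecksum crc32c =
            DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);
        System.out.println(crc32c.getChecksumSize()); // 4

        // The sentinels do not.
        System.out.println(
            DataChecksum.newDataChecksum(DataChecksum.Type.MIXED, 512)); // null
      }
    }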

+ 18 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java

@@ -29,6 +29,8 @@ import java.util.TreeMap;
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
+import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
 import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSUtil;
@@ -44,6 +46,7 @@ import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
+import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.StringUtils;
 import org.mortbay.util.ajax.JSON;
 
@@ -512,7 +515,21 @@ public class JsonUtil {
     final byte[] bytes = StringUtils.hexStringToByte((String)m.get("bytes"));
 
     final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
-    final MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum();
+    final DataChecksum.Type crcType =
+        MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(algorithm);
+    final MD5MD5CRC32FileChecksum checksum;
+
+    // Recreate what DFSClient would have returned.
+    switch(crcType) {
+      case CRC32:
+        checksum = new MD5MD5CRC32GzipFileChecksum();
+        break;
+      case CRC32C:
+        checksum = new MD5MD5CRC32CastagnoliFileChecksum();
+        break;
+      default:
+        throw new IOException("Unknown algorithm: " + algorithm);
+    }
     checksum.readFields(in);
 
     //check algorithm name
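
The switch above recreates the concrete subclass DFSClient would have returned over RPC, so WebHDFS callers observe the same getCrcType() as native clients. The same dispatch isolated into a hedged sketch (emptyChecksumFor is an illustrative helper name, not part of the patch):

    import java.io.IOException;
    import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
    import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
    import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;

    public class ChecksumDispatchDemo {
      static MD5MD5CRC32FileChecksum emptyChecksumFor(String algorithm)
          throws IOException {
        switch (MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(algorithm)) {
          case CRC32:
            return new MD5MD5CRC32GzipFileChecksum();
          case CRC32C:
            return new MD5MD5CRC32CastagnoliFileChecksum();
          default:
            throw new IOException("Unknown algorithm: " + algorithm);
        }
      }

      public static void main(String[] args) throws IOException {
        // Prints CRC32C.
        System.out.println(
            emptyChecksumFor("MD5-of-128MD5-of-512CRC32C").getCrcType());
      }
    }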