|
@@ -23,12 +23,17 @@ import java.io.IOException;
|
|
|
|
|
|
import org.apache.hadoop.classification.InterfaceAudience;
|
|
import org.apache.hadoop.classification.InterfaceAudience;
|
|
import org.apache.hadoop.classification.InterfaceStability;
|
|
import org.apache.hadoop.classification.InterfaceStability;
|
|
|
|
+import org.apache.hadoop.fs.Options.ChecksumOpt;
|
|
import org.apache.hadoop.io.MD5Hash;
|
|
import org.apache.hadoop.io.MD5Hash;
|
|
import org.apache.hadoop.io.WritableUtils;
|
|
import org.apache.hadoop.io.WritableUtils;
|
|
|
|
+import org.apache.hadoop.util.DataChecksum;
|
|
import org.xml.sax.Attributes;
|
|
import org.xml.sax.Attributes;
|
|
import org.xml.sax.SAXException;
|
|
import org.xml.sax.SAXException;
|
|
import org.znerd.xmlenc.XMLOutputter;
|
|
import org.znerd.xmlenc.XMLOutputter;
|
|
|
|
|
|
|
|
+import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
|
|
|
|
+import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
|
|
|
|
+
|
|
/** MD5 of MD5 of CRC32. */
|
|
/** MD5 of MD5 of CRC32. */
|
|
@InterfaceAudience.LimitedPrivate({"HDFS"})
|
|
@InterfaceAudience.LimitedPrivate({"HDFS"})
|
|
@InterfaceStability.Unstable
|
|
@InterfaceStability.Unstable
|
|
@@ -54,7 +59,19 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|
|
|
|
|
/** {@inheritDoc} */
|
|
/** {@inheritDoc} */
|
|
public String getAlgorithmName() {
|
|
public String getAlgorithmName() {
|
|
- return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32";
|
|
|
|
|
|
+ return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC +
|
|
|
|
+ getCrcType().name();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+  /**
+   * Determines the CRC type from the trailing portion of a checksum
+   * algorithm name.
+   *
+   * <p>The name is matched by suffix, first against
+   * {@code DataChecksum.Type.CRC32.name()} and then against
+   * {@code DataChecksum.Type.CRC32C.name()}.
+   *
+   * @param algorithm the algorithm name to inspect
+   * @return {@code DataChecksum.Type.CRC32} or
+   *         {@code DataChecksum.Type.CRC32C}, whichever suffix matches
+   * @throws IOException if {@code algorithm} is {@code null} or ends with
+   *         neither recognised CRC type name
+   */
+  public static DataChecksum.Type getCrcTypeFromAlgorithmName(String algorithm)
+      throws IOException {
+    // A null name is reported through the declared IOException below rather
+    // than escaping as a NullPointerException from endsWith().
+    if (algorithm != null) {
+      if (algorithm.endsWith(DataChecksum.Type.CRC32.name())) {
+        return DataChecksum.Type.CRC32;
+      }
+      if (algorithm.endsWith(DataChecksum.Type.CRC32C.name())) {
+        return DataChecksum.Type.CRC32C;
+      }
+    }
+
+    throw new IOException("Unknown checksum type in " + algorithm);
   }
|
|
}
|
|
|
|
|
|
/** {@inheritDoc} */
|
|
/** {@inheritDoc} */
|
|
@@ -65,6 +82,16 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|
return WritableUtils.toByteArray(this);
|
|
return WritableUtils.toByteArray(this);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+  /**
+   * Reports the CRC type associated with this checksum.
+   *
+   * @return {@code DataChecksum.Type.CRC32} in this implementation
+   */
+  public DataChecksum.Type getCrcType() {
+    // CRC32 is the default because it is the type understood by all releases.
+    final DataChecksum.Type defaultType = DataChecksum.Type.CRC32;
+    return defaultType;
+  }
|
|
|
|
+
|
|
|
|
+  /**
+   * Builds a {@code ChecksumOpt} describing this checksum.
+   *
+   * @return a new {@code ChecksumOpt} constructed from {@code getCrcType()}
+   *         and this object's {@code bytesPerCRC}
+   */
+  public ChecksumOpt getChecksumOpt() {
+    final DataChecksum.Type crcType = getCrcType();
+    return new ChecksumOpt(crcType, bytesPerCRC);
+  }
|
|
|
|
+
|
|
/** {@inheritDoc} */
|
|
/** {@inheritDoc} */
|
|
public void readFields(DataInput in) throws IOException {
|
|
public void readFields(DataInput in) throws IOException {
|
|
bytesPerCRC = in.readInt();
|
|
bytesPerCRC = in.readInt();
|
|
@@ -86,6 +113,7 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|
if (that != null) {
|
|
if (that != null) {
|
|
xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
|
|
xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
|
|
xml.attribute("crcPerBlock", "" + that.crcPerBlock);
|
|
xml.attribute("crcPerBlock", "" + that.crcPerBlock);
|
|
|
|
+ xml.attribute("crcType", ""+ that.getCrcType().name());
|
|
xml.attribute("md5", "" + that.md5);
|
|
xml.attribute("md5", "" + that.md5);
|
|
}
|
|
}
|
|
xml.endTag();
|
|
xml.endTag();
|
|
@@ -97,16 +125,40 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|
final String bytesPerCRC = attrs.getValue("bytesPerCRC");
|
|
final String bytesPerCRC = attrs.getValue("bytesPerCRC");
|
|
final String crcPerBlock = attrs.getValue("crcPerBlock");
|
|
final String crcPerBlock = attrs.getValue("crcPerBlock");
|
|
final String md5 = attrs.getValue("md5");
|
|
final String md5 = attrs.getValue("md5");
|
|
|
|
+ String crcType = attrs.getValue("crcType");
|
|
|
|
+ DataChecksum.Type finalCrcType;
|
|
if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
|
|
if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
|
|
return null;
|
|
return null;
|
|
}
|
|
}
|
|
|
|
|
|
try {
|
|
try {
|
|
- return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC),
|
|
|
|
- Integer.valueOf(crcPerBlock), new MD5Hash(md5));
|
|
|
|
- } catch(Exception e) {
|
|
|
|
|
|
+ // old versions don't support crcType.
|
|
|
|
+ if (crcType == null || crcType == "") {
|
|
|
|
+ finalCrcType = DataChecksum.Type.CRC32;
|
|
|
|
+ } else {
|
|
|
|
+ finalCrcType = DataChecksum.Type.valueOf(crcType);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ switch (finalCrcType) {
|
|
|
|
+ case CRC32:
|
|
|
|
+ return new MD5MD5CRC32GzipFileChecksum(
|
|
|
|
+ Integer.valueOf(bytesPerCRC),
|
|
|
|
+ Integer.valueOf(crcPerBlock),
|
|
|
|
+ new MD5Hash(md5));
|
|
|
|
+ case CRC32C:
|
|
|
|
+ return new MD5MD5CRC32CastagnoliFileChecksum(
|
|
|
|
+ Integer.valueOf(bytesPerCRC),
|
|
|
|
+ Integer.valueOf(crcPerBlock),
|
|
|
|
+ new MD5Hash(md5));
|
|
|
|
+ default:
|
|
|
|
+ // we should never get here since finalCrcType will
|
|
|
|
+ // hold a valid type or we should have got an exception.
|
|
|
|
+ return null;
|
|
|
|
+ }
|
|
|
|
+ } catch (Exception e) {
|
|
throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
|
|
throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
|
|
- + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e);
|
|
|
|
|
|
+ + ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType
|
|
|
|
+ + ", md5=" + md5, e);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -114,4 +166,4 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|
public String toString() {
|
|
public String toString() {
|
|
return getAlgorithmName() + ":" + md5;
|
|
return getAlgorithmName() + ":" + md5;
|
|
}
|
|
}
|
|
-}
|
|
|
|
|
|
+}
|