|
@@ -19,13 +19,22 @@
|
|
|
package org.apache.hadoop.mapred;
|
|
|
|
|
|
import java.io.EOFException;
|
|
|
+import java.io.FileDescriptor;
|
|
|
+import java.io.FileInputStream;
|
|
|
import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
|
|
|
|
+import org.apache.hadoop.conf.Configuration;
|
|
|
+import org.apache.commons.logging.Log;
|
|
|
+import org.apache.commons.logging.LogFactory;
|
|
|
import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
import org.apache.hadoop.classification.InterfaceStability;
|
|
|
import org.apache.hadoop.fs.ChecksumException;
|
|
|
+import org.apache.hadoop.fs.HasFileDescriptor;
|
|
|
import org.apache.hadoop.io.IOUtils;
|
|
|
+import org.apache.hadoop.io.ReadaheadPool;
|
|
|
+import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest;
|
|
|
+import org.apache.hadoop.mapreduce.MRConfig;
|
|
|
import org.apache.hadoop.util.DataChecksum;
|
|
|
/**
|
|
|
* A checksum input stream, used for IFiles.
|
|
@@ -35,7 +44,8 @@ import org.apache.hadoop.util.DataChecksum;
|
|
|
@InterfaceStability.Unstable
|
|
|
public class IFileInputStream extends InputStream {
|
|
|
|
|
|
- private final InputStream in; //The input stream to be verified for checksum.
|
|
|
+ private final InputStream in; //The input stream to be verified for checksum.
|
|
|
+ private final FileDescriptor inFd; // the file descriptor, if it is known
|
|
|
private final long length; //The total length of the input file
|
|
|
private final long dataLength;
|
|
|
private DataChecksum sum;
|
|
@@ -43,7 +53,14 @@ public class IFileInputStream extends InputStream {
|
|
|
private final byte b[] = new byte[1];
|
|
|
private byte csum[] = null;
|
|
|
private int checksumSize;
|
|
|
-
|
|
|
+
|
|
|
+ private ReadaheadRequest curReadahead = null;
|
|
|
+ private ReadaheadPool raPool = ReadaheadPool.getInstance();
|
|
|
+ private boolean readahead;
|
|
|
+ private int readaheadLength;
|
|
|
+
|
|
|
+ public static final Log LOG = LogFactory.getLog(IFileInputStream.class);
|
|
|
+
|
|
|
private boolean disableChecksumValidation = false;
|
|
|
|
|
|
/**
|
|
@@ -51,13 +68,36 @@ public class IFileInputStream extends InputStream {
|
|
|
* @param in The input stream to be verified for checksum.
|
|
|
* @param len The length of the input stream including checksum bytes.
|
|
|
*/
|
|
|
- public IFileInputStream(InputStream in, long len) {
|
|
|
+ public IFileInputStream(InputStream in, long len, Configuration conf) { // conf supplies the readahead settings; null is accepted (a default Configuration is used)
|
|
|
this.in = in;
|
|
|
+ this.inFd = getFileDescriptorIfAvail(in); // null when no descriptor can be obtained, which turns doReadahead() into a no-op
|
|
|
sum = DataChecksum.newDataChecksum(DataChecksum.CHECKSUM_CRC32,
|
|
|
Integer.MAX_VALUE);
|
|
|
checksumSize = sum.getChecksumSize();
|
|
|
length = len;
|
|
|
dataLength = length - checksumSize;
|
|
|
+
|
|
|
+ conf = (conf != null) ? conf : new Configuration(); // fall back to a default Configuration when the caller passes null
|
|
|
+ readahead = conf.getBoolean(MRConfig.MAPRED_IFILE_READAHEAD, // flag that gates doReadahead()
|
|
|
+ MRConfig.DEFAULT_MAPRED_IFILE_READAHEAD);
|
|
|
+ readaheadLength = conf.getInt(MRConfig.MAPRED_IFILE_READAHEAD_BYTES, // length handed to raPool.readaheadStream() on every request
|
|
|
+ MRConfig.DEFAULT_MAPRED_IFILE_READAHEAD_BYTES);
|
|
|
+
|
|
|
+ doReadahead(); // request readahead as soon as the stream is constructed, before any read call
|
|
|
+ }
|
|
|
+
|
|
|
+ private static FileDescriptor getFileDescriptorIfAvail(InputStream in) { // returns the FileDescriptor behind 'in', or null if it cannot be obtained
|
|
|
+ FileDescriptor fd = null; // stays null when neither branch below applies or the lookup throws
|
|
|
+ try {
|
|
|
+ if (in instanceof HasFileDescriptor) { // stream exposes its descriptor through Hadoop's HasFileDescriptor
|
|
|
+ fd = ((HasFileDescriptor)in).getFileDescriptor();
|
|
|
+ } else if (in instanceof FileInputStream) { // plain java.io file stream
|
|
|
+ fd = ((FileInputStream)in).getFD();
|
|
|
+ }
|
|
|
+ } catch (IOException e) { // best effort: the failure is logged and null is returned instead of propagating
|
|
|
+ LOG.info("Unable to determine FileDescriptor", e);
|
|
|
+ }
|
|
|
+ return fd;
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -66,6 +106,10 @@ public class IFileInputStream extends InputStream {
|
|
|
*/
|
|
|
@Override
|
|
|
public void close() throws IOException {
|
|
|
+
|
|
|
+ if (curReadahead != null) {
|
|
|
+ curReadahead.cancel();
|
|
|
+ }
|
|
|
if (currentOffset < dataLength) {
|
|
|
byte[] t = new byte[Math.min((int)
|
|
|
(Integer.MAX_VALUE & (dataLength - currentOffset)), 32 * 1024)];
|
|
@@ -102,10 +146,21 @@ public class IFileInputStream extends InputStream {
|
|
|
if (currentOffset >= dataLength) {
|
|
|
return -1;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
+ doReadahead();
|
|
|
+
|
|
|
return doRead(b,off,len);
|
|
|
}
|
|
|
|
|
|
+ private void doReadahead() { // submits a readahead request for the current position; does nothing unless a pool, a descriptor and the config flag are all present
|
|
|
+ if (raPool != null && inFd != null && readahead) { // inFd is null when the wrapped stream exposed no FileDescriptor
|
|
|
+ curReadahead = raPool.readaheadStream( // the returned request is kept so close() can cancel it
|
|
|
+ "ifile", inFd, // "ifile" is a fixed label for the request - NOTE(review): exact use of the label is defined by ReadaheadPool, confirm there
|
|
|
+ currentOffset, readaheadLength, dataLength, // current offset, configured length, data length (presumably the upper bound - confirm against ReadaheadPool.readaheadStream)
|
|
|
+ curReadahead); // the previous request is handed back to the pool - NOTE(review): presumably so it can be reused or superseded, confirm
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* Read bytes from the stream.
|
|
|
* At EOF, checksum is validated and sent back
|