@@ -0,0 +1,180 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.examples.WordCount;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * This is a wordcount application that tests the count of records that
+ * get spilled to disk. It generates simple text input files, then runs
+ * the wordcount map/reduce application on (1) 3 input files (with 3 maps
+ * and 1 reduce) and verifies the counters, and (2) 4 input files (with
+ * 4 maps and 1 reduce) and verifies the counters. The wordcount
+ * application reads the text input files, breaks each line into words,
+ * and counts them. The output is a locally sorted list of words and the
+ * count of how often they occurred.
+ */
+public class TestSpilledRecordsCounter extends TestCase {
+
+  private void validateCounters(Counters counter, long spillRecCnt) {
+    // Check that the number of spilled records matches the expected count
+    assertEquals(spillRecCnt,
+                 counter.findCounter(Task.Counter.SPILLED_RECORDS).getCounter());
+  }
+
+  private void createWordsFile(File inpFile) throws Exception {
+    Writer out = new BufferedWriter(new FileWriter(inpFile));
+    try {
+      // 500 lines x 4 unique words per line, repeated 5 times
+      // => 2K unique words, 5*2K = 10K words in total
+      int REPLICAS = 5, NUMLINES = 500, NUMWORDSPERLINE = 4;
+
+      for (int i = 0; i < REPLICAS; i++) {
+        for (int j = 1; j <= NUMLINES * NUMWORDSPERLINE; j += NUMWORDSPERLINE) {
+          out.write("word" + j + " word" + (j + 1) + " word" + (j + 2)
+                    + " word" + (j + 3) + '\n');
+        }
+      }
+    } finally {
+      out.close();
+    }
+  }
+
+  /**
+   * The main driver for the word count map/reduce program.
+   * Invoke this method to submit the map/reduce job.
+   * @throws IOException if there are communication problems with the
+   *                     job tracker.
+   */
+  public void testSpillCounter() throws Exception {
+    JobConf conf = new JobConf(TestSpilledRecordsCounter.class);
+    conf.setJobName("wordcountSpilledRecordsCounter");
+
+    // the keys are words (strings)
+    conf.setOutputKeyClass(Text.class);
+    // the values are counts (ints)
+    conf.setOutputValueClass(IntWritable.class);
+
+    conf.setMapperClass(WordCount.MapClass.class);
+    conf.setCombinerClass(WordCount.Reduce.class);
+    conf.setReducerClass(WordCount.Reduce.class);
+
+    conf.setNumMapTasks(3);
+    conf.setNumReduceTasks(1);
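+    // These settings shrink the map-side sort buffer so the job actually
+    // spills: io.sort.mb caps the in-memory sort buffer at 1MB,
+    // io.sort.record.percent reserves 5% of that buffer for record
+    // bookkeeping, io.sort.spill.percent starts a spill once the buffer
+    // is 80% full, and io.sort.factor limits each merge pass to 2
+    // streams, forcing the multi-level merges counted below.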
+    conf.setInt("io.sort.mb", 1);
+    conf.setInt("io.sort.factor", 2);
+    conf.set("io.sort.record.percent", "0.05");
+    conf.set("io.sort.spill.percent", "0.80");
+
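+    // Root all test data under test.build.data (default: /tmp), replacing
+    // spaces with '+' so they cannot leak into the input/output paths.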
+    String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data",
+                                                       File.separator + "tmp"))
+                               .toString().replace(' ', '+');
+    conf.set("test.build.data", TEST_ROOT_DIR);
+    String IN_DIR = TEST_ROOT_DIR + File.separator +
+                    "spilledRecords.countertest" + File.separator +
+                    "genins" + File.separator;
+    String OUT_DIR = TEST_ROOT_DIR + File.separator +
+                     "spilledRecords.countertest" + File.separator;
+
+    FileSystem fs = FileSystem.get(conf);
+    Path testdir = new Path(TEST_ROOT_DIR, "spilledRecords.countertest");
+    try {
+      if (fs.exists(testdir)) {
+        fs.delete(testdir, true);
+      }
+      if (!fs.mkdirs(testdir)) {
+        throw new IOException("Mkdirs failed to create " + testdir.toString());
+      }
+
+      Path wordsIns = new Path(testdir, "genins");
+      if (!fs.mkdirs(wordsIns)) {
+        throw new IOException("Mkdirs failed to create " + wordsIns.toString());
+      }
+
+      // create 3 input files, each with 5*2K (10K) words
+      File inpFile = new File(IN_DIR + "input5_2k_1");
+      createWordsFile(inpFile);
+      inpFile = new File(IN_DIR + "input5_2k_2");
+      createWordsFile(inpFile);
+      inpFile = new File(IN_DIR + "input5_2k_3");
+      createWordsFile(inpFile);
+
+      FileInputFormat.setInputPaths(conf, IN_DIR);
+      Path outputPath1 = new Path(OUT_DIR, "output5_2k_3");
+      FileOutputFormat.setOutputPath(conf, outputPath1);
+
+      RunningJob myJob = JobClient.runJob(conf);
+      Counters c1 = myJob.getCounters();
+      // Expected spilled-records count:
+      // 3 maps, each doing 4 first-level spills, so 12 first-level
+      // spills in total.
+      // Each map: 1st level: 2k+2k+2k+2k = 8k; 2nd level: 4k+4k = 8k;
+      //   3rd level: 2k (4k from the 1st level and 4k from the 2nd level,
+      //   reduced to 2k by combineAndSpill).
+      //   So each map spills 8k+8k+2k = 18k records.
+      // For 3 maps, the total is 3*18k = 54k.
+      // Reduce: each of the 3 map outputs (2k each) is spilled in
+      //   shuffleToDisk(), so 3*2k = 6k in the 1st level;
+      //   2nd level: 4k (2k+2k);
+      //   the 3rd level is fed directly to the reduce (4k+2k,
+      //   combineAndSpill => 2k), so 0 records are spilled to disk
+      //   in the 3rd level.
+      //   So the reduce spills 6k+4k = 10k records.
+      // The job-level counter should be 54k+10k = 64k.
+      validateCounters(c1, 64000);
+
+      // create a 4th input file, also with 5*2K words, and test with 4 maps
+      inpFile = new File(IN_DIR + "input5_2k_4");
+      createWordsFile(inpFile);
+      conf.setNumMapTasks(4);
+      Path outputPath2 = new Path(OUT_DIR, "output5_2k_4");
+      FileOutputFormat.setOutputPath(conf, outputPath2);
+
+      myJob = JobClient.runJob(conf);
+      c1 = myJob.getCounters();
+      // Expected spilled-records count:
+      // 4 maps, each doing 4 first-level spills, so 16 first-level
+      // spills in total.
+      // Each map: 1st level: 2k+2k+2k+2k = 8k; 2nd level: 4k+4k = 8k;
+      //   3rd level: 2k (4k from the 1st level and 4k from the 2nd level,
+      //   reduced to 2k by combineAndSpill).
+      //   So each map spills 8k+8k+2k = 18k records.
+      // For 4 maps, the total is 4*18k = 72k.
+      // Reduce: each of the 4 map outputs (2k each) is spilled in
+      //   shuffleToDisk(), so 4*2k = 8k in the 1st level;
+      //   2nd level: 4k+4k = 8k;
+      //   the 3rd level is fed directly to the reduce (4k+4k,
+      //   combineAndSpill => 2k), so 0 records are spilled to disk
+      //   in the 3rd level.
+      //   So the reduce spills 8k+8k = 16k records.
+      // The job-level counter should be 72k+16k = 88k.
+      validateCounters(c1, 88000);
+    } finally {
+      // clean up the input and output files
+      if (fs.exists(testdir)) {
+        fs.delete(testdir, true);
+      }
+    }
+  }
+}