před 16 roky · 1219b6974a
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -181,6 +181,9 @@ Release 0.20.1 - Unreleased
 
				     MAPREDUCE-465. Fix a bug in MultithreadedMapRunner. (Amareshwari 
			
 
				     Sriramadasu via sharad)
			
 
				 
			
 
				+    MAPREDUCE-18. Puts some checks to detect cases where jetty serves up
			
 
				+    incorrect output during shuffle. (Ravi Gummadi via ddas)
			
 
				+
			
 
				 Release 0.20.0 - 2009-04-15
			
 
				 
			
 
				   INCOMPATIBLE CHANGES
			
--- a/src/mapred/org/apache/hadoop/mapred/MRConstants.java
+++ b/src/mapred/org/apache/hadoop/mapred/MRConstants.java
@@ -47,5 +47,15 @@ interface MRConstants {
 
				    */
			
 
				   public static final String RAW_MAP_OUTPUT_LENGTH = "Raw-Map-Output-Length";
			
 
				 
			
 
				+  /**
			
 
				+   * The map task from which the map output data is being transferred
			
 
				+   */
			
 
				+  public static final String FROM_MAP_TASK = "from-map-task";
			
 
				+  
			
 
				+  /**
			
 
				+   * The reduce task number for which this map output is being transferred
			
 
				+   */
			
 
				+  public static final String FOR_REDUCE_TASK = "for-reduce-task";
			
 
				+  
			
 
				   public static final String WORKDIR = "work";
			
 
				 }
			
--- a/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
+++ b/src/mapred/org/apache/hadoop/mapred/ReduceTask.java
@@ -1259,7 +1259,8 @@ class ReduceTask extends Task {
 
				         Path tmpMapOutput = new Path(filename+"-"+id);
			
 
				         
			
 
				         // Copy the map output
			
 
				-        MapOutput mapOutput = getMapOutput(loc, tmpMapOutput);
			
 
				+        MapOutput mapOutput = getMapOutput(loc, tmpMapOutput,
			
 
				+                                           reduceId.getTaskID().getId());
			
 
				         if (mapOutput == null) {
			
 
				           throw new IOException("Failed to fetch map-output for " + 
			
 
				                                 loc.getTaskAttemptId() + " from " + 
			
@@ -1341,24 +1342,60 @@ class ReduceTask extends Task {
 
				        * @throws IOException when something goes wrong
			
 
				        */
			
 
				       private MapOutput getMapOutput(MapOutputLocation mapOutputLoc, 
			
 
				-                                     Path filename)
			
 
				+                                     Path filename, int reduce)
			
 
				       throws IOException, InterruptedException {
			
 
				         // Connect
			
 
				         URLConnection connection = 
			
 
				           mapOutputLoc.getOutputLocation().openConnection();
			
 
				         InputStream input = getInputStream(connection, STALLED_COPY_TIMEOUT,
			
 
				                                            DEFAULT_READ_TIMEOUT); 
			
 
				-
			
 
				-        //We will put a file in memory if it meets certain criteria:
			
 
				-        //1. The size of the (decompressed) file should be less than 25% of 
			
 
				-        //    the total inmem fs
			
 
				-        //2. There is space available in the inmem fs
			
 
				-
			
 
				+        
			
 
				+        // Validate header from map output
			
 
				+        TaskAttemptID mapId = null;
			
 
				+        try {
			
 
				+          mapId =
			
 
				+            TaskAttemptID.forName(connection.getHeaderField(FROM_MAP_TASK));
			
 
				+        } catch (IllegalArgumentException ia) {
			
 
				+          LOG.warn("Invalid map id ", ia);
			
 
				+          return null;
			
 
				+        }
			
 
				+        TaskAttemptID expectedMapId = mapOutputLoc.getTaskAttemptId();
			
 
				+        if (!mapId.equals(expectedMapId)) {
			
 
				+          LOG.warn("data from wrong map:" + mapId +
			
 
				+              " arrived to reduce task " + reduce +
			
 
				+              ", where as expected map output should be from " + expectedMapId);
			
 
				+          return null;
			
 
				+        }
			
 
				+        
			
 
				         long decompressedLength = 
			
 
				           Long.parseLong(connection.getHeaderField(RAW_MAP_OUTPUT_LENGTH));  
			
 
				         long compressedLength = 
			
 
				           Long.parseLong(connection.getHeaderField(MAP_OUTPUT_LENGTH));
			
 
				 
			
 
				+        if (compressedLength < 0 || decompressedLength < 0) {
			
 
				+          LOG.warn(getName() + " invalid lengths in map output header: id: " +
			
 
				+              mapId + " compressed len: " + compressedLength +
			
 
				+              ", decompressed len: " + decompressedLength);
			
 
				+          return null;
			
 
				+        }
			
 
				+        int forReduce =
			
 
				+          (int)Integer.parseInt(connection.getHeaderField(FOR_REDUCE_TASK));
			
 
				+        
			
 
				+        if (forReduce != reduce) {
			
 
				+          LOG.warn("data for the wrong reduce: " + forReduce +
			
 
				+              " with compressed len: " + compressedLength +
			
 
				+              ", decompressed len: " + decompressedLength +
			
 
				+              " arrived to reduce task " + reduce);
			
 
				+          return null;
			
 
				+        }
			
 
				+        LOG.info("header: " + mapId + ", compressed len: " + compressedLength +
			
 
				+                 ", decompressed len: " + decompressedLength);
			
 
				+
			
 
				+        //We will put a file in memory if it meets certain criteria:
			
 
				+        //1. The size of the (decompressed) file should be less than 25% of 
			
 
				+        //    the total inmem fs
			
 
				+        //2. There is space available in the inmem fs
			
 
				+        
			
 
				         // Check if this map-output can be saved in-memory
			
 
				         boolean shuffleInMemory = ramManager.canFitInMemory(decompressedLength); 
			
 
				 
			
--- a/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
+++ b/src/mapred/org/apache/hadoop/mapred/TaskTracker.java
@@ -2870,9 +2870,13 @@ public class TaskTracker
 
				          * Read the index file to get the information about where
			
 
				          * the map-output for the given reducer is available. 
			
 
				          */
			
 
				-       IndexRecord info = 
			
 
				+        IndexRecord info = 
			
 
				           tracker.indexCache.getIndexInformation(mapId, reduce,indexFileName);
			
 
				           
			
 
				+        //set the custom "from-map-task" http header to the map task from which
			
 
				+        //the map output data is being transferred
			
 
				+        response.setHeader(FROM_MAP_TASK, mapId);
			
 
				+        
			
 
				         //set the custom "Raw-Map-Output-Length" http header to 
			
 
				         //the raw (decompressed) length
			
 
				         response.setHeader(RAW_MAP_OUTPUT_LENGTH,
			
@@ -2883,6 +2887,10 @@ public class TaskTracker
 
				         response.setHeader(MAP_OUTPUT_LENGTH,
			
 
				             Long.toString(info.partLength));
			
 
				 
			
 
				+        //set the custom "for-reduce-task" http header to the reduce task number
			
 
				+        //for which this map output is being transferred
			
 
				+        response.setHeader(FOR_REDUCE_TASK, Integer.toString(reduce));
			
 
				+        
			
 
				         //use the same buffersize as used for reading the data from disk
			
 
				         response.setBufferSize(MAX_BYTES_TO_READ);