
Revert HADOOP-1440, because it caused multiple instances of the same
task to be launched incorrectly.


git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@551818 13f79535-47bb-0310-9956-ffa450edef68

Owen O'Malley, 18 years ago
commit b87eb771d2

CHANGES.txt (+1, -3)

@@ -209,9 +209,7 @@ Trunk (unreleased changes)
      thread.  Reporting during sorting and more is also more
      consistent.  (Vivek Ratan via cutting)
 
- 64. HADOOP-1440.  When reduce is disabled, use order of splits
-     returned by InputFormat#getSplits when numbering outputs.
-     (Senthil Subramanian via cutting)
+ 64. [ intentionally blank ]
 
  65. HADOOP-1453.  Remove some unneeded calls to FileSystem#exists()
      when opening files, reducing the namenode load somewhat.

src/java/org/apache/hadoop/mapred/JobClient.java (+2, -19)

@@ -338,10 +338,6 @@ public class JobClient extends ToolBase implements MRConstants  {
     LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));
     InputSplit[] splits = 
       job.getInputFormat().getSplits(job, job.getNumMapTasks());
-    Hashtable<InputSplit, Integer> splitPositions = new Hashtable<InputSplit, Integer>(); 
-    for (int i = 0; i < splits.length; ++i) {
-      splitPositions.put(splits[i], i);
-    }
     // sort the splits into order based on size, so that the biggest
     // go first
     Arrays.sort(splits, new Comparator<InputSplit>() {
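[Editor's note: the comparator body is truncated by this hunk. For context, a
minimal sketch of a size-descending comparator over the old-API InputSplit,
whose getLength() may throw IOException, might look like the following. This
is an illustration of the idiom the comment describes, not necessarily the
exact code in trunk:

    Arrays.sort(splits, new Comparator<InputSplit>() {
      public int compare(InputSplit a, InputSplit b) {
        try {
          // Biggest splits first: descending order by reported length.
          long left = a.getLength();
          long right = b.getLength();
          return left == right ? 0 : (left < right ? 1 : -1);
        } catch (IOException ie) {
          throw new RuntimeException("Problem getting split size", ie);
        }
      }
    });
]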
@@ -365,7 +361,7 @@ public class JobClient extends ToolBase implements MRConstants  {
     // write the splits to a file for the job tracker
     FSDataOutputStream out = fs.create(submitSplitFile);
     try {
-      writeSplitsFile(splits, splitPositions, out);
+      writeSplitsFile(splits, out);
     } finally {
       out.close();
     }
@@ -394,7 +390,6 @@ public class JobClient extends ToolBase implements MRConstants  {
   static class RawSplit implements Writable {
     private String splitClass;
     private BytesWritable bytes = new BytesWritable();
-    private int position;
     private String[] locations;
       
     public void setBytes(byte[] data, int offset, int length) {
@@ -412,19 +407,11 @@ public class JobClient extends ToolBase implements MRConstants  {
     public BytesWritable getBytes() {
       return bytes;
     }
-
-    public void setPosition(int position) {
-      this.position = position;
-    }
       
     public void setLocations(String[] locations) {
       this.locations = locations;
     }
       
-    public int getPosition() {
-      return position;
-    }
-      
     public String[] getLocations() {
       return locations;
     }
@@ -432,7 +419,6 @@ public class JobClient extends ToolBase implements MRConstants  {
     public void readFields(DataInput in) throws IOException {
       splitClass = Text.readString(in);
       bytes.readFields(in);
-      position = WritableUtils.readVInt(in);
       int len = WritableUtils.readVInt(in);
       locations = new String[len];
       for(int i=0; i < len; ++i) {
@@ -443,7 +429,6 @@ public class JobClient extends ToolBase implements MRConstants  {
     public void write(DataOutput out) throws IOException {
       Text.writeString(out, splitClass);
       bytes.write(out);
-      WritableUtils.writeVInt(out, position);
       WritableUtils.writeVInt(out, locations.length);
       for(int i = 0; i < locations.length; i++) {
         Text.writeString(out, locations[i]);
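[Editor's note: with the position field gone, readFields and write are exact
mirrors again (class name, split bytes, location list), which is the Writable
contract. A hedged round-trip sketch using Hadoop's DataOutputBuffer and
DataInputBuffer, written from inside the org.apache.hadoop.mapred package
since RawSplit is package-private; setClassName is assumed here by symmetry
with the getClassName call visible in JobInProgress below:

    // Serialize a RawSplit and read it back into a fresh instance.
    JobClient.RawSplit original = new JobClient.RawSplit();
    original.setClassName("org.apache.hadoop.mapred.FileSplit"); // assumed setter
    original.setBytes(data, 0, data.length);
    original.setLocations(new String[] {"host1", "host2"});

    DataOutputBuffer out = new DataOutputBuffer();
    original.write(out);

    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    JobClient.RawSplit copy = new JobClient.RawSplit();
    copy.readFields(in);   // fields now match 'original'
]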
@@ -463,8 +448,7 @@ public class JobClient extends ToolBase implements MRConstants  {
    * @param splits the input splits to write out
    * @param out the stream to write to
    */
-  private void writeSplitsFile(InputSplit[] splits, Hashtable splitPositions,
-                              FSDataOutputStream out) throws IOException {
+  private void writeSplitsFile(InputSplit[] splits, FSDataOutputStream out) throws IOException {
     out.write(SPLIT_FILE_HEADER);
     WritableUtils.writeVInt(out, CURRENT_SPLIT_FILE_VERSION);
     WritableUtils.writeVInt(out, splits.length);
@@ -475,7 +459,6 @@ public class JobClient extends ToolBase implements MRConstants  {
       buffer.reset();
       split.write(buffer);
       rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
-      rawSplit.setPosition(((Integer) splitPositions.get(split)).intValue());
       rawSplit.setLocations(split.getLocations());
       rawSplit.write(out);
     }

src/java/org/apache/hadoop/mapred/JobInProgress.java (+1, -1)

@@ -222,7 +222,7 @@ class JobInProgress {
       maps[i] = new TaskInProgress(uniqueString, jobFile, 
                                    splits[i].getClassName(),
                                    splits[i].getBytes(), 
-                                   jobtracker, conf, this, splits[i].getPosition());
+                                   jobtracker, conf, this, i);
       for(String host: splits[i].getLocations()) {
         List<TaskInProgress> hostMaps = hostToMaps.get(host);
         if (hostMaps == null) {
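[Editor's note: after the revert, the index i passed to TaskInProgress is
simply the position in the size-sorted splits array rather than the original
InputFormat#getSplits order, which is why TestMapOutputOrder is deleted
below. The loop body is truncated by this hunk; it follows the usual
lazy-multimap idiom, a likely completion being:

        List<TaskInProgress> hostMaps = hostToMaps.get(host);
        if (hostMaps == null) {
          hostMaps = new ArrayList<TaskInProgress>();
          hostToMaps.put(host, hostMaps);
        }
        hostMaps.add(maps[i]);
]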

src/test/org/apache/hadoop/mapred/TestMapOutputOrder.java (+0, -162)

@@ -1,162 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.mapred;
-
-import org.apache.commons.logging.*;
-import org.apache.hadoop.fs.*;
-import org.apache.hadoop.io.*;
-import org.apache.hadoop.io.compress.*;
-import org.apache.hadoop.mapred.lib.*;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.dfs.MiniDFSCluster;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.util.ReflectionUtils;
-import junit.framework.TestCase;
-import java.io.*;
-import java.util.*;
-
-/** 
- * TestMapOutputOrder checks if there is a 1-1 correspondence between
- * the order of Map Input files (returned from InputFormat.getSplits())
- * and the Map Output Files
- */
-public class TestMapOutputOrder extends TestCase 
-{
-  private static final Log LOG =
-    LogFactory.getLog(TestTextInputFormat.class.getName());
-
-  JobConf jobConf = new JobConf(TestMapOutputOrder.class);
-  JobClient jc;
-
-  private static class TestMapper extends MapReduceBase implements Mapper {
-    public void map(WritableComparable key, Writable val,
-                   OutputCollector output, Reporter reporter)
-      throws IOException {
-      output.collect(null, val);
-    }
-  }
-
-  private static void writeFile(FileSystem fs, Path name,
-                                CompressionCodec codec,
-                                String contents) throws IOException {
-    OutputStream stm;
-    if (codec == null) {
-      stm = fs.create(name);
-    } else {
-      stm = codec.createOutputStream(fs.create(name));
-    }
-    stm.write(contents.getBytes()); 
-    stm.close();
-  } 
-
-  private static String readFile(FileSystem fs, Path name,
-                                CompressionCodec codec) throws IOException {
-    InputStream stm;
-    if (codec == null) {
-      stm = fs.open(name);
-    } else {
-      stm = codec.createInputStream(fs.open(name));
-    }
-
-    String contents = "";
-    int b = stm.read();
-    while (b != -1) {
-       contents += (char) b;
-       b = stm.read();
-    }
-    stm.close();
-    return contents;
-  }
-
-  public void testMapOutputOrder() throws Exception {
-    String nameNode = null;
-    MiniDFSCluster dfs = null;
-    MiniMRCluster mr = null;
-    FileSystem fileSys = null;
-
-    try {
-      final int taskTrackers = 3;
-      final int jobTrackerPort = 60070;
-
-      Configuration conf = new Configuration();
-      dfs = new MiniDFSCluster(conf, 1, true, null);
-      fileSys = dfs.getFileSystem();
-      nameNode = fileSys.getName();
-      mr = new MiniMRCluster(taskTrackers, nameNode, 3);
-      final String jobTrackerName = "localhost:" + mr.getJobTrackerPort();
-
-      Path testdir = new Path("/testing/mapoutputorder/");
-      Path inDir = new Path(testdir, "input");
-      Path outDir = new Path(testdir, "output");
-      FileSystem fs = FileSystem.getNamed(nameNode, conf);
-      fs.delete(testdir);
-      jobConf.set("fs.default.name", nameNode);
-      jobConf.set("mapred.job.tracker", jobTrackerName);
-      jobConf.setInputFormat(TextInputFormat.class);
-      jobConf.setInputPath(inDir);
-      jobConf.setOutputPath(outDir);
-      jobConf.setMapperClass(TestMapper.class);
-      jobConf.setNumMapTasks(3);
-      jobConf.setMapOutputKeyClass(LongWritable.class);
-      jobConf.setMapOutputValueClass(Text.class); 
-      jobConf.setNumReduceTasks(0);
-      jobConf.setJar("build/test/testjar/testjob.jar");
-
-      if (!fs.mkdirs(testdir)) {
-        throw new IOException("Mkdirs failed to create " + testdir.toString());
-      }
-      if (!fs.mkdirs(inDir)) {
-        throw new IOException("Mkdirs failed to create " + inDir.toString());
-      }
-
-      // create input files
-      CompressionCodec gzip = new GzipCodec();
-      ReflectionUtils.setConf(gzip, jobConf);
-      String[] inpStrings = new String[3];
-      inpStrings[0] = "part1_line1\npart1_line2\n";
-      inpStrings[1] = "part2_line1\npart2_line2\npart2_line3\n";
-      inpStrings[2] = "part3_line1\n";
-      writeFile(fs, new Path(inDir, "part1.txt.gz"), gzip, inpStrings[0]);
-      writeFile(fs, new Path(inDir, "part2.txt.gz"), gzip, inpStrings[1]);
-      writeFile(fs, new Path(inDir, "part3.txt.gz"), gzip, inpStrings[2]);
-
-      // run job
-      jc = new JobClient(jobConf);
-
-      RunningJob rj = jc.runJob(jobConf);
-      assertTrue("job was complete", rj.isComplete());
-      assertTrue("job was successful", rj.isSuccessful());
-
-      // check map output files
-      Path[] outputPaths = fs.listPaths(outDir);
-      String contents;
-      for (int i = 0; i < outputPaths.length; i++) {
-        LOG.debug("Output Path (#" + (i+1) +"): " + outputPaths[i].getName());
-        contents = readFile(fs, outputPaths[i], null);
-        LOG.debug("Contents: " + contents);
-        assertTrue(new String("Input File #" + (i+1) + " == Map Output File #" + (i+1)), inpStrings[i].equals(contents));
-      }
-    }
-    finally {
-      // clean-up
-      if (fileSys != null) { fileSys.close(); }
-      if (dfs != null) { dfs.shutdown(); }
-      if (mr != null) { mr.shutdown(); }
-    }
-  }
-}