
HADOOP-424. Fix MapReduce so that jobs with zero splits do not fail. Contributed by Frédéric Bertin.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@440826 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 years ago
parent
commit
e80eed6345

+ 3 - 0
CHANGES.txt

@@ -120,6 +120,9 @@ Trunk (unreleased changes)
     volumes are handled by a single DataNode daemon, reducing the load
     on the NameNode.  (Milind Bhandarkar via cutting)
 
+30. HADOOP-424.  Fix MapReduce so that jobs which generate zero splits
+    do not fail.  (Frédéric Bertin via cutting)
+
 
 Release 0.5.0 - 2006-08-04
 

+ 8 - 0
src/java/org/apache/hadoop/mapred/JobInProgress.java

@@ -143,6 +143,14 @@ class JobInProgress {
         // adjust number of map tasks to actual number of splits
         //
         this.numMapTasks = splits.length;
+        
+        // if no splits are returned, the job is considered complete and successful
+        if (numMapTasks == 0) {
+            this.status = new JobStatus(status.getJobId(), 1.0f, 1.0f, JobStatus.SUCCEEDED);
+            tasksInited = true;
+            return;
+        }
+        
         // create a map task for each split
         this.maps = new TaskInProgress[numMapTasks];
         for (int i = 0; i < numMapTasks; i++) {

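For context, the client-visible effect of this guard, sketched with the same 0.x API that the new test below exercises (JobClient.submitJob, RunningJob.isComplete, RunningJob.isSuccessful): a job whose InputFormat yields zero splits now completes immediately and reports success, instead of failing with no map tasks to schedule.

    // Sketch only; API usage mirrors TestEmptyJobWithDFS below.
    JobClient jc = new JobClient(conf);
    RunningJob job = jc.submitJob(conf);    // InputFormat returns zero splits
    while (!job.isComplete()) {             // completes almost immediately
        try { Thread.sleep(1000); } catch (InterruptedException e) {}
    }
    boolean succeeded = job.isSuccessful(); // true with this fix
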
+ 20 - 0
src/test/org/apache/hadoop/mapred/EmptyInputFormat.java

@@ -0,0 +1,20 @@
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FileSystem;
+
+/**
+  * InputFormat which simulates the absence of input data
+  * by returning zero splits.
+  */
+public class EmptyInputFormat extends InputFormatBase {
+
+  public FileSplit[] getSplits(FileSystem fs, JobConf job, int numSplits) throws IOException {
+    return new FileSplit[0];
+  }
+
+  public RecordReader getRecordReader(FileSystem fs, FileSplit split, JobConf job, Reporter reporter) throws IOException {
+    return new SequenceFileRecordReader(job, split);
+  }
+}
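
Wiring this InputFormat into a job takes a single JobConf call, as the test below does; the sketch assumes nothing beyond that API:

    JobConf conf = new JobConf();
    conf.setInputFormat(EmptyInputFormat.class); // getSplits() returns a zero-length array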

+ 124 - 0
src/test/org/apache/hadoop/mapred/TestEmptyJobWithDFS.java

@@ -0,0 +1,124 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.dfs.MiniDFSCluster;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.lib.IdentityMapper;
+import org.apache.hadoop.mapred.lib.IdentityReducer;
+
+/**
+ * A JUnit test that runs an empty Map-Reduce job against Mini-DFS.
+ */
+public class TestEmptyJobWithDFS extends TestCase {
+  
+  /**
+   * Simple method running a MapReduce job with no input data. Used
+   * to test that such a job is successful.
+   * @param fileSys
+   * @param jobTracker
+   * @param conf
+   * @param numMaps
+   * @param numReduces
+   * @return true if the MR job is successful, otherwise false
+   * @throws IOException
+   */
+  public static boolean launchEmptyJob(String fileSys,
+                                      String jobTracker,
+                                      JobConf conf,
+                                      int numMaps,
+                                      int numReduces) throws IOException {
+      // create an empty input dir
+      final Path inDir = new Path("/testing/empty/input");
+      final Path outDir = new Path("/testing/empty/output");
+      FileSystem fs = FileSystem.getNamed(fileSys, conf);
+      fs.delete(outDir);
+      fs.mkdirs(inDir);
+
+      // configure an identity map-reduce job
+      conf.set("fs.default.name", fileSys);
+      conf.set("mapred.job.tracker", jobTracker);
+      conf.setJobName("empty");
+      // use an InputFormat which returns no splits
+      conf.setInputFormat(EmptyInputFormat.class);
+      conf.setOutputKeyClass(Text.class);
+      conf.setOutputValueClass(IntWritable.class);
+      conf.setMapperClass(IdentityMapper.class);
+      conf.setReducerClass(IdentityReducer.class);
+      conf.setInputPath(inDir);
+      conf.setOutputPath(outDir);
+      conf.setNumMapTasks(numMaps);
+      conf.setNumReduceTasks(numReduces);
+      
+      // run job and wait for completion
+      JobClient jc = new JobClient(conf);
+      RunningJob runningJob = jc.submitJob(conf);
+      while (true) {
+          try {
+              Thread.sleep(1000);
+          } catch (InterruptedException e) {}
+          if (runningJob.isComplete()) {
+              break;
+          }
+      }
+      // return job result
+      return (runningJob.isSuccessful());
+  }
+  
+  /**
+   * Test that a job with no input data (and thus with no input split and
+   * no map task to execute) is successful.
+   * @throws IOException
+   */
+  public void testEmptyJobWithDFS() throws IOException {
+      String namenode = null;
+      MiniDFSCluster dfs = null;
+      MiniMRCluster mr = null;
+      FileSystem fileSys = null;
+      try {
+          final int taskTrackers = 4;
+          final int jobTrackerPort = 50050;
+          final String jobTrackerName = "localhost:" + jobTrackerPort;
+          Configuration conf = new Configuration();
+          dfs = new MiniDFSCluster(65315, conf, true);
+          fileSys = dfs.getFileSystem();
+          namenode = fileSys.getName();
+          mr = new MiniMRCluster(jobTrackerPort, 50060, taskTrackers, 
+                                 namenode, true);
+
+          JobConf jobConf = new JobConf();
+          boolean result;
+          result = launchEmptyJob(namenode, jobTrackerName, jobConf, 
+                                   3, 1);
+          assertTrue(result);
+          
+      } finally {
+          if (fileSys != null) { fileSys.close(); }
+          if (dfs != null) { dfs.shutdown(); }
+          if (mr != null) { mr.shutdown(); }
+      }
+  }
+  
+}
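
The test is a plain JUnit 3 TestCase, so, assuming junit and the Hadoop classes are on the classpath, it can also be run standalone with the text runner:

    java junit.textui.TestRunner org.apache.hadoop.mapred.TestEmptyJobWithDFS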