Browse Source

HADOOP-278. Check for the existence of input directories before starting MapReduce jobs, making it easier to debug this common error. Contributed by Owen.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@417256 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 19 years ago
parent
commit
8c10090919

+ 4 - 0
CHANGES.txt

@@ -47,6 +47,10 @@ Trunk (unreleased changes)
 11. HADOOP-135.  Fix potential deadlock in JobTracker by acquiring
     locks in a consistent order.  (omalley via cutting)
 
+12. HADOOP-278.  Check for existence of input directories before
+    starting MapReduce jobs, making it easier to debug this common
+    error.  (omalley via cutting)
+
 
 Release 0.3.2 - 2006-06-09
 

+ 13 - 0
src/java/org/apache/hadoop/mapred/InputFormat.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.mapred;
 import java.io.IOException;
 
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 
 /** An input data format.  Input files are stored in a {@link FileSystem}.
  * The processing of an input file may be split across multiple machines.
@@ -26,6 +27,18 @@ import org.apache.hadoop.fs.FileSystem;
  * RecordReader}.  Files must thus be split on record boundaries. */
 public interface InputFormat {
 
+  /**
+   * Checks whether each input directory is valid. This method is used to test
+   * the input directories when a job is submitted so that the framework can
+   * fail early with a useful error message when an input directory is missing.
+   * @param fileSys the file system to check for the directories
+   * @param inputDirs the input directories to validate
+   * @return one flag per entry of inputDirs, in the same order; true if valid
+   * @throws IOException if the validity check cannot be carried out
+   */
+  boolean[] areValidInputDirectories(FileSystem fileSys,
+                                     Path[] inputDirs) throws IOException;
+  
   /** Splits a set of input files.  One split is created per map task.
    *
    * @param fs the filesystem containing the files to be split

+ 10 - 0
src/java/org/apache/hadoop/mapred/InputFormatBase.java

@@ -98,6 +98,16 @@ public abstract class InputFormatBase implements InputFormat {
     return (Path[])result.toArray(new Path[result.size()]);
   }
 
+  /**
+   * Reports, for each input path, whether it names a directory on the given
+   * file system. Entry i of the returned array answers for inputDirs[i].
+   */
+  public boolean[] areValidInputDirectories(FileSystem fileSys,
+                                            Path[] inputDirs
+                                            ) throws IOException {
+    final int count = inputDirs.length;
+    boolean[] isDir = new boolean[count];
+    int idx = 0;
+    while (idx < count) {
+      isDir[idx] = fileSys.isDirectory(inputDirs[idx]);
+      idx++;
+    }
+    return isDir;
+  }
+
  /** Splits files returned by {@link #listPaths(FileSystem,JobConf)} when
    * they're too big.*/ 
   public FileSplit[] getSplits(FileSystem fs, JobConf job, int numSplits)

+ 11 - 0
src/java/org/apache/hadoop/mapred/JobClient.java

@@ -260,6 +260,17 @@ public class JobClient implements MRConstants {
           job.setWorkingDirectory(fs.getWorkingDirectory());          
         }
 
+        Path[] inputDirs = job.getInputPaths();
+        boolean[] validDirs = 
+          job.getInputFormat().areValidInputDirectories(fs, inputDirs);
+        for(int i=0; i < validDirs.length; ++i) {
+          if (!validDirs[i]) {
+            String msg = "Input directory " + inputDirs[i] + " is invalid.";
+            LOG.error(msg);
+            throw new IOException(msg);
+          }
+        }
+
         // Check the output specification
         job.getOutputFormat().checkOutputSpecs(fs, job);