19 years ago · 8c10090919
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -47,6 +47,10 @@ Trunk (unreleased changes)
 
				 11. HADOOP-135.  Fix potential deadlock in JobTracker by acquiring
			
 
				     locks in a consistent order.  (omalley via cutting)
			
 
				 
			
 
				+12. HADOOP-278.  Check for existence of input directories before
			
 
				+    starting MapReduce jobs, making it easier to debug this common
			
 
				+    error.  (omalley via cutting)
			
 
				+
			
 
				 
			
 
				 Release 0.3.2 - 2006-06-09
			
 
				 
			
--- a/src/java/org/apache/hadoop/mapred/InputFormat.java
+++ b/src/java/org/apache/hadoop/mapred/InputFormat.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.mapred;
 
				 import java.io.IOException;
			
 
				 
			
 
				 import org.apache.hadoop.fs.FileSystem;
			
 
				+import org.apache.hadoop.fs.Path;
			
 
				 
			
 
				 /** An input data format.  Input files are stored in a {@link FileSystem}.
			
 
				  * The processing of an input file may be split across multiple machines.
			
@@ -26,6 +27,18 @@ import org.apache.hadoop.fs.FileSystem;
 
				  * RecordReader}.  Files must thus be split on record boundaries. */
			
 
				 public interface InputFormat {
			
 
				 
			
 
				+  /**
			
 
				+   * Reports which input directories are valid. This method is used to test
			
 
				+   * the input directories when a job is submitted so that the framework can
			
 
				+   * fail early with a useful error message when a directory does not exist.
			
 
				+   * @param fileSys the file system in which to check the directories
			
 
				+   * @param inputDirs the input directories to check
			
 
				+   * @return one flag per entry of inputDirs, in the same order: is it valid?
			
 
				+   * @throws IOException may be raised by an implementation while checking
			
 
				+   */
			
 
				+  boolean[] areValidInputDirectories(FileSystem fileSys,
			
 
				+                                     Path[] inputDirs) throws IOException;
			
 
				+  
			
 
				   /** Splits a set of input files.  One split is created per map task.
			
 
				    *
			
 
				    * @param fs the filesystem containing the files to be split
			
--- a/src/java/org/apache/hadoop/mapred/InputFormatBase.java
+++ b/src/java/org/apache/hadoop/mapred/InputFormatBase.java
@@ -98,6 +98,16 @@ public abstract class InputFormatBase implements InputFormat {
 
				     return (Path[])result.toArray(new Path[result.size()]);
			
 
				   }
			
 
				 
			
 
				+  public boolean[] areValidInputDirectories(FileSystem fileSys,
			
 
				+                                            Path[] inputDirs
			
 
				+                                            ) throws IOException {  // Flags each input path by whether fileSys reports it as a directory.
			
 
				+    boolean[] result = new boolean[inputDirs.length];  // one flag per input path
			
 
				+    for(int i=0; i < inputDirs.length; ++i) {
			
 
				+      result[i] = fileSys.isDirectory(inputDirs[i]);  // result[i] answers for inputDirs[i]; nothing is caught here, so an IOException propagates
			
 
				+    }
			
 
				+    return result;  // same length and order as inputDirs
			
 
				+  }
			
 
				+
			
 
				   /** Splits files returned by {@link #listPaths(FileSystem,JobConf)} when
			
 
				    * they're too big.*/ 
			
 
				   public FileSplit[] getSplits(FileSystem fs, JobConf job, int numSplits)
			
--- a/src/java/org/apache/hadoop/mapred/JobClient.java
+++ b/src/java/org/apache/hadoop/mapred/JobClient.java
@@ -260,6 +260,17 @@ public class JobClient implements MRConstants {
 
				           job.setWorkingDirectory(fs.getWorkingDirectory());          
			
 
				         }
			
 
				 
			
 
				+        Path[] inputDirs = job.getInputPaths();
			
 
				+        boolean[] validDirs = 
			
 
				+          job.getInputFormat().areValidInputDirectories(fs, inputDirs);
			
 
				+        for(int i=0; i < validDirs.length; ++i) {
			
 
				+          if (!validDirs[i]) {
			
 
				+            String msg = "Input directory " + inputDirs[i] + " is invalid.";
			
 
				+            LOG.error(msg);
			
 
				+            throw new IOException(msg);
			
 
				+          }
			
 
				+        }
			
 
				+
			
 
				         // Check the output specification
			
 
				         job.getOutputFormat().checkOutputSpecs(fs, job);