فهرست منبع

HADOOP-265. Fix tasktracker to not start when it doesn't have a writable local directory. Contributed by Hairong.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@411277 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 19 سال پیش
والد
کامیت
5839aa18af

+ 4 - 0
CHANGES.txt

@@ -95,6 +95,10 @@ Trunk (unreleased)
 25. HADOOP-211.  Switch to use the Jakarta Commons logging internally,
     configured to use log4j by default.  (Arun C Murthy and cutting)
 
+26. HADOOP-265.  Tasktracker now fails to start if it does not have a
+    writable local directory for temporary files.  In this case, it
+    logs a message to the JobTracker and exits. (Hairong Kuang via cutting)  
+
 
 Release 0.2.1 - 2006-05-12
 

+ 12 - 0
src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java

@@ -63,6 +63,18 @@ interface InterTrackerProtocol {
    * files referred to by the JobTracker
    */
   public String getFilesystemName() throws IOException;
+  
+  /**
+   * Report a problem encountered by a task tracker to the job tracker.
+   *
+   * @param taskTracker the name of the task tracker reporting the problem
+   * @param errorClass the kind of error (e.g. the name of the exception
+   *                   class that was thrown)
+   * @param errorMessage the human-readable error message
+   * @throws IOException if there was a problem in communication or on the
+   *                     remote side
+   */
+  public void reportTaskTrackerError(String taskTracker,
+                                     String errorClass,
+                                     String errorMessage) throws IOException;
 }
 
 

+ 7 - 0
src/java/org/apache/hadoop/mapred/JobTracker.java

@@ -835,6 +835,13 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
         return fs.getName();
     }
 
+
+    public void reportTaskTrackerError(String taskTracker,
+            String errorClass,
+            String errorMessage) throws IOException {
+        LOG.warn("Report from " + taskTracker + ": " + errorMessage);        
+    }
+
     ////////////////////////////////////////////////////
     // JobSubmissionProtocol
     ////////////////////////////////////////////////////

+ 69 - 28
src/java/org/apache/hadoop/mapred/TaskTracker.java

@@ -20,6 +20,7 @@ import org.apache.commons.logging.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.ipc.*;
 import org.apache.hadoop.util.*;
+import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 
 import java.io.*;
 import java.net.*;
@@ -101,32 +102,6 @@ public class TaskTracker
       taskCleanupThread.start();
     }
     
-    /**
-     * Start with the local machine name, and the default JobTracker
-     */
-    public TaskTracker(JobConf conf) throws IOException {
-      maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
-      this.fConf = conf;
-      this.jobTrackAddr = JobTracker.getAddress(conf);
-      this.taskTimeout = conf.getInt("mapred.task.timeout", 10* 60 * 1000);
-      this.mapOutputFile = new MapOutputFile();
-      this.mapOutputFile.setConf(conf);
-      int httpPort = conf.getInt("tasktracker.http.port", 50060);
-      StatusHttpServer server = new StatusHttpServer("task", httpPort, true);
-      int workerThreads = conf.getInt("tasktracker.http.threads", 40);
-      server.setThreads(1, workerThreads);
-      server.start();
-      this.httpPort = server.getPort();
-      // let the jsp pages get to the task tracker, config, and other relevant
-      // objects
-      FileSystem local = FileSystem.getNamed("local", conf);
-      server.setAttribute("task.tracker", this);
-      server.setAttribute("local.file.system", local);
-      server.setAttribute("conf", conf);
-      server.setAttribute("log", LOG);
-      initialize();
-    }
-
     /**
      * Do the real constructor work here.  It's in a separate method
      * so we can call it again and "recycle" the object after calling
@@ -135,6 +110,8 @@ public class TaskTracker
     synchronized void initialize() throws IOException {
         this.localHostname = InetAddress.getLocalHost().getHostName();
 
+        //check local disk
+        checkLocalDirs(this.fConf.getLocalDirs());
         fConf.deleteLocalFiles(SUBDIR);
 
         // Clear out state tables
@@ -218,6 +195,32 @@ public class TaskTracker
         this.mapOutputFile.cleanupStorage();
     }
 
+    /**
+     * Create a task tracker from the given configuration.  Reads the task
+     * limit and task timeout, sets up map output storage, starts the status
+     * HTTP server, and finally calls {@link #initialize()}.
+     *
+     * @param conf the job configuration to read settings from
+     * @throws IOException if construction or initialization fails
+     */
+    public TaskTracker(JobConf conf) throws IOException {
+      // Basic settings taken straight from the configuration.
+      maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2);
+      this.fConf = conf;
+      this.jobTrackAddr = JobTracker.getAddress(conf);
+      this.taskTimeout = conf.getInt("mapred.task.timeout", 10* 60 * 1000);
+
+      MapOutputFile outputFile = new MapOutputFile();
+      this.mapOutputFile = outputFile;
+      outputFile.setConf(conf);
+
+      // Bring up the status web server; the port actually bound is read
+      // back from the server rather than taken from the configuration.
+      int requestedPort = conf.getInt("tasktracker.http.port", 50060);
+      int maxHttpThreads = conf.getInt("tasktracker.http.threads", 40);
+      StatusHttpServer infoServer =
+          new StatusHttpServer("task", requestedPort, true);
+      infoServer.setThreads(1, maxHttpThreads);
+      infoServer.start();
+      this.httpPort = infoServer.getPort();
+
+      // Expose the tracker, the local file system, the configuration and
+      // the log to the jsp pages.
+      FileSystem localFs = FileSystem.getNamed("local", conf);
+      infoServer.setAttribute("task.tracker", this);
+      infoServer.setAttribute("local.file.system", localFs);
+      infoServer.setAttribute("conf", conf);
+      infoServer.setAttribute("log", LOG);
+
+      initialize();
+    }
+
     /**
      * The connection to the JobTracker, used by the TaskRunner 
      * for locating remote files.
@@ -287,11 +290,17 @@ public class TaskTracker
             //
             try {
               if (mapTotal < maxCurrentTasks || reduceTotal < maxCurrentTasks) {
+                  checkLocalDirs(fConf.getLocalDirs());
                   Task t = jobClient.pollForNewTask(taskTrackerName);
                   if (t != null) {
                     startNewTask(t);
                   }
               }
+            } catch (DiskErrorException de ) {
+                LOG.warn("Exiting task tracker because "+de.getMessage());
+                jobClient.reportTaskTrackerError(taskTrackerName, 
+                        "DiskErrorException", de.getMessage());
+                return STALE_STATE;
             } catch (IOException ie) {
               LOG.info("Problem launching task: " + 
                        StringUtils.stringifyException(ie));
@@ -913,6 +922,33 @@ public class TaskTracker
       return fConf;
     }
     
+    /**
+     * Verify that at least one of the given local directories passes
+     * {@link DiskChecker#checkDir(File)}.  Every directory is checked, and
+     * each one that fails is logged as a warning.
+     * @param localDirs where the TaskTracker should keep its local files;
+     *                  null is handled the same as an empty list
+     * @throws DiskErrorException if no directory passes the check
+     * @author hairong
+     */
+    private static void checkLocalDirs(String[] localDirs)
+            throws DiskErrorException {
+        String[] dirs = (localDirs == null) ? new String[0] : localDirs;
+        int usable = 0;
+
+        for (int idx = 0; idx < dirs.length; idx++) {
+            File dir = new File(dirs[idx]);
+            try {
+                DiskChecker.checkDir(dir);
+            } catch (DiskErrorException failure) {
+                // A bad directory is only a warning; keep checking the rest.
+                LOG.warn("Task Tracker local " + failure.getMessage());
+                continue;
+            }
+            usable++;
+        }
+
+        if (usable == 0) {
+            throw new DiskErrorException(
+                    "all local directories are not writable");
+        }
+    }
+    
     /**
      * Start the TaskTracker, point toward the indicated JobTracker
      */
@@ -922,7 +958,12 @@ public class TaskTracker
             System.exit(-1);
         }
 
-        JobConf conf=new JobConf();
-        new TaskTracker(conf).run();
+        try {
+          JobConf conf=new JobConf();
+          new TaskTracker(conf).run();
+        } catch (IOException e) {
+            LOG.warn( "Can not start task tracker because "+e.getMessage());
+            System.exit(-1);
+        }
     }
 }

+ 1 - 1
src/java/org/apache/hadoop/util/DiskChecker.java

@@ -11,7 +11,7 @@ import java.io.IOException;
 public class DiskChecker {
 
     public static class DiskErrorException extends IOException {
-      DiskErrorException(String msg) {
+      /**
+       * Create a disk error carrying the given message.  The constructor is
+       * public so that code outside this package (for example the mapred
+       * TaskTracker) can throw this exception itself.
+       *
+       * @param msg the description of the disk problem
+       */
+      public DiskErrorException(String msg) {
         super(msg);
       }
     }