Browse Source

HADOOP-289. Improved exception handling in DFS datanode. Contributed by Konstantin.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@413096 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 19 years ago
parent
commit
095307710d

+ 3 - 0
CHANGES.txt

@@ -23,6 +23,9 @@ Trunk (unreleased changes)
 6. HADOOP-277.  Fix a race condition when creating directories.
    (Sameer Paranjpye via cutting)
 
 
+ 7. HADOOP-289.  Improved exception handling in DFS datanode.
+    (Konstantin Shvachko via cutting)
+
 
 
Release 0.3.1 - 2006-06-05
 
 

+ 24 - 15
src/java/org/apache/hadoop/dfs/DataNode.java

@@ -105,17 +105,7 @@ public class DataNode implements FSConstants, Runnable {
             new File(datadir),
             createSocketAddr(conf.get("fs.default.name", "local")), conf);
        // register datanode
-        while (shouldRun) {
-          try {
-            register();
-            break;
-          } catch (ConnectException ce) {
-            LOG.info("Namenode not available yet, Zzzzz...");
-            try {
-              Thread.sleep(10 * 1000);
-            } catch (InterruptedException ie) {}
-          }
-        }
+        register();
    }
 
 
    /**
@@ -182,7 +172,19 @@ public class DataNode implements FSConstants, Runnable {
     * @throws IOException
     */
    private void register() throws IOException {
-      dnRegistration = namenode.register( dnRegistration );
+      while (shouldRun) {
+        try {
+          dnRegistration = namenode.register( dnRegistration );
+          break;
+        } catch( ConnectException se ) {  // namenode has not been started
+          LOG.info("Namenode not available yet, Zzzzz...");
+        } catch( SocketTimeoutException te ) {  // namenode is busy
+          LOG.info("Namenode " + te.getLocalizedMessage() );
+        }
+        try {
+          Thread.sleep(10 * 1000);
+        } catch (InterruptedException ie) {}
+      }
      if( storage.getStorageID().equals("") ) {
        storage.setStorageID( dnRegistration.getStorageID());
        storage.write();
@@ -203,7 +205,7 @@ public class DataNode implements FSConstants, Runnable {
    }
 
 
    void handleDiskError( String errMsgr ) {
-        LOG.warn( "Shuting down DataNode because "+errMsgr );
+        LOG.warn( "DataNode is shutting down.\n" + errMsgr );
        try {
            namenode.errorReport(
                    dnRegistration, DatanodeProtocol.DISK_ERROR, errMsgr);
@@ -332,9 +334,16 @@ public class DataNode implements FSConstants, Runnable {
          } // synchronized
        } // while (shouldRun)
      } catch(DiskErrorException e) {
-        handleDiskError(e.getMessage());
+        handleDiskError(e.getLocalizedMessage());
+      } catch( RemoteException re ) {
+        String reClass = re.getClassName();
+        if( UnregisteredDatanodeException.class.getName().equals( reClass )) {
+          LOG.warn( "DataNode is shutting down.\n" + re );
+          shutdown();
+          return;
+        }
+        throw re;
      }
-      
    } // offerService
 
 
    /**

+ 2 - 0
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -1329,6 +1329,8 @@ class FSNamesystem implements FSConstants {
     */
    private void proccessOverReplicatedBlock( Block block, short replication ) {
      TreeSet containingNodes = (TreeSet) blocksMap.get(block);
+      if( containingNodes == null )
+        return;
      Vector nonExcess = new Vector();
      for (Iterator it = containingNodes.iterator(); it.hasNext(); ) {
          DatanodeInfo cur = (DatanodeInfo) it.next();