Browse Source

HADOOP-937. Fix namenode to request re-registration by datanodes in more circumstances. Contributed by Hairong.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@501603 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 years ago
parent
commit
56f34cc4b3

+ 3 - 0
CHANGES.txt

@@ -95,6 +95,9 @@ Trunk (unreleased changes)
 29. HADOOP-884.  Add scripts in contrib/ec2 to facilitate running
     Hadoop on an Amazon EC2 cluster.  (Tom White via cutting)
 
+30. HADOOP-937.  Change the namenode to request re-registration of
+    datanodes in more circumstances.  (Hairong Kuang via cutting)
+
 
 Release 0.10.1 - 2007-01-10
 

+ 6 - 4
src/java/org/apache/hadoop/dfs/DataNode.java

@@ -403,10 +403,12 @@ public class DataNode implements FSConstants, Runnable {
                 // shut down the data node
                 this.shutdown();
                 continue;
-              case DNA_REPORT:
-                // namenode requested a block report; sending
-                lastBlockReport = 0;
-                break;
+              case DNA_REGISTER:
+                // namenode requested a registration
+                register();
+                lastHeartbeat=0;
+                lastBlockReport=0;
+                continue;
               default:
                 LOG.warn( "Unknown BlockCommand action: " + cmd.action);
               }

+ 3 - 3
src/java/org/apache/hadoop/dfs/DatanodeProtocol.java

@@ -31,8 +31,8 @@ import org.apache.hadoop.ipc.VersionedProtocol;
  * @author Michael Cafarella
  **********************************************************************/
 interface DatanodeProtocol extends VersionedProtocol {
-  public static final long versionID = 3L;  // BlockCommand.action replaced boolean members
-                                            // affected: BlockCommand
+  public static final long versionID = 4L; // BlockCommand.action:
+                                           // replace DNA_REPORT by DNA_REGISTER
   
   // error code
   final static int DISK_ERROR = 1;
@@ -46,7 +46,7 @@ interface DatanodeProtocol extends VersionedProtocol {
                               DNA_TRANSFER,   // transfer blocks to another datanode
                               DNA_INVALIDATE, // invalidate blocks
                               DNA_SHUTDOWN,   // shutdown node
-                              DNA_REPORT; }   // send block report to the namenode
+                              DNA_REGISTER; }   // re-register
 
   /** 
    * Register Datanode.

+ 34 - 20
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -1424,7 +1424,7 @@ class FSNamesystem implements FSConstants {
      */
     public synchronized void registerDatanode( DatanodeRegistration nodeReg 
                                               ) throws IOException {
-      NameNode.stateChangeLog.debug(
+      NameNode.stateChangeLog.info(
           "BLOCK* NameSystem.registerDatanode: "
           + "node registration from " + nodeReg.getName()
           + " storage " + nodeReg.getStorageID() );
@@ -1463,8 +1463,16 @@ class FSNamesystem implements FSConstants {
         getEditLog().logRemoveDatanode( nodeS );
         nodeS.updateRegInfo( nodeReg );
         getEditLog().logAddDatanode( nodeS );
+        
+        // also treat the registration message as a heartbeat
+        synchronized( heartbeats ) {
+            heartbeats.add( nodeS );
+            //update its timestamp
+            nodeS.updateHeartbeat( 0L, 0L, 0);
+            nodeS.isAlive = true;
+        }
         return;
-      }
+      } 
 
       // this is a new datanode serving a new data storage
       if( nodeReg.getStorageID().equals("") ) {
@@ -1477,10 +1485,16 @@ class FSNamesystem implements FSConstants {
       }
       // register new datanode
       DatanodeDescriptor nodeDescr = new DatanodeDescriptor( nodeReg );
-      // unless we get a heartbeat from this datanode, we will not mark it Alive
-      nodeDescr.isAlive = false;
       unprotectedAddDatanode( nodeDescr );
       getEditLog().logAddDatanode( nodeDescr );
+      
+      // also treat the registration message as a heartbeat
+      synchronized( heartbeats ) {
+          heartbeats.add( nodeDescr );
+          nodeDescr.isAlive = true;
+          // no need to update its timestamp
+          // because it is done when the descriptor is created
+      }
       return;
     }
     
@@ -1534,28 +1548,28 @@ class FSNamesystem implements FSConstants {
                                  long remaining,
                                  int xceiverCount
                                  ) throws IOException {
-      boolean needBlockReport;
       synchronized (heartbeats) {
         synchronized (datanodeMap) {
-          DatanodeDescriptor nodeinfo = getDatanode( nodeID );
-          needBlockReport = isDatanodeDead(nodeinfo); 
-          
-          if (nodeinfo == null) {
-            // We do not accept unregistered guests
-            throw new UnregisteredDatanodeException( nodeID );
-          }
-          if (nodeinfo.isAlive) {
-            updateStats(nodeinfo, false);
+          DatanodeDescriptor nodeinfo;
+          try {
+            nodeinfo = getDatanode( nodeID );
+            if (nodeinfo == null ) {
+                return true;
+            }
+          } catch(UnregisteredDatanodeException e) {
+              return true;
           }
-          nodeinfo.updateHeartbeat(capacity, remaining, xceiverCount);
-          updateStats(nodeinfo, true);
-          if (!nodeinfo.isAlive) {
-            heartbeats.add(nodeinfo);
-            nodeinfo.isAlive = true;
+          
+          if( !nodeinfo.isAlive ) {
+              return true;
+          } else {
+              updateStats(nodeinfo, false);
+              nodeinfo.updateHeartbeat(capacity, remaining, xceiverCount);
+              updateStats(nodeinfo, true);
+              return false;
           }
         }
       }
-      return needBlockReport;
     }
 
     private void updateStats(DatanodeDescriptor node, boolean isAdded) {

+ 1 - 1
src/java/org/apache/hadoop/dfs/NameNode.java

@@ -557,7 +557,7 @@ public class NameNode implements ClientProtocol, DatanodeProtocol, FSConstants {
         verifyRequest( nodeReg );
         if( namesystem.gotHeartbeat( nodeReg, capacity, remaining, xceiverCount )) {
           // ask the datanode to re-register
-          return new BlockCommand( DataNodeAction.DNA_REPORT );
+          return new BlockCommand( DataNodeAction.DNA_REGISTER );
         }
         
         //