Ver código fonte

HDFS-7714. Simultaneous restart of HA NameNodes and DataNode can cause DataNode to register successfully with only one NameNode.(Contributed by Vinayakumar B)

(cherry picked from commit 3d15728ff5301296801e541d9b23bd1687c4adad)
(cherry picked from commit a1bf7aecf7d018c5305fa3bd7a9e3ef9af3155c1)
(cherry picked from commit c1e65de57e8ef760586e28cd37397ea9a7ac7944)
Vinayakumar B 10 anos atrás
pai
commit
6154abfeda

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -70,6 +70,9 @@ Release 2.6.1 - UNRELEASED
     HDFS-7707. Edit log corruption due to delayed block removal again.
     (Yongjun Zhang via kihwal)
 
+    HDFS-7714. Simultaneous restart of HA NameNodes and DataNode can cause
+    DataNode to register successfully with only one NameNode.(vinayakumarb)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode;
 
 import static org.apache.hadoop.util.Time.now;
 
+import java.io.EOFException;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.SocketTimeoutException;
@@ -802,6 +803,10 @@ class BPServiceActor implements Runnable {
         // Use returned registration from namenode with updated fields
         bpRegistration = bpNamenode.registerDatanode(bpRegistration);
         break;
+      } catch(EOFException e) {  // namenode might have just restarted
+        LOG.info("Problem connecting to server: " + nnAddr + " :"
+            + e.getLocalizedMessage());
+        sleepAndLogInterrupts(1000, "connecting to server");
       } catch(SocketTimeoutException e) {  // namenode is busy
         LOG.info("Problem connecting to server: " + nnAddr);
         sleepAndLogInterrupts(1000, "connecting to server");