浏览代码

HDFS-4581. DataNode.checkDiskError should not be called on network errors. Contributed by Rohit Kochar.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1461597 13f79535-47bb-0310-9956-ffa450edef68
Kihwal Lee 12 年之前
父节点
当前提交
c9f5052803

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -2467,6 +2467,9 @@ Release 0.23.7 - UNRELEASED
     HDFS-3367. WebHDFS doesn't use the logged in user when opening
     connections (daryn)
 
+    HDFS-4581. checkDiskError should not be called on network errors (Rohit
+    Kochar via kihwal)
+
 Release 0.23.6 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 16 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -60,8 +60,11 @@ import java.io.PrintStream;
 import java.net.InetSocketAddress;
 import java.net.ServerSocket;
 import java.net.Socket;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
 import java.net.URI;
 import java.net.UnknownHostException;
+import java.nio.channels.ClosedByInterruptException;
 import java.nio.channels.ServerSocketChannel;
 import java.nio.channels.SocketChannel;
 import java.security.PrivilegedExceptionAction;
@@ -1172,7 +1175,13 @@ public class DataNode extends Configured
   protected void checkDiskError(Exception e ) throws IOException {
     
     LOG.warn("checkDiskError: exception: ", e);
-    
+    if (e instanceof SocketException || e instanceof SocketTimeoutException
+    	  || e instanceof ClosedByInterruptException 
+    	  || e.getMessage().startsWith("Broken pipe")) {
+      LOG.info("Not checking disk as checkDiskError was called on a network" +
+      		" related exception");	
+      return;
+    }
     if (e.getMessage() != null &&
         e.getMessage().startsWith("No space left on device")) {
       throw new DiskOutOfSpaceException("No space left on device");
@@ -1484,8 +1493,12 @@ public class DataNode extends Configured
       } catch (IOException ie) {
         LOG.warn(bpReg + ":Failed to transfer " + b + " to " +
             targets[0] + " got ", ie);
-        // check if there are any disk problem
-        checkDiskError();
+          // check if there are any disk problems
+        try{
+          checkDiskError(ie);
+        } catch(IOException e) {
+            LOG.warn("DataNode.checkDiskError failed in run() with: ", e);
+        }
         
       } finally {
         xmitsInProgress.getAndDecrement();