
HADOOP-3998. Fix dfsclient exception when JVM is shutdown. (dhruba)


git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/branch-0.20@752591 13f79535-47bb-0310-9956-ffa450edef68
Dhruba Borthakur
parent commit 37ea6e0d63

+ 2 - 0
CHANGES.txt

@@ -724,6 +724,8 @@ Release 0.19.2 - Unreleased
     dfs.support.append is set to true. (dhruba)
 
     HADOOP-5333. libhdfs supports appending to files. (dhruba)
+
+    HADOOP-3998. Fix dfsclient exception when JVM is shutdown. (dhruba)
  
 Release 0.19.1 - Unreleased
 

+ 15 - 6
src/hdfs/org/apache/hadoop/hdfs/DFSClient.java

@@ -206,8 +206,8 @@ public class DFSClient implements FSConstants, java.io.Closeable {
    */
   public synchronized void close() throws IOException {
     if(clientRunning) {
-      clientRunning = false;
       leasechecker.close();
+      clientRunning = false;
   
       // close connections to the namenode
       RPC.stopProxy(rpcNamenode);
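
The hunk above reorders DFSClient.close(): leasechecker.close() now runs while clientRunning is still true, so the lease-renewal thread is stopped and joined before the client is marked closed and the namenode RPC proxy is torn down. Below is a minimal sketch of that ordering, with a simplified worker thread standing in for the real LeaseChecker (the class and method names are illustrative, not Hadoop APIs):

// Illustrative only: a worker thread guarded by a volatile running flag,
// standing in for DFSClient's lease-renewal thread.
public class ShutdownOrdering {
  private volatile boolean clientRunning = true;

  private final Thread worker = new Thread(() -> {
    while (clientRunning) {
      renewLease();   // would fail if the RPC proxy were already stopped
      try { Thread.sleep(1000); } catch (InterruptedException e) { return; }
    }
  });

  private void renewLease() { /* RPC to the namenode in the real client */ }

  public synchronized void close() throws InterruptedException {
    if (clientRunning) {
      worker.interrupt();       // analogous to leasechecker.close():
      worker.join();            // stop and join the thread first...
      clientRunning = false;    // ...then mark the client closed
      // only now is it safe to stop the RPC proxies, as the real close() does
    }
  }
}

Flipping the flag first, as the old code did, leaves a window where the thread can observe a half-closed client during JVM shutdown.
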
@@ -2180,7 +2180,6 @@ public class DFSClient implements FSConstants, java.io.Closeable {
       private volatile boolean closed = false;
   
       public void run() {
-
         while (!closed && clientRunning) {
 
           // if the Responder encountered an error, shutdown Responder
@@ -2515,21 +2514,30 @@ public class DFSClient implements FSConstants, java.io.Closeable {
               // The original bad datanode is left in the list because it is
               // conservative to remove only one datanode in one iteration.
               for (int j = 0; j < nodes.length; j++) {
-                if (nodes[j] ==  primaryNode) {
+                if (nodes[j].equals(primaryNode)) {
                   errorIndex = j; // forget original bad node.
                 }
               }
+              // remove primary node from list
+              newnodes =  new DatanodeInfo[nodes.length-1];
+              System.arraycopy(nodes, 0, newnodes, 0, errorIndex);
+              System.arraycopy(nodes, errorIndex+1, newnodes, errorIndex,
+                               newnodes.length-errorIndex);
+              nodes = newnodes;
               LOG.warn("Error Recovery for block " + block + " failed " +
                        " because recovery from primary datanode " +
                        primaryNode + " failed " + recoveryErrorCount +
-                       " times. Marking primary datanode as bad.");
+                       " times. " + " Pipeline was " + pipelineMsg +
+                       ". Marking primary datanode as bad.");
               recoveryErrorCount = 0; 
+              errorIndex = -1;
               return true;          // sleep when we return from here
             }
             String emsg = "Error Recovery for block " + block + " failed " +
                           " because recovery from primary datanode " +
                           primaryNode + " failed " + recoveryErrorCount + 
-                          " times. Aborting...";
+                          " times. "  + " Pipeline was " + pipelineMsg +
+                          ". Aborting...";
             LOG.warn(emsg);
             lastException = new IOException(emsg);
             closed = true;
@@ -2539,7 +2547,8 @@ public class DFSClient implements FSConstants, java.io.Closeable {
           LOG.warn("Error Recovery for block " + block + " failed " +
                    " because recovery from primary datanode " +
                    primaryNode + " failed " + recoveryErrorCount +
-                   " times. Will retry...");
+                   " times. "  + " Pipeline was " + pipelineMsg +
+                   ". Will retry...");
           return true;          // sleep when we return from here
         } finally {
           RPC.stopProxy(primary);
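
Two things change in the recovery loop above: the primary-node match now uses equals() instead of reference equality (two DatanodeInfo objects describing the same datanode need not be the same object), and the failed primary is spliced out of the pipeline with a pair of System.arraycopy calls. A minimal sketch of that splice, with plain Strings standing in for DatanodeInfo:

import java.util.Arrays;

public class RemoveFromPipeline {
  // Copy everything before and after errorIndex into an array one slot shorter.
  static String[] remove(String[] nodes, int errorIndex) {
    String[] newnodes = new String[nodes.length - 1];
    System.arraycopy(nodes, 0, newnodes, 0, errorIndex);
    System.arraycopy(nodes, errorIndex + 1, newnodes, errorIndex,
                     newnodes.length - errorIndex);
    return newnodes;
  }

  public static void main(String[] args) {
    String[] pipeline = {"dn1", "dn2", "dn3"};
    // Match by equals(), as the hunk now does, rather than ==.
    int errorIndex = Arrays.asList(pipeline).indexOf("dn2");
    System.out.println(Arrays.toString(remove(pipeline, errorIndex)));
    // prints [dn1, dn3]
  }
}
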

+ 28 - 0
src/test/org/apache/hadoop/hdfs/TestFileCreation.java

@@ -56,6 +56,7 @@ public class TestFileCreation extends junit.framework.TestCase {
     //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
     ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
     ((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL);
+    ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
   }
 
   static final long seed = 0xDEADBEEFL;
@@ -703,4 +704,31 @@ public class TestFileCreation extends junit.framework.TestCase {
 
     System.out.println("testLeaseExpireHardLimit successful");
   }
+
+  // test closing file system before all file handles are closed.
+  public void testFsClose() throws Exception {
+    System.out.println("test file system close start");
+    final int DATANODE_NUM = 3;
+
+    Configuration conf = new Configuration();
+
+    // create cluster
+    MiniDFSCluster cluster = new MiniDFSCluster(conf, DATANODE_NUM, true, null);
+    DistributedFileSystem dfs = null;
+    try {
+      cluster.waitActive();
+      dfs = (DistributedFileSystem)cluster.getFileSystem();
+
+      // create a new file.
+      final String f = DIR + "foofs";
+      final Path fpath = new Path(f);
+      FSDataOutputStream out = TestFileCreation.createFile(dfs, fpath, DATANODE_NUM);
+      out.write("something".getBytes());
+
+      // close file system without closing file
+      dfs.close();
+    } finally {
+      System.out.println("testFsClose successful");
+    }
+  }
 }