Forráskód Böngészése

HDFS-7373. Clean up temporary files after fsimage transfer failures. Contributed by Kihwal Lee
(cherry picked from commit c0d666c74e9ea76564a2458c6c0a78ae7afa9fea)

Kihwal Lee 10 éve
szülő
commit
8bffaa46fc

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -211,6 +211,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7531. Improve the concurrent access on FsVolumeList (Lei Xu via Colin
     P. McCabe)
 
+    HDFS-7373. Clean up temporary files after fsimage transfer failures.
+    (kihwal)
+
   OPTIMIZATIONS
 
     HDFS-7454. Reduce memory footprint for AclEntries in NameNode.

+ 21 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java

@@ -528,10 +528,18 @@ public class TransferFsImage {
         fos.getChannel().force(true);
         fos.close();
       }
+
+      // Something went wrong and did not finish reading.
+      // Remove the temporary files.
+      if (!finishedReceiving) {
+        deleteTmpFiles(localPaths);
+      }
+
       if (finishedReceiving && received != advertisedSize) {
         // only throw this exception if we think we read all of it on our end
         // -- otherwise a client-side IOException would be masked by this
         // exception that makes it look like a server-side problem!
+        deleteTmpFiles(localPaths);
         throw new IOException("File " + url + " received length " + received +
                               " is not of the advertised size " +
                               advertisedSize);
@@ -548,6 +556,7 @@ public class TransferFsImage {
       
       if (advertisedDigest != null &&
           !computedDigest.equals(advertisedDigest)) {
+        deleteTmpFiles(localPaths);
         throw new IOException("File " + url + " computed digest " +
             computedDigest + " does not match advertised digest " + 
             advertisedDigest);
@@ -558,6 +567,18 @@ public class TransferFsImage {
     }    
   }
 
+  private static void deleteTmpFiles(List<File> files) {
+    if (files == null) {
+      return;
+    }
+
+    LOG.info("Deleting temporary files: " + files);
+    for (File file : files) {
+      file.delete(); // ignore the return value
+    }
+  }
+
+
   private static MD5Hash parseMD5Header(HttpURLConnection connection) {
     String header = connection.getHeaderField(MD5_HEADER);
     return (header != null) ? new MD5Hash(header) : null;

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

@@ -643,6 +643,22 @@ public class TestCheckpoint {
         });
   }
 
+  private void checkTempImages(NNStorage storage) throws IOException {
+    List<File> dirs = new ArrayList<File>();
+    dirs.add(storage.getStorageDir(0).getCurrentDir());
+    dirs.add(storage.getStorageDir(1).getCurrentDir());
+
+    for (File dir : dirs) {
+      File[] list = dir.listFiles();
+      for (File f : list) {
+        // Throw an exception if a temp image file is found.
+        if(f.getName().contains(NNStorage.NameNodeFile.IMAGE_NEW.getName())) {
+          throw new IOException("Found " + f);
+        }
+      }
+    }
+  }
+
   /**
    * Simulate 2NN failing to send the whole file (error type 3)
    * The length header in the HTTP transfer should prevent
@@ -704,6 +720,9 @@ public class TestCheckpoint {
         GenericTestUtils.assertExceptionContains(exceptionSubstring, e);
       }
       Mockito.reset(faultInjector);
+      // Make sure there is no temporary files left around.
+      checkTempImages(cluster.getNameNode().getFSImage().getStorage());
+      checkTempImages(secondary.getFSImage().getStorage());
       secondary.shutdown(); // secondary namenode crash!
       secondary = null;