Browse Source

HDFS-14697. Backport of HDFS-14513. FSImage which is saving should be clean while NameNode shutdown. Contributed by He Xiaoqiao.

Erik Krogen 5 years ago
parent
commit
ee1547a807

+ 28 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -69,6 +69,7 @@ import org.apache.hadoop.hdfs.util.Canceler;
 import org.apache.hadoop.hdfs.util.MD5FileUtils;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.util.ExitUtil;
+import org.apache.hadoop.util.ShutdownHookManager;
 import org.apache.hadoop.util.Time;
 
 import com.google.common.base.Joiner;
@@ -84,6 +85,11 @@ import com.google.common.collect.Lists;
 public class FSImage implements Closeable {
   public static final Log LOG = LogFactory.getLog(FSImage.class.getName());
 
+  /**
+   * Priority of the FSImageSaver shutdown hook: {@value}.
+   */
+  public static final int SHUTDOWN_HOOK_PRIORITY = 10;
+
   protected FSEditLog editLog = null;
   private boolean isUpgradeFinalized = false;
 
@@ -1019,6 +1025,21 @@ public class FSImage implements Closeable {
 
     @Override
     public void run() {
+      // Deletes the checkpoint file in every storage directory on shutdown.
+      Runnable cancelCheckpointFinalizer = new Runnable() {
+        @Override
+        public void run() {
+          try {
+            deleteCancelledCheckpoint(context.getTxId());
+            LOG.info("FSImageSaver clean checkpoint: txid = "
+                + context.getTxId() + " when meet shutdown.");
+          } catch (IOException e) {
+            LOG.error("FSImageSaver cancel checkpoint threw an exception:", e);
+          }
+        }
+      };
+      ShutdownHookManager.get().addShutdownHook(cancelCheckpointFinalizer,
+          SHUTDOWN_HOOK_PRIORITY);
       try {
         saveFSImage(context, sd, nnf);
       } catch (SaveNamespaceCancelledException snce) {
@@ -1028,6 +1049,13 @@ public class FSImage implements Closeable {
       } catch (Throwable t) {
         LOG.error("Unable to save image for " + sd.getRoot(), t);
         context.reportErrorOnStorageDirectory(sd);
+        try {
+          deleteCancelledCheckpoint(context.getTxId());
+          LOG.info("FSImageSaver clean checkpoint: txid = "
+              + context.getTxId() + " when meet Throwable.");
+        } catch (IOException e) {
+          LOG.error("FSImageSaver cancel checkpoint threw an exception:", e);
+        }
       }
     }