浏览代码

HDFS-14513. FSImage which is saving should be clean while NameNode shutdown. Contributed by He Xiaoqiao.

Inigo Goiri 5 年之前
父节点
当前提交
5740eea081

+ 25 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -37,6 +37,7 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 
+import org.apache.hadoop.util.ShutdownHookManager;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -88,6 +89,11 @@ public class FSImage implements Closeable {
   public static final Logger LOG =
       LoggerFactory.getLogger(FSImage.class.getName());
 
+  /**
+   * Priority of the FSImageSaver shutdown hook: {@value}.
+   */
+  public static final int SHUTDOWN_HOOK_PRIORITY = 10;
+
   protected FSEditLog editLog = null;
   private boolean isUpgradeFinalized = false;
 
@@ -1037,6 +1043,18 @@ public class FSImage implements Closeable {
 
     @Override
     public void run() {
+      // Deletes checkpoint file in every storage directory when shutdown.
+      Runnable cancelCheckpointFinalizer = () -> {
+        try {
+          deleteCancelledCheckpoint(context.getTxId());
+          LOG.info("FSImageSaver clean checkpoint: txid={} when meet " +
+              "shutdown.", context.getTxId());
+        } catch (IOException e) {
+          LOG.error("FSImageSaver cancel checkpoint threw an exception:", e);
+        }
+      };
+      ShutdownHookManager.get().addShutdownHook(cancelCheckpointFinalizer,
+          SHUTDOWN_HOOK_PRIORITY);
       try {
         saveFSImage(context, sd, nnf);
       } catch (SaveNamespaceCancelledException snce) {
@@ -1046,6 +1064,13 @@ public class FSImage implements Closeable {
       } catch (Throwable t) {
         LOG.error("Unable to save image for " + sd.getRoot(), t);
         context.reportErrorOnStorageDirectory(sd);
+        try {
+          deleteCancelledCheckpoint(context.getTxId());
+          LOG.info("FSImageSaver clean checkpoint: txid={} when meet " +
+              "Throwable.", context.getTxId());
+        } catch (IOException e) {
+          LOG.error("FSImageSaver cancel checkpoint threw an exception:", e);
+        }
       }
     }