Forráskód Böngészése

HDFS-8229. LAZY_PERSIST file gets deleted after NameNode restart. (Contributed by Surendra Singh Lilhore)

Arpit Agarwal 10 éve
szülő
commit
6f541edce0

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -601,6 +601,9 @@ Release 2.8.0 - UNRELEASED
     HDFS-8276. LazyPersistFileScrubber should be disabled if scrubber interval
     configured zero. (Surendra Singh Lilhore via Arpit Agarwal)
 
+    HDFS-8229. LAZY_PERSIST file gets deleted after NameNode restart.
+    (Surendra Singh Lilhore via Arpit Agarwal) 
+
 Release 2.7.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

+ 8 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -4767,7 +4767,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     public void run() {
       while (fsRunning && shouldRun) {
         try {
-          clearCorruptLazyPersistFiles();
+          if (!isInSafeMode()) {
+            clearCorruptLazyPersistFiles();
+          } else {
+            if (FSNamesystem.LOG.isDebugEnabled()) {
+              FSNamesystem.LOG
+                  .debug("Namenode is in safemode, skipping scrubbing of corrupted lazy-persist files.");
+            }
+          }
           Thread.sleep(scrubIntervalSec * 1000);
         } catch (InterruptedException e) {
           FSNamesystem.LOG.info(

+ 1 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/LazyPersistTestCase.java

@@ -258,6 +258,7 @@ public abstract class LazyPersistTestCase {
                 LAZY_WRITER_INTERVAL_SEC);
     conf.setLong(DFS_DATANODE_RAM_DISK_LOW_WATERMARK_BYTES,
                 evictionLowWatermarkReplicas * BLOCK_SIZE);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1);
 
     if (useSCR) {
       conf.setBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY, true);

+ 31 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestLazyPersistFiles.java

@@ -16,6 +16,7 @@
  * limitations under the License.
  */
 package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
+import com.google.common.collect.Iterators;
 import com.google.common.util.concurrent.Uninterruptibles;
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.Path;
@@ -145,6 +146,36 @@ public class TestLazyPersistFiles extends LazyPersistTestCase {
     Assert.assertTrue(fs.exists(path1));
 
   }
+
+ /**
+  * If NN restarted then lazyPersist files should not deleted
+  */
+  @Test
+  public void testFileShouldNotDiscardedIfNNRestarted() throws IOException,
+      InterruptedException {
+    getClusterBuilder().setRamDiskReplicaCapacity(2).build();
+    final String METHOD_NAME = GenericTestUtils.getMethodName();
+    Path path1 = new Path("/" + METHOD_NAME + ".01.dat");
+    makeTestFile(path1, BLOCK_SIZE, true);
+    ensureFileReplicasOnStorageType(path1, RAM_DISK);
+
+    cluster.shutdownDataNodes();
+
+    cluster.restartNameNodes();
+
+    // wait for the replication monitor to mark the file as corrupt
+    Thread.sleep(2 * DFS_NAMENODE_REPLICATION_INTERVAL_DEFAULT * 1000);
+
+    Long corruptBlkCount = (long) Iterators.size(cluster.getNameNode()
+        .getNamesystem().getBlockManager().getCorruptReplicaBlockIterator());
+
+    // Check block detected as corrupted
+    assertThat(corruptBlkCount, is(1L));
+
+    // Ensure path1 exist.
+    Assert.assertTrue(fs.exists(path1));
+  }
+
   /**
    * Concurrent read from the same node and verify the contents.
    */