Ver código fonte

HDFS-14043. Tolerate corrupted seen_txid file. Contributed by Lukas Majercak.

(cherry picked from commit f3296501e09fa7f1e81548dfcefa56f20fe337ca)
Inigo Goiri 6 anos atrás
pai
commit
9bf4f3d614

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/PersistentLongFile.java

@@ -98,6 +98,8 @@ public class PersistentLongFile {
         val = Long.parseLong(br.readLine());
         br.close();
         br = null;
+      } catch (NumberFormatException e) {
+        throw new IOException(e);
       } finally {
         IOUtils.cleanup(LOG, br);
       }

+ 56 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java

@@ -28,8 +28,13 @@ import static org.mockito.Mockito.doThrow;
 import static org.mockito.Mockito.spy;
 
 import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -37,6 +42,8 @@ import java.util.concurrent.Future;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -737,6 +744,55 @@ public class TestSaveNamespace {
     }
   }
 
+  /** Loading must tolerate a corrupt seen_txid file in one of two name dirs. */
+  @Test(timeout=30000)
+  public void testTxFaultTolerance() throws Exception {
+    String baseDir = MiniDFSCluster.getBaseDirectory();
+    List<String> nameDirs = new ArrayList<>();
+    nameDirs.add(fileAsURI(new File(baseDir, "name1")).toString());
+    nameDirs.add(fileAsURI(new File(baseDir, "name2")).toString());
+
+    Configuration conf = new HdfsConfiguration();
+    String nameDirsStr = StringUtils.join(",", nameDirs);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDirsStr);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, nameDirsStr);
+
+    NameNode.initMetrics(conf, NamenodeRole.NAMENODE);
+    DFSTestUtil.formatNameNode(conf);
+    FSNamesystem fsn = FSNamesystem.loadFromDisk(conf);
+    try {
+      // We have a BEGIN_LOG_SEGMENT txn to start
+      assertEquals(1, fsn.getEditLog().getLastWrittenTxId());
+      doAnEdit(fsn, 1);
+      assertEquals(2, fsn.getEditLog().getLastWrittenTxId());
+
+      // Shut down; clear the reference so that a failed restart does not
+      // make the finally block close this instance a second time.
+      fsn.close();
+      fsn = null;
+
+      // Corrupt one of the seen_txid files
+      File txidFile0 = new File(new URI(nameDirs.get(0) +
+          "/current/seen_txid"));
+      try (PrintWriter pw = new PrintWriter(new FileWriter(txidFile0, false))) {
+        pw.print("corrupt____!");
+      }
+
+      // Restart; presumably close and reload each log one segment txn (2 -> 4)
+      fsn = FSNamesystem.loadFromDisk(conf);
+      assertEquals(4, fsn.getEditLog().getLastWrittenTxId());
+
+      // Check seen_txid is same in both dirs
+      File txidFile1 = new File(new URI(nameDirs.get(1) +
+          "/current/seen_txid"));
+      assertTrue(FileUtils.contentEquals(txidFile0, txidFile1));
+    } finally {
+      if (fsn != null) {
+        fsn.close();
+      }
+    }
+  }
+
   private void doAnEdit(FSNamesystem fsn, int id) throws IOException {
     // Make an edit
     fsn.mkdirs("/test" + id, new PermissionStatus("test", "Test",