Procházet zdrojové kódy

HDFS-17033. Update fsck to display stale state info of blocks accurately. (#5815). Contributed by Wang Yuanben.

Reviewed-by: Shilun Fan <slfan1989@apache.org>
Reviewed-by: Xing Lin <linxingnku@gmail.com>
Reviewed-by: Hualong Zhang <hualong.z@hotmail.com>
Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
WangYuanben před 1 rokem
rodič
revize
37b2d36edd

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java

@@ -126,6 +126,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
   public static final String ENTERING_MAINTENANCE_STATUS =
       "is ENTERING MAINTENANCE";
   public static final String IN_MAINTENANCE_STATUS = "is IN MAINTENANCE";
+  public static final String STALE_STATUS = "is STALE";
   public static final String NONEXISTENT_STATUS = "does not exist";
   public static final String FAILURE_STATUS = "FAILED";
   public static final String UNDEFINED = "undefined";
@@ -370,6 +371,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       out.print(ENTERING_MAINTENANCE_STATUS);
     } else if (this.showMaintenanceState && dn.isInMaintenance()) {
       out.print(IN_MAINTENANCE_STATUS);
+    } else if (dn.isStale(this.staleInterval)) {
+      out.print(STALE_STATUS);
     } else {
       out.print(HEALTHY_STATUS);
     }

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java

@@ -404,6 +404,8 @@ public class DFSck extends Configured implements Tool {
       errCode = 4;
     } else if (lastLine.endsWith(NamenodeFsck.ENTERING_MAINTENANCE_STATUS)) {
       errCode = 5;
+    } else if (lastLine.endsWith(NamenodeFsck.STALE_STATUS)) {
+      errCode = 6;
     }
     return errCode;
   }

+ 86 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java

@@ -19,6 +19,7 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CORRUPT_BLOCK_DELETE_IMMEDIATELY_ENABLED;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY;
 import static org.apache.hadoop.hdfs.MiniDFSCluster.HDFS_MINIDFS_BASEDIR;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
@@ -1681,6 +1682,91 @@ public class TestFsck {
     assertFalse(fsckOut.contains(NamenodeFsck.IN_MAINTENANCE_STATUS));
   }
 
+  /**
+   * Test for blockIdCK with datanode staleness.
+   */
+  @Test
+  public void testBlockIdCKStaleness() throws Exception {
+    final short replFactor = 1;
+    final long blockSize = 512;
+    Configuration configuration = new Configuration();
+
+    // Shorten dfs.namenode.stale.datanode.interval for easier testing.
+    configuration.setLong(DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, 5000);
+    configuration.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+    configuration.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, replFactor);
+
+    String[] racks = {"/rack1", "/rack2"};
+    String[] hosts = {"host1", "host2"};
+
+    File builderBaseDir = new File(GenericTestUtils.getRandomizedTempPath());
+    cluster = new MiniDFSCluster.Builder(configuration, builderBaseDir)
+        .hosts(hosts).racks(racks).build();
+    assertNotNull("Failed Cluster Creation", cluster);
+    cluster.waitClusterUp();
+    DistributedFileSystem fs = cluster.getFileSystem();
+    assertNotNull("Failed to get FileSystem", fs);
+
+    try {
+      DFSTestUtil util = new DFSTestUtil.Builder().
+          setName(getClass().getSimpleName()).setNumFiles(1).build();
+
+      // Create one file.
+      final String pathString = new String("/testfile");
+      final Path path = new Path(pathString);
+      util.createFile(fs, path, 1024L, replFactor, 1024L);
+      util.waitReplication(fs, path, replFactor);
+      StringBuilder sb = new StringBuilder();
+      for (LocatedBlock lb: util.getAllBlocks(fs, path)) {
+        sb.append(lb.getBlock().getLocalBlock().getBlockName() + " ");
+      }
+      String[] bIds = sb.toString().split(" ");
+
+      // Make sure datanode is HEALTHY before down.
+      String outStr = runFsck(configuration, 0, true, "/", "-blockId", bIds[0]);
+      assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));
+
+      FSNamesystem fsn = cluster.getNameNode().getNamesystem();
+      BlockManager bm = fsn.getBlockManager();
+      DatanodeManager dnm = bm.getDatanodeManager();
+      DatanodeDescriptor dn = dnm.getDatanode(cluster.getDataNodes().get(0)
+          .getDatanodeId());
+      final String dnName = dn.getXferAddr();
+
+      // Make the block on datanode enter stale state.
+      cluster.stopDataNode(0);
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+        @Override
+        public Boolean get() {
+          try {
+            DatanodeInfo datanodeInfo = null;
+            for (DatanodeInfo info : fs.getDataNodeStats()) {
+              if (dnName.equals(info.getXferAddr())) {
+                datanodeInfo = info;
+              }
+            }
+            if (datanodeInfo != null && datanodeInfo.isStale(5000)) {
+              return true;
+            }
+          } catch (Exception e) {
+            LOG.warn("Unexpected exception: " + e);
+            return false;
+          }
+          return false;
+        }
+      }, 500, 30000);
+      outStr = runFsck(configuration, 6, true, "/", "-blockId", bIds[0]);
+      assertTrue(outStr.contains(NamenodeFsck.STALE_STATUS));
+    } finally {
+      if (fs != null) {
+        fs.close();
+      }
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
   /**
    * Test for blockIdCK with block corruption.
    */