HDFS-10330. Add Corrupt Blocks Information in Metasave output. Contributed by Kuhu Shukla.

Kihwal Lee 9 years ago
parent
commit
919a1d824a
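
Metasave dumps the NameNode's block-level state (blocks being reconstructed, blocks awaiting deletion, datanode summaries, and, with this change, corrupt replicas) to a plain-text file. As a usage sketch, the dump is produced with the standard dfsadmin command below; the file name is arbitrary (metasave-example.out is a made-up name here) and the file is written under the directory given by hadoop.log.dir:

    hdfs dfsadmin -metasave metasave-example.out

The new "Corrupt Blocks:" section is emitted between the invalidate-blocks dump and the datanode dump, as the BlockManager change below shows.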

+ 43 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -109,6 +109,7 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.util.LightWeightGSet;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.Time;
 import org.apache.hadoop.util.VersionInfo;
 
@@ -612,6 +613,48 @@ public class BlockManager implements BlockStatsMXBean {
     // Dump blocks that are waiting to be deleted
     invalidateBlocks.dump(out);
 
+    // Dump corrupt blocks and their storageIDs
+    Set<Block> corruptBlocks = corruptReplicas.getCorruptBlocks();
+    out.println("Corrupt Blocks:");
+    for (Block block : corruptBlocks) {
+      Collection<DatanodeDescriptor> corruptNodes =
+          corruptReplicas.getNodes(block);
+      if (corruptNodes == null) {
+        LOG.warn(block.getBlockId() +
+            " is corrupt but has no associated node.");
+        continue;
+      }
+      int numNodesToFind = corruptNodes.size();
+      for (DatanodeStorageInfo storage : blocksMap.getStorages(block)) {
+        DatanodeDescriptor node = storage.getDatanodeDescriptor();
+        if (corruptNodes.contains(node)) {
+          String storageId = storage.getStorageID();
+          DatanodeStorageInfo storageInfo = node.getStorageInfo(storageId);
+          State state = (storageInfo == null) ? null : storageInfo.getState();
+          out.println("Block=" + block.getBlockId() + "\tNode=" + node.getName()
+              + "\tStorageID=" + storageId + "\tStorageState=" + state
+              + "\tTotalReplicas=" +
+              blocksMap.numNodes(block)
+              + "\tReason=" + corruptReplicas.getCorruptReason(block, node));
+          numNodesToFind--;
+          if (numNodesToFind == 0) {
+            break;
+          }
+        }
+      }
+      if (numNodesToFind > 0) {
+        String[] corruptNodesList = new String[corruptNodes.size()];
+        int i = 0;
+        for (DatanodeDescriptor d : corruptNodes) {
+          corruptNodesList[i] = d.getHostName();
+          i++;
+        }
+        out.println(block.getBlockId() + " corrupt on " +
+            StringUtils.join(",", corruptNodesList) +
+            " but not all nodes are found in its block locations");
+      }
+    }
+
     // Dump all datanodes
     getDatanodeManager().datanodeDump(out);
   }
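
For illustration only, one corrupt-replica line produced by the println above would look roughly like this (the block ID, node address, storage ID, and replica count are invented values; StorageState and Reason take values from the enums referenced in the code):

    Corrupt Blocks:
    Block=1073741825	Node=host1.example.com:50010	StorageID=DS-b0a46bdf	StorageState=NORMAL	TotalReplicas=3	Reason=GENSTAMP_MISMATCH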

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java

@@ -20,8 +20,10 @@ package org.apache.hadoop.hdfs.server.blockmanagement;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -231,6 +233,16 @@ public class CorruptReplicasMap{
     return ret;
   }
 
+  /**
+   * Get the set of corrupt blocks tracked in the corruptReplicasMap.
+   * @return Set of Block objects
+   */
+  Set<Block> getCorruptBlocks() {
+    Set<Block> corruptBlocks = new HashSet<Block>();
+    corruptBlocks.addAll(corruptReplicasMap.keySet());
+    return corruptBlocks;
+  }
+
   /**
    * return the reason about corrupted replica for a given block
    * on a given dn

+ 58 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java

@@ -29,7 +29,13 @@ import static org.mockito.Mockito.reset;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.verify;
 
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -527,6 +533,22 @@ public class TestBlockManager {
     return blockInfo;
   }
 
+  private BlockInfo addCorruptBlockOnNodes(long blockId,
+      List<DatanodeDescriptor> nodes) throws IOException {
+    long inodeId = ++mockINodeId;
+    final INodeFile bc = TestINodeFile.createINodeFile(inodeId);
+
+    BlockInfo blockInfo = blockOnNodes(blockId, nodes);
+    blockInfo.setReplication((short) 3);
+    blockInfo.setBlockCollectionId(inodeId);
+    Mockito.doReturn(bc).when(fsn).getBlockCollection(inodeId);
+    bm.blocksMap.addBlockCollection(blockInfo, bc);
+    bm.markBlockReplicasAsCorrupt(blockInfo, blockInfo,
+        blockInfo.getGenerationStamp() + 1, blockInfo.getNumBytes(),
+        new DatanodeStorageInfo[]{nodes.get(0).getStorageInfos()[0]});
+    return blockInfo;
+  }
+
   private DatanodeStorageInfo[] scheduleSingleReplication(BlockInfo block) {
     // list for priority 1
     List<BlockInfo> list_p1 = new ArrayList<>();
@@ -1129,4 +1151,40 @@ public class TestBlockManager {
       cluster.shutdown();
     }
   }
+
+  @Test
+  public void testMetaSaveCorruptBlocks() throws Exception {
+    List<DatanodeStorageInfo> origStorages = getStorages(0, 1);
+    List<DatanodeDescriptor> origNodes = getNodes(origStorages);
+    addCorruptBlockOnNodes(0, origNodes);
+    File file = new File("test.log");
+    PrintWriter out = new PrintWriter(file);
+    bm.metaSave(out);
+    out.flush();
+    FileInputStream fstream = new FileInputStream(file);
+    DataInputStream in = new DataInputStream(fstream);
+    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
+    try {
+      for(int i =0;i<6;i++) {
+        reader.readLine();
+      }
+      String corruptBlocksLine = reader.readLine();
+      assertEquals("Unexpected text in metasave," +
+              "was expecting corrupt blocks section!", 0,
+          corruptBlocksLine.compareTo("Corrupt Blocks:"));
+      corruptBlocksLine = reader.readLine();
+      String regex = "Block=[0-9]+\\tNode=.*\\tStorageID=.*StorageState.*" +
+          "TotalReplicas=.*Reason=GENSTAMP_MISMATCH";
+      assertTrue("Unexpected corrupt block section in metasave!",
+          corruptBlocksLine.matches(regex));
+      corruptBlocksLine = reader.readLine();
+      regex = "Metasave: Number of datanodes.*";
+      assertTrue("Unexpected corrupt block section in metasave!",
+          corruptBlocksLine.matches(regex));
+    } finally {
+      reader.close();
+      file.delete();
+    }
+  }
 }

+ 3 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java

@@ -159,9 +159,11 @@ public class TestMetaSave {
       assertTrue(line.equals("Metasave: Blocks being reconstructed: 0"));
       line = reader.readLine();
       assertTrue(line.equals("Metasave: Blocks 2 waiting deletion from 1 datanodes."));
-     //skip 2 lines to reach HDFS-9033 scenario.
+      //skip 2 lines to reach HDFS-9033 scenario.
       line = reader.readLine();
       line = reader.readLine();
+      // skip 1 line for Corrupt Blocks section.
+      line = reader.readLine();
       line = reader.readLine();
       assertTrue(line.equals("Metasave: Number of datanodes: 2"));
       line = reader.readLine();
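
As a closing note, both modified tests can be run on their own with the usual Maven surefire selector from hadoop-hdfs-project/hadoop-hdfs (a standard invocation, not part of this change):

    mvn test -Dtest=TestBlockManager,TestMetaSave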