Browse Source

HDFS-13027. Handle possible NPEs due to deleted blocks in race condition. Contributed by Vinayakumar B.

(cherry picked from commit 65977e5d8124be2bc208af25beed934933f170b3)
Vinayakumar B 6 years ago
parent
commit
c36d69a7b3

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java

@@ -52,7 +52,7 @@ public abstract class BlockInfo extends Block
   /**
    * Block collection ID.
    */
-  private long bcId;
+  private volatile long bcId;
 
   /** For implementing {@link LightWeightGSet.LinkedElement} interface. */
   private LightWeightGSet.LinkedElement nextLinkedElement;

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -4171,6 +4171,10 @@ public class BlockManager implements BlockStatsMXBean {
     int numExtraRedundancy = 0;
     while(it.hasNext()) {
       final BlockInfo block = it.next();
+      if (block.isDeleted()) {
+        //Orphan block, will be handled eventually, skip
+        continue;
+      }
       int expectedReplication = this.getExpectedRedundancyNum(block);
       NumberReplicas num = countNodes(block);
       if (shouldProcessExtraRedundancy(num, expectedReplication)) {

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -4128,7 +4128,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         while (it.hasNext()) {
           Block b = it.next();
           BlockInfo blockInfo = blockManager.getStoredBlock(b);
-          if (blockInfo == null) {
+          if (blockInfo == null || blockInfo.isDeleted()) {
             LOG.info("Cannot find block info for block " + b);
           } else {
             BlockCollection bc = getBlockCollection(blockInfo);

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java

@@ -264,12 +264,13 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       return;
     }
 
+    namenode.getNamesystem().readLock();
     try {
       //get blockInfo
       Block block = new Block(Block.getBlockId(blockId));
       //find which file this block belongs to
       BlockInfo blockInfo = blockManager.getStoredBlock(block);
-      if(blockInfo == null) {
+      if (blockInfo == null || blockInfo.isDeleted()) {
         out.println("Block "+ blockId +" " + NONEXISTENT_STATUS);
         LOG.warn("Block "+ blockId + " " + NONEXISTENT_STATUS);
         return;
@@ -329,6 +330,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory {
       out.println(e.getMessage());
       out.print("\n\n" + errMsg);
       LOG.warn("Error in looking up block", e);
+    } finally {
+      namenode.getNamesystem().readUnlock("fsck");
     }
   }