소스 검색

HDFS-7930. commitBlockSynchronization() does not remove locations. (yliu)

yliu 10 년 전
부모
커밋
0da3fcb4b0

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -895,6 +895,8 @@ Release 2.7.0 - UNRELEASED
 
     HDFS-7932. Speed up the shutdown of datanode during rolling upgrade.(kihwal)
 
+    HDFS-7930. commitBlockSynchronization() does not remove locations. (yliu)
+
     BREAKDOWN OF HDFS-7584 SUBTASKS AND RELATED JIRAS
 
       HDFS-7720. Quota by Storage Type API, tools and ClientNameNode

+ 40 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1954,6 +1954,46 @@ public class BlockManager {
     return toInvalidate;
   }
 
+  /**
+   * Mark block replicas as corrupt except those on the storages in 
+   * newStorages list.
+   */
+  public void markBlockReplicasAsCorrupt(BlockInfoContiguous block, 
+      long oldGenerationStamp, long oldNumBytes, 
+      DatanodeStorageInfo[] newStorages) throws IOException {
+    assert namesystem.hasWriteLock();
+    BlockToMarkCorrupt b = null;
+    if (block.getGenerationStamp() != oldGenerationStamp) {
+      b = new BlockToMarkCorrupt(block, oldGenerationStamp,
+          "genstamp does not match " + oldGenerationStamp
+          + " : " + block.getGenerationStamp(), Reason.GENSTAMP_MISMATCH);
+    } else if (block.getNumBytes() != oldNumBytes) {
+      b = new BlockToMarkCorrupt(block,
+          "length does not match " + oldNumBytes
+          + " : " + block.getNumBytes(), Reason.SIZE_MISMATCH);
+    } else {
+      return;
+    }
+
+    for (DatanodeStorageInfo storage : getStorages(block)) {
+      boolean isCorrupt = true;
+      if (newStorages != null) {
+        for (DatanodeStorageInfo newStorage : newStorages) {
+          if (newStorage!= null && storage.equals(newStorage)) {
+            isCorrupt = false;
+            break;
+          }
+        }
+      }
+      if (isCorrupt) {
+        blockLog.info("BLOCK* markBlockReplicasAsCorrupt: mark block replica" +
+            " {} on {} as corrupt because the dn is not in the new committed " +
+            "storage list.", b, storage.getDatanodeDescriptor());
+        markBlockAsCorrupt(b, storage, storage.getDatanodeDescriptor());
+      }
+    }
+  }
+
   /**
    * processFirstBlockReport is intended only for processing "initial" block
    * reports, the first block report received from a DN after it registers.

+ 6 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -4221,6 +4221,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
           throw new IOException("Block (=" + oldBlock + ") not found");
         }
       }
+      final long oldGenerationStamp = storedBlock.getGenerationStamp();
+      final long oldNumBytes = storedBlock.getNumBytes();
       //
       // The implementation of delete operation (see @deleteInternal method)
       // first removes the file paths from namespace, and delays the removal
@@ -4281,8 +4283,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         }
 
         // find the DatanodeDescriptor objects
-        // There should be no locations in the blockManager till now because the
-        // file is underConstruction
         ArrayList<DatanodeDescriptor> trimmedTargets =
             new ArrayList<DatanodeDescriptor>(newtargets.length);
         ArrayList<String> trimmedStorages =
@@ -4326,6 +4326,10 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
           iFile.setLastBlock(truncatedBlock, trimmedStorageInfos);
         } else {
           iFile.setLastBlock(storedBlock, trimmedStorageInfos);
+          if (closeFile) {
+            blockManager.markBlockReplicasAsCorrupt(storedBlock,
+                oldGenerationStamp, oldNumBytes, trimmedStorageInfos);
+          }
         }
       }
 

+ 2 - 8
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileTruncate.java

@@ -688,11 +688,7 @@ public class TestFileTruncate {
     /*
      * For non copy-on-truncate, the truncated block id is the same, but the 
      * GS should increase.
-     * We trigger block report for dn0 after it restarts, since the GS 
-     * of replica for the last block on it is old, so the reported last block
-     * from dn0 should be marked corrupt on nn and the replicas of last block 
-     * on nn should decrease 1, then the truncated block will be replicated 
-     * to dn0.
+     * The truncated block will be replicated to dn0 after it restarts.
      */
     assertEquals(newBlock.getBlock().getBlockId(), 
         oldBlock.getBlock().getBlockId());
@@ -748,8 +744,7 @@ public class TestFileTruncate {
     LocatedBlock newBlock = getLocatedBlocks(p).getLastLocatedBlock();
     /*
      * For copy-on-truncate, new block is made with new block id and new GS.
-     * We trigger block report for dn1 after it restarts. The replicas of 
-     * the new block is 2, and then it will be replicated to dn1.
+     * The new block has 2 replicas, then it will be replicated to dn1.
      */
     assertNotEquals(newBlock.getBlock().getBlockId(), 
         oldBlock.getBlock().getBlockId());
@@ -802,7 +797,6 @@ public class TestFileTruncate {
     cluster.restartDataNode(dn1, true, true);
     cluster.waitActive();
     checkBlockRecovery(p);
-    cluster.triggerBlockReports();
 
     LocatedBlock newBlock = getLocatedBlocks(p).getLastLocatedBlock();
     /*