|
@@ -33,6 +33,7 @@ import java.util.Map;
|
|
|
import java.util.Queue;
|
|
|
import java.util.Set;
|
|
|
import java.util.TreeMap;
|
|
|
+import java.util.TreeSet;
|
|
|
import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
|
|
import org.apache.commons.logging.Log;
|
|
@@ -68,8 +69,10 @@ import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
|
|
|
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
|
|
|
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
|
|
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
|
|
|
+import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
|
|
|
import org.apache.hadoop.hdfs.server.protocol.KeyUpdateCommand;
|
|
|
import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
|
|
|
+import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
|
|
|
import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
|
|
|
import org.apache.hadoop.net.Node;
|
|
|
import org.apache.hadoop.security.UserGroupInformation;
|
|
@@ -1034,7 +1037,7 @@ public class BlockManager {
|
|
|
* for logging purposes
|
|
|
*/
|
|
|
public void findAndMarkBlockAsCorrupt(final ExtendedBlock blk,
|
|
|
- final DatanodeInfo dn, String reason) throws IOException {
|
|
|
+ final DatanodeInfo dn, String storageID, String reason) throws IOException {
|
|
|
assert namesystem.hasWriteLock();
|
|
|
final BlockInfo storedBlock = getStoredBlock(blk.getLocalBlock());
|
|
|
if (storedBlock == null) {
|
|
@@ -1046,11 +1049,11 @@ public class BlockManager {
|
|
|
+ blk + " not found");
|
|
|
return;
|
|
|
}
|
|
|
- markBlockAsCorrupt(new BlockToMarkCorrupt(storedBlock, reason), dn);
|
|
|
+ markBlockAsCorrupt(new BlockToMarkCorrupt(storedBlock, reason), dn, storageID);
|
|
|
}
|
|
|
|
|
|
private void markBlockAsCorrupt(BlockToMarkCorrupt b,
|
|
|
- DatanodeInfo dn) throws IOException {
|
|
|
+ DatanodeInfo dn, String storageID) throws IOException {
|
|
|
DatanodeDescriptor node = getDatanodeManager().getDatanode(dn);
|
|
|
if (node == null) {
|
|
|
throw new IOException("Cannot mark " + b
|
|
@@ -1066,7 +1069,7 @@ public class BlockManager {
|
|
|
}
|
|
|
|
|
|
// Add replica to the data-node if it is not already there
|
|
|
- node.addBlock(b.stored);
|
|
|
+ node.addBlock(storageID, b.stored);
|
|
|
|
|
|
// Add this replica to corruptReplicas Map
|
|
|
corruptReplicas.addToCorruptReplicasMap(b.corrupted, node, b.reason);
|
|
@@ -1601,10 +1604,11 @@ public class BlockManager {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * The given datanode is reporting all its blocks.
|
|
|
- * Update the (machine-->blocklist) and (block-->machinelist) maps.
|
|
|
+ * The given storage is reporting all its blocks.
|
|
|
+ * Update the (storage-->block list) and (block-->storage list) maps.
|
|
|
*/
|
|
|
- public void processReport(final DatanodeID nodeID, final String poolId,
|
|
|
+ public void processReport(final DatanodeID nodeID,
|
|
|
+ final DatanodeStorage storage, final String poolId,
|
|
|
final BlockListAsLongs newReport) throws IOException {
|
|
|
namesystem.writeLock();
|
|
|
final long startTime = Time.now(); //after acquiring write lock
|
|
@@ -1628,9 +1632,9 @@ public class BlockManager {
|
|
|
if (node.numBlocks() == 0) {
|
|
|
// The first block report can be processed a lot more efficiently than
|
|
|
// ordinary block reports. This shortens restart times.
|
|
|
- processFirstBlockReport(node, newReport);
|
|
|
+ processFirstBlockReport(node, storage.getStorageID(), newReport);
|
|
|
} else {
|
|
|
- processReport(node, newReport);
|
|
|
+ processReport(node, storage, newReport);
|
|
|
}
|
|
|
|
|
|
// Now that we have an up-to-date block report, we know that any
|
|
@@ -1691,28 +1695,31 @@ public class BlockManager {
|
|
|
}
|
|
|
|
|
|
private void processReport(final DatanodeDescriptor node,
|
|
|
+ final DatanodeStorage storage,
|
|
|
final BlockListAsLongs report) throws IOException {
|
|
|
// Normal case:
|
|
|
// Modify the (block-->datanode) map, according to the difference
|
|
|
// between the old and new block report.
|
|
|
//
|
|
|
Collection<BlockInfo> toAdd = new LinkedList<BlockInfo>();
|
|
|
- Collection<Block> toRemove = new LinkedList<Block>();
|
|
|
+ Collection<Block> toRemove = new TreeSet<Block>();
|
|
|
Collection<Block> toInvalidate = new LinkedList<Block>();
|
|
|
Collection<BlockToMarkCorrupt> toCorrupt = new LinkedList<BlockToMarkCorrupt>();
|
|
|
Collection<StatefulBlockInfo> toUC = new LinkedList<StatefulBlockInfo>();
|
|
|
- reportDiff(node, report, toAdd, toRemove, toInvalidate, toCorrupt, toUC);
|
|
|
+ reportDiff(node, storage, report,
|
|
|
+ toAdd, toRemove, toInvalidate, toCorrupt, toUC);
|
|
|
|
|
|
// Process the blocks on each queue
|
|
|
for (StatefulBlockInfo b : toUC) {
|
|
|
- addStoredBlockUnderConstruction(b.storedBlock, node, b.reportedState);
|
|
|
+ addStoredBlockUnderConstruction(b.storedBlock, node,
|
|
|
+ storage.getStorageID(), b.reportedState);
|
|
|
}
|
|
|
for (Block b : toRemove) {
|
|
|
removeStoredBlock(b, node);
|
|
|
}
|
|
|
int numBlocksLogged = 0;
|
|
|
for (BlockInfo b : toAdd) {
|
|
|
- addStoredBlock(b, node, null, numBlocksLogged < maxNumBlocksToLog);
|
|
|
+ addStoredBlock(b, node, storage.getStorageID(), null, numBlocksLogged < maxNumBlocksToLog);
|
|
|
numBlocksLogged++;
|
|
|
}
|
|
|
if (numBlocksLogged > maxNumBlocksToLog) {
|
|
@@ -1726,7 +1733,7 @@ public class BlockManager {
|
|
|
addToInvalidates(b, node);
|
|
|
}
|
|
|
for (BlockToMarkCorrupt b : toCorrupt) {
|
|
|
- markBlockAsCorrupt(b, node);
|
|
|
+ markBlockAsCorrupt(b, node, storage.getStorageID());
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -1742,6 +1749,7 @@ public class BlockManager {
|
|
|
* @throws IOException
|
|
|
*/
|
|
|
private void processFirstBlockReport(final DatanodeDescriptor node,
|
|
|
+ final String storageID,
|
|
|
final BlockListAsLongs report) throws IOException {
|
|
|
if (report == null) return;
|
|
|
assert (namesystem.hasWriteLock());
|
|
@@ -1754,7 +1762,7 @@ public class BlockManager {
|
|
|
|
|
|
if (shouldPostponeBlocksFromFuture &&
|
|
|
namesystem.isGenStampInFuture(iblk)) {
|
|
|
- queueReportedBlock(node, iblk, reportedState,
|
|
|
+ queueReportedBlock(node, storageID, iblk, reportedState,
|
|
|
QUEUE_REASON_FUTURE_GENSTAMP);
|
|
|
continue;
|
|
|
}
|
|
@@ -1771,10 +1779,10 @@ public class BlockManager {
|
|
|
if (shouldPostponeBlocksFromFuture) {
|
|
|
// In the Standby, we may receive a block report for a file that we
|
|
|
// just have an out-of-date gen-stamp or state for, for example.
|
|
|
- queueReportedBlock(node, iblk, reportedState,
|
|
|
+ queueReportedBlock(node, storageID, iblk, reportedState,
|
|
|
QUEUE_REASON_CORRUPT_STATE);
|
|
|
} else {
|
|
|
- markBlockAsCorrupt(c, node);
|
|
|
+ markBlockAsCorrupt(c, node, storageID);
|
|
|
}
|
|
|
continue;
|
|
|
}
|
|
@@ -1787,25 +1795,26 @@ public class BlockManager {
|
|
|
}
|
|
|
//add replica if appropriate
|
|
|
if (reportedState == ReplicaState.FINALIZED) {
|
|
|
- addStoredBlockImmediate(storedBlock, node);
|
|
|
+ addStoredBlockImmediate(storedBlock, node, storageID);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- private void reportDiff(DatanodeDescriptor dn,
|
|
|
+ private void reportDiff(DatanodeDescriptor dn, DatanodeStorage storage,
|
|
|
BlockListAsLongs newReport,
|
|
|
Collection<BlockInfo> toAdd, // add to DatanodeDescriptor
|
|
|
Collection<Block> toRemove, // remove from DatanodeDescriptor
|
|
|
Collection<Block> toInvalidate, // should be removed from DN
|
|
|
Collection<BlockToMarkCorrupt> toCorrupt, // add to corrupt replicas list
|
|
|
Collection<StatefulBlockInfo> toUC) { // add to under-construction list
|
|
|
- // place a delimiter in the list which separates blocks
|
|
|
- // that have been reported from those that have not
|
|
|
- BlockInfo delimiter = new BlockInfo(new Block(), 1);
|
|
|
- boolean added = dn.addBlock(delimiter);
|
|
|
- assert added : "Delimiting block cannot be present in the node";
|
|
|
- int headIndex = 0; //currently the delimiter is in the head of the list
|
|
|
- int curIndex;
|
|
|
+
|
|
|
+ dn.updateStorage(storage);
|
|
|
+
|
|
|
+ // add all blocks to remove list
|
|
|
+ for(Iterator<BlockInfo> it = dn.getBlockIterator(storage.getStorageID());
|
|
|
+ it.hasNext(); ) {
|
|
|
+ toRemove.add(it.next());
|
|
|
+ }
|
|
|
|
|
|
if (newReport == null)
|
|
|
newReport = new BlockListAsLongs();
|
|
@@ -1814,20 +1823,10 @@ public class BlockManager {
|
|
|
while(itBR.hasNext()) {
|
|
|
Block iblk = itBR.next();
|
|
|
ReplicaState iState = itBR.getCurrentReplicaState();
|
|
|
- BlockInfo storedBlock = processReportedBlock(dn, iblk, iState,
|
|
|
- toAdd, toInvalidate, toCorrupt, toUC);
|
|
|
- // move block to the head of the list
|
|
|
- if (storedBlock != null && (curIndex = storedBlock.findDatanode(dn)) >= 0) {
|
|
|
- headIndex = dn.moveBlockToHead(storedBlock, curIndex, headIndex);
|
|
|
- }
|
|
|
- }
|
|
|
- // collect blocks that have not been reported
|
|
|
- // all of them are next to the delimiter
|
|
|
- Iterator<? extends Block> it = new DatanodeDescriptor.BlockIterator(
|
|
|
- delimiter.getNext(0), dn);
|
|
|
- while(it.hasNext())
|
|
|
- toRemove.add(it.next());
|
|
|
- dn.removeBlock(delimiter);
|
|
|
+ BlockInfo storedBlock = processReportedBlock(dn, storage.getStorageID(),
|
|
|
+ iblk, iState, toAdd, toInvalidate, toCorrupt, toUC);
|
|
|
+ toRemove.remove(storedBlock);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -1861,7 +1860,8 @@ public class BlockManager {
|
|
|
* @return the up-to-date stored block, if it should be kept.
|
|
|
* Otherwise, null.
|
|
|
*/
|
|
|
- private BlockInfo processReportedBlock(final DatanodeDescriptor dn,
|
|
|
+ private BlockInfo processReportedBlock(final DatanodeDescriptor dn,
|
|
|
+ final String storageID,
|
|
|
final Block block, final ReplicaState reportedState,
|
|
|
final Collection<BlockInfo> toAdd,
|
|
|
final Collection<Block> toInvalidate,
|
|
@@ -1876,7 +1876,7 @@ public class BlockManager {
|
|
|
|
|
|
if (shouldPostponeBlocksFromFuture &&
|
|
|
namesystem.isGenStampInFuture(block)) {
|
|
|
- queueReportedBlock(dn, block, reportedState,
|
|
|
+ queueReportedBlock(dn, storageID, block, reportedState,
|
|
|
QUEUE_REASON_FUTURE_GENSTAMP);
|
|
|
return null;
|
|
|
}
|
|
@@ -1911,7 +1911,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
// If the block is an out-of-date generation stamp or state,
|
|
|
// but we're the standby, we shouldn't treat it as corrupt,
|
|
|
// but instead just queue it for later processing.
|
|
|
- queueReportedBlock(dn, storedBlock, reportedState,
|
|
|
+ queueReportedBlock(dn, storageID, storedBlock, reportedState,
|
|
|
QUEUE_REASON_CORRUPT_STATE);
|
|
|
} else {
|
|
|
toCorrupt.add(c);
|
|
@@ -1938,7 +1938,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
* standby node. @see PendingDataNodeMessages.
|
|
|
* @param reason a textual reason to report in the debug logs
|
|
|
*/
|
|
|
- private void queueReportedBlock(DatanodeDescriptor dn, Block block,
|
|
|
+ private void queueReportedBlock(DatanodeDescriptor dn, String storageID, Block block,
|
|
|
ReplicaState reportedState, String reason) {
|
|
|
assert shouldPostponeBlocksFromFuture;
|
|
|
|
|
@@ -1948,7 +1948,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
" from datanode " + dn + " for later processing " +
|
|
|
"because " + reason + ".");
|
|
|
}
|
|
|
- pendingDNMessages.enqueueReportedBlock(dn, block, reportedState);
|
|
|
+ pendingDNMessages.enqueueReportedBlock(dn, storageID, block, reportedState);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -1971,8 +1971,8 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
if (LOG.isDebugEnabled()) {
|
|
|
LOG.debug("Processing previouly queued message " + rbi);
|
|
|
}
|
|
|
- processAndHandleReportedBlock(
|
|
|
- rbi.getNode(), rbi.getBlock(), rbi.getReportedState(), null);
|
|
|
+ processAndHandleReportedBlock(rbi.getNode(), rbi.getStorageID(),
|
|
|
+ rbi.getBlock(), rbi.getReportedState(), null);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -2090,18 +2090,18 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
|
|
|
void addStoredBlockUnderConstruction(
|
|
|
BlockInfoUnderConstruction block,
|
|
|
- DatanodeDescriptor node,
|
|
|
+ DatanodeDescriptor node, String storageID,
|
|
|
ReplicaState reportedState)
|
|
|
throws IOException {
|
|
|
block.addReplicaIfNotPresent(node, block, reportedState);
|
|
|
if (reportedState == ReplicaState.FINALIZED && block.findDatanode(node) < 0) {
|
|
|
- addStoredBlock(block, node, null, true);
|
|
|
+ addStoredBlock(block, node, storageID, null, true);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
* Faster version of
|
|
|
- * {@link #addStoredBlock(BlockInfo, DatanodeDescriptor, DatanodeDescriptor, boolean)}
|
|
|
+ * {@link #addStoredBlock(BlockInfo, DatanodeDescriptor, String, DatanodeDescriptor, boolean)}
|
|
|
* , intended for use with initial block report at startup. If not in startup
|
|
|
* safe mode, will call standard addStoredBlock(). Assumes this method is
|
|
|
* called "immediately" so there is no need to refresh the storedBlock from
|
|
@@ -2112,17 +2112,17 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
* @throws IOException
|
|
|
*/
|
|
|
private void addStoredBlockImmediate(BlockInfo storedBlock,
|
|
|
- DatanodeDescriptor node)
|
|
|
+ DatanodeDescriptor node, String storageID)
|
|
|
throws IOException {
|
|
|
assert (storedBlock != null && namesystem.hasWriteLock());
|
|
|
if (!namesystem.isInStartupSafeMode()
|
|
|
|| namesystem.isPopulatingReplQueues()) {
|
|
|
- addStoredBlock(storedBlock, node, null, false);
|
|
|
+ addStoredBlock(storedBlock, node, storageID, null, false);
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
// just add it
|
|
|
- node.addBlock(storedBlock);
|
|
|
+ node.addBlock(storageID, storedBlock);
|
|
|
|
|
|
// Now check for completion of blocks and safe block count
|
|
|
int numCurrentReplica = countLiveNodes(storedBlock);
|
|
@@ -2145,6 +2145,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
*/
|
|
|
private Block addStoredBlock(final BlockInfo block,
|
|
|
DatanodeDescriptor node,
|
|
|
+ String storageID,
|
|
|
DatanodeDescriptor delNodeHint,
|
|
|
boolean logEveryBlock)
|
|
|
throws IOException {
|
|
@@ -2170,7 +2171,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
assert bc != null : "Block must belong to a file";
|
|
|
|
|
|
// add block to the datanode
|
|
|
- boolean added = node.addBlock(storedBlock);
|
|
|
+ boolean added = node.addBlock(storageID, storedBlock);
|
|
|
|
|
|
int curReplicaDelta;
|
|
|
if (added) {
|
|
@@ -2614,7 +2615,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
* The given node is reporting that it received a certain block.
|
|
|
*/
|
|
|
@VisibleForTesting
|
|
|
- void addBlock(DatanodeDescriptor node, Block block, String delHint)
|
|
|
+ void addBlock(DatanodeDescriptor node, String storageID, Block block, String delHint)
|
|
|
throws IOException {
|
|
|
// decrement number of blocks scheduled to this datanode.
|
|
|
// for a retry request (of DatanodeProtocol#blockReceivedAndDeleted with
|
|
@@ -2635,11 +2636,12 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
// Modify the blocks->datanode map and node's map.
|
|
|
//
|
|
|
pendingReplications.decrement(block, node);
|
|
|
- processAndHandleReportedBlock(node, block, ReplicaState.FINALIZED,
|
|
|
+ processAndHandleReportedBlock(node, storageID, block, ReplicaState.FINALIZED,
|
|
|
delHintNode);
|
|
|
}
|
|
|
|
|
|
- private void processAndHandleReportedBlock(DatanodeDescriptor node, Block block,
|
|
|
+ private void processAndHandleReportedBlock(DatanodeDescriptor node,
|
|
|
+ String storageID, Block block,
|
|
|
ReplicaState reportedState, DatanodeDescriptor delHintNode)
|
|
|
throws IOException {
|
|
|
// blockReceived reports a finalized block
|
|
@@ -2647,7 +2649,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
Collection<Block> toInvalidate = new LinkedList<Block>();
|
|
|
Collection<BlockToMarkCorrupt> toCorrupt = new LinkedList<BlockToMarkCorrupt>();
|
|
|
Collection<StatefulBlockInfo> toUC = new LinkedList<StatefulBlockInfo>();
|
|
|
- processReportedBlock(node, block, reportedState,
|
|
|
+ processReportedBlock(node, storageID, block, reportedState,
|
|
|
toAdd, toInvalidate, toCorrupt, toUC);
|
|
|
// the block is only in one of the to-do lists
|
|
|
// if it is in none then data-node already has it
|
|
@@ -2655,11 +2657,11 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
: "The block should be only in one of the lists.";
|
|
|
|
|
|
for (StatefulBlockInfo b : toUC) {
|
|
|
- addStoredBlockUnderConstruction(b.storedBlock, node, b.reportedState);
|
|
|
+ addStoredBlockUnderConstruction(b.storedBlock, node, storageID, b.reportedState);
|
|
|
}
|
|
|
long numBlocksLogged = 0;
|
|
|
for (BlockInfo b : toAdd) {
|
|
|
- addStoredBlock(b, node, delHintNode, numBlocksLogged < maxNumBlocksToLog);
|
|
|
+ addStoredBlock(b, node, storageID, delHintNode, numBlocksLogged < maxNumBlocksToLog);
|
|
|
numBlocksLogged++;
|
|
|
}
|
|
|
if (numBlocksLogged > maxNumBlocksToLog) {
|
|
@@ -2673,7 +2675,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
addToInvalidates(b, node);
|
|
|
}
|
|
|
for (BlockToMarkCorrupt b : toCorrupt) {
|
|
|
- markBlockAsCorrupt(b, node);
|
|
|
+ markBlockAsCorrupt(b, node, storageID);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -2685,7 +2687,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
* This method must be called with FSNamesystem lock held.
|
|
|
*/
|
|
|
public void processIncrementalBlockReport(final DatanodeID nodeID,
|
|
|
- final String poolId, final ReceivedDeletedBlockInfo blockInfos[])
|
|
|
+ final String poolId, final StorageReceivedDeletedBlocks srdb)
|
|
|
throws IOException {
|
|
|
assert namesystem.hasWriteLock();
|
|
|
int received = 0;
|
|
@@ -2701,19 +2703,19 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|
|
"Got incremental block report from unregistered or dead node");
|
|
|
}
|
|
|
|
|
|
- for (ReceivedDeletedBlockInfo rdbi : blockInfos) {
|
|
|
+ for (ReceivedDeletedBlockInfo rdbi : srdb.getBlocks()) {
|
|
|
switch (rdbi.getStatus()) {
|
|
|
case DELETED_BLOCK:
|
|
|
removeStoredBlock(rdbi.getBlock(), node);
|
|
|
deleted++;
|
|
|
break;
|
|
|
case RECEIVED_BLOCK:
|
|
|
- addBlock(node, rdbi.getBlock(), rdbi.getDelHints());
|
|
|
+ addBlock(node, srdb.getStorageID(), rdbi.getBlock(), rdbi.getDelHints());
|
|
|
received++;
|
|
|
break;
|
|
|
case RECEIVING_BLOCK:
|
|
|
receiving++;
|
|
|
- processAndHandleReportedBlock(node, rdbi.getBlock(),
|
|
|
+ processAndHandleReportedBlock(node, srdb.getStorageID(), rdbi.getBlock(),
|
|
|
ReplicaState.RBW, null);
|
|
|
break;
|
|
|
default:
|