|
@@ -33,8 +33,6 @@ import java.util.concurrent.TimeUnit;
|
|
|
import java.util.concurrent.locks.Condition;
|
|
|
import java.util.concurrent.locks.ReentrantLock;
|
|
|
|
|
|
-import org.apache.commons.logging.Log;
|
|
|
-import org.apache.commons.logging.LogFactory;
|
|
|
import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
import org.apache.hadoop.fs.UnresolvedLinkException;
|
|
|
import org.apache.hadoop.hdfs.protocol.Block;
|
|
@@ -53,8 +51,11 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
|
|
|
import org.apache.hadoop.hdfs.util.ReadOnlyList;
|
|
|
import org.apache.hadoop.util.GSet;
|
|
|
import org.apache.hadoop.util.Time;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
|
|
|
import com.google.common.base.Preconditions;
|
|
|
+
|
|
|
|
|
|
/**
|
|
|
* Scans the namesystem, scheduling blocks to be cached as appropriate.
|
|
@@ -65,8 +66,8 @@ import com.google.common.base.Preconditions;
|
|
|
@InterfaceAudience.LimitedPrivate({"HDFS"})
|
|
|
public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
|
|
|
- private static final Log LOG =
|
|
|
- LogFactory.getLog(CacheReplicationMonitor.class);
|
|
|
+ private static final Logger LOG =
|
|
|
+ LoggerFactory.getLogger(CacheReplicationMonitor.class);
|
|
|
|
|
|
private final FSNamesystem namesystem;
|
|
|
|
|
@@ -207,7 +208,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
LOG.info("Shutting down CacheReplicationMonitor.");
|
|
|
return;
|
|
|
} catch (Throwable t) {
|
|
|
- LOG.fatal("Thread exiting", t);
|
|
|
+ LOG.error("Thread exiting", t);
|
|
|
terminate(1, t);
|
|
|
}
|
|
|
}
|
|
@@ -316,11 +317,8 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
scannedDirectives++;
|
|
|
// Skip processing this entry if it has expired
|
|
|
if (directive.getExpiryTime() > 0 && directive.getExpiryTime() <= now) {
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
- LOG.debug("Directive " + directive.getId() + ": the directive " +
|
|
|
- "expired at " + directive.getExpiryTime() + " (now = " +
|
|
|
- now + ")");
|
|
|
- }
|
|
|
+ LOG.debug("Directive {}: the directive expired at {} (now = {})",
|
|
|
+ directive.getId(), directive.getExpiryTime(), now);
|
|
|
continue;
|
|
|
}
|
|
|
String path = directive.getPath();
|
|
@@ -329,17 +327,14 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
node = fsDir.getINode(path);
|
|
|
} catch (UnresolvedLinkException e) {
|
|
|
// We don't cache through symlinks
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
- LOG.debug("Directive " + directive.getId() +
|
|
|
- ": got UnresolvedLinkException while resolving path " + path);
|
|
|
- }
|
|
|
+ LOG.debug("Directive {}: got UnresolvedLinkException while resolving "
|
|
|
+ + "path {}", directive.getId(), path
|
|
|
+ );
|
|
|
continue;
|
|
|
}
|
|
|
if (node == null) {
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
- LOG.debug("Directive " + directive.getId() +
|
|
|
- ": No inode found at " + path);
|
|
|
- }
|
|
|
+ LOG.debug("Directive {}: No inode found at {}", directive.getId(),
|
|
|
+ path);
|
|
|
} else if (node.isDirectory()) {
|
|
|
INodeDirectory dir = node.asDirectory();
|
|
|
ReadOnlyList<INode> children = dir
|
|
@@ -352,10 +347,8 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
} else if (node.isFile()) {
|
|
|
rescanFile(directive, node.asFile());
|
|
|
} else {
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
- LOG.debug("Directive " + directive.getId() +
|
|
|
- ": ignoring non-directive, non-file inode " + node);
|
|
|
- }
|
|
|
+ LOG.debug("Directive {}: ignoring non-directive, non-file inode {} ",
|
|
|
+ directive.getId(), node);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -381,15 +374,13 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
// do not cache this file.
|
|
|
CachePool pool = directive.getPool();
|
|
|
if (pool.getBytesNeeded() > pool.getLimit()) {
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
- LOG.debug(String.format("Directive %d: not scanning file %s because " +
|
|
|
- "bytesNeeded for pool %s is %d, but the pool's limit is %d",
|
|
|
- directive.getId(),
|
|
|
- file.getFullPathName(),
|
|
|
- pool.getPoolName(),
|
|
|
- pool.getBytesNeeded(),
|
|
|
- pool.getLimit()));
|
|
|
- }
|
|
|
+ LOG.debug("Directive {}: not scanning file {} because " +
|
|
|
+ "bytesNeeded for pool {} is {}, but the pool's limit is {}",
|
|
|
+ directive.getId(),
|
|
|
+ file.getFullPathName(),
|
|
|
+ pool.getPoolName(),
|
|
|
+ pool.getBytesNeeded(),
|
|
|
+ pool.getLimit());
|
|
|
return;
|
|
|
}
|
|
|
|
|
@@ -397,11 +388,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
for (BlockInfo blockInfo : blockInfos) {
|
|
|
if (!blockInfo.getBlockUCState().equals(BlockUCState.COMPLETE)) {
|
|
|
// We don't try to cache blocks that are under construction.
|
|
|
- if (LOG.isTraceEnabled()) {
|
|
|
- LOG.trace("Directive " + directive.getId() + ": can't cache " +
|
|
|
- "block " + blockInfo + " because it is in state " +
|
|
|
- blockInfo.getBlockUCState() + ", not COMPLETE.");
|
|
|
- }
|
|
|
+ LOG.trace("Directive {}: can't cache block {} because it is in state "
|
|
|
+ + "{}, not COMPLETE.", directive.getId(), blockInfo,
|
|
|
+ blockInfo.getBlockUCState()
|
|
|
+ );
|
|
|
continue;
|
|
|
}
|
|
|
Block block = new Block(blockInfo.getBlockId());
|
|
@@ -415,7 +405,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
// Update bytesUsed using the current replication levels.
|
|
|
// Assumptions: we assume that all the blocks are the same length
|
|
|
// on each datanode. We can assume this because we're only caching
|
|
|
- // blocks in state COMMITTED.
|
|
|
+ // blocks in state COMPLETE.
|
|
|
// Note that if two directives are caching the same block(s), they will
|
|
|
// both get them added to their bytesCached.
|
|
|
List<DatanodeDescriptor> cachedOn =
|
|
@@ -441,21 +431,16 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
ocblock.setReplicationAndMark(directive.getReplication(), mark);
|
|
|
}
|
|
|
}
|
|
|
- if (LOG.isTraceEnabled()) {
|
|
|
- LOG.trace("Directive " + directive.getId() + ": setting replication " +
|
|
|
- "for block " + blockInfo + " to " + ocblock.getReplication());
|
|
|
- }
|
|
|
+ LOG.trace("Directive {}: setting replication for block {} to {}",
|
|
|
+ directive.getId(), blockInfo, ocblock.getReplication());
|
|
|
}
|
|
|
// Increment the "cached" statistics
|
|
|
directive.addBytesCached(cachedTotal);
|
|
|
if (cachedTotal == neededTotal) {
|
|
|
directive.addFilesCached(1);
|
|
|
}
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
- LOG.debug("Directive " + directive.getId() + ": caching " +
|
|
|
- file.getFullPathName() + ": " + cachedTotal + "/" + neededTotal +
|
|
|
- " bytes");
|
|
|
- }
|
|
|
+ LOG.debug("Directive {}: caching {}: {}/{} bytes", directive.getId(),
|
|
|
+ file.getFullPathName(), cachedTotal, neededTotal);
|
|
|
}
|
|
|
|
|
|
private String findReasonForNotCaching(CachedBlock cblock,
|
|
@@ -512,11 +497,9 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
iter.hasNext(); ) {
|
|
|
DatanodeDescriptor datanode = iter.next();
|
|
|
if (!cblock.isInList(datanode.getCached())) {
|
|
|
- if (LOG.isTraceEnabled()) {
|
|
|
- LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
|
|
|
- "PENDING_UNCACHED for node " + datanode.getDatanodeUuid() +
|
|
|
- "because the DataNode uncached it.");
|
|
|
- }
|
|
|
+ LOG.trace("Block {}: removing from PENDING_UNCACHED for node {} "
|
|
|
+ + "because the DataNode uncached it.", cblock.getBlockId(),
|
|
|
+ datanode.getDatanodeUuid());
|
|
|
datanode.getPendingUncached().remove(cblock);
|
|
|
iter.remove();
|
|
|
}
|
|
@@ -526,10 +509,8 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
String reason = findReasonForNotCaching(cblock, blockInfo);
|
|
|
int neededCached = 0;
|
|
|
if (reason != null) {
|
|
|
- if (LOG.isTraceEnabled()) {
|
|
|
- LOG.trace("Block " + cblock.getBlockId() + ": can't cache " +
|
|
|
- "block because it is " + reason);
|
|
|
- }
|
|
|
+ LOG.trace("Block {}: can't cache block because it is {}",
|
|
|
+ cblock.getBlockId(), reason);
|
|
|
} else {
|
|
|
neededCached = cblock.getReplication();
|
|
|
}
|
|
@@ -541,12 +522,12 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
DatanodeDescriptor datanode = iter.next();
|
|
|
datanode.getPendingCached().remove(cblock);
|
|
|
iter.remove();
|
|
|
- if (LOG.isTraceEnabled()) {
|
|
|
- LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
|
|
|
- "PENDING_CACHED for node " + datanode.getDatanodeUuid() +
|
|
|
- "because we already have " + numCached + " cached " +
|
|
|
- "replicas and we only need " + neededCached);
|
|
|
- }
|
|
|
+ LOG.trace("Block {}: removing from PENDING_CACHED for node {} "
|
|
|
+ + "because we already have {} cached replicas and we only" +
|
|
|
+ " need {}",
|
|
|
+ cblock.getBlockId(), datanode.getDatanodeUuid(), numCached,
|
|
|
+ neededCached
|
|
|
+ );
|
|
|
}
|
|
|
}
|
|
|
if (numCached < neededCached) {
|
|
@@ -556,12 +537,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
DatanodeDescriptor datanode = iter.next();
|
|
|
datanode.getPendingUncached().remove(cblock);
|
|
|
iter.remove();
|
|
|
- if (LOG.isTraceEnabled()) {
|
|
|
- LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
|
|
|
- "PENDING_UNCACHED for node " + datanode.getDatanodeUuid() +
|
|
|
- "because we only have " + numCached + " cached replicas " +
|
|
|
- "and we need " + neededCached);
|
|
|
- }
|
|
|
+ LOG.trace("Block {}: removing from PENDING_UNCACHED for node {} "
|
|
|
+ + "because we only have {} cached replicas and we need " +
|
|
|
+ "{}", cblock.getBlockId(), datanode.getDatanodeUuid(),
|
|
|
+ numCached, neededCached
|
|
|
+ );
|
|
|
}
|
|
|
}
|
|
|
int neededUncached = numCached -
|
|
@@ -581,11 +561,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
pendingUncached.isEmpty() &&
|
|
|
pendingCached.isEmpty()) {
|
|
|
// we have nothing more to do with this block.
|
|
|
- if (LOG.isTraceEnabled()) {
|
|
|
- LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
|
|
|
- "cachedBlocks, since neededCached == 0, and " +
|
|
|
- "pendingUncached and pendingCached are empty.");
|
|
|
- }
|
|
|
+ LOG.trace("Block {}: removing from cachedBlocks, since neededCached "
|
|
|
+ + "== 0, and pendingUncached and pendingCached are empty.",
|
|
|
+ cblock.getBlockId()
|
|
|
+ );
|
|
|
cbIter.remove();
|
|
|
}
|
|
|
}
|
|
@@ -643,18 +622,14 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
BlockInfo blockInfo = blockManager.
|
|
|
getStoredBlock(new Block(cachedBlock.getBlockId()));
|
|
|
if (blockInfo == null) {
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
- LOG.debug("Block " + cachedBlock.getBlockId() + ": can't add new " +
|
|
|
- "cached replicas, because there is no record of this block " +
|
|
|
- "on the NameNode.");
|
|
|
- }
|
|
|
+ LOG.debug("Block {}: can't add new cached replicas," +
|
|
|
+ " because there is no record of this block " +
|
|
|
+ "on the NameNode.", cachedBlock.getBlockId());
|
|
|
return;
|
|
|
}
|
|
|
if (!blockInfo.isComplete()) {
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
- LOG.debug("Block " + cachedBlock.getBlockId() + ": can't cache this " +
|
|
|
- "block, because it is not yet complete.");
|
|
|
- }
|
|
|
+ LOG.debug("Block {}: can't cache this block, because it is not yet"
|
|
|
+ + " complete.", cachedBlock.getBlockId());
|
|
|
return;
|
|
|
}
|
|
|
// Filter the list of replicas to only the valid targets
|
|
@@ -678,7 +653,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
if (pendingCached.contains(datanode) || cached.contains(datanode)) {
|
|
|
continue;
|
|
|
}
|
|
|
- long pendingCapacity = datanode.getCacheRemaining();
|
|
|
+ long pendingBytes = 0;
|
|
|
// Subtract pending cached blocks from effective capacity
|
|
|
Iterator<CachedBlock> it = datanode.getPendingCached().iterator();
|
|
|
while (it.hasNext()) {
|
|
@@ -686,7 +661,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
BlockInfo info =
|
|
|
blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
|
|
|
if (info != null) {
|
|
|
- pendingCapacity -= info.getNumBytes();
|
|
|
+ pendingBytes -= info.getNumBytes();
|
|
|
}
|
|
|
}
|
|
|
it = datanode.getPendingUncached().iterator();
|
|
@@ -696,17 +671,17 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
BlockInfo info =
|
|
|
blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
|
|
|
if (info != null) {
|
|
|
- pendingCapacity += info.getNumBytes();
|
|
|
+ pendingBytes += info.getNumBytes();
|
|
|
}
|
|
|
}
|
|
|
+ long pendingCapacity = pendingBytes + datanode.getCacheRemaining();
|
|
|
if (pendingCapacity < blockInfo.getNumBytes()) {
|
|
|
- if (LOG.isTraceEnabled()) {
|
|
|
- LOG.trace("Block " + blockInfo.getBlockId() + ": DataNode " +
|
|
|
- datanode.getDatanodeUuid() + " is not a valid possibility " +
|
|
|
- "because the block has size " + blockInfo.getNumBytes() + ", but " +
|
|
|
- "the DataNode only has " + datanode.getCacheRemaining() + " " +
|
|
|
- "bytes of cache remaining.");
|
|
|
- }
|
|
|
+ LOG.trace("Block {}: DataNode {} is not a valid possibility " +
|
|
|
+ "because the block has size {}, but the DataNode only has {}" +
|
|
|
+ " bytes of cache remaining ({} pending bytes, {} already cached).",
|
|
|
+ blockInfo.getBlockId(), datanode.getDatanodeUuid(),
|
|
|
+ blockInfo.getNumBytes(), pendingCapacity, pendingBytes,
|
|
|
+ datanode.getCacheRemaining());
|
|
|
outOfCapacity++;
|
|
|
continue;
|
|
|
}
|
|
@@ -715,22 +690,20 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
|
|
List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities,
|
|
|
neededCached, blockManager.getDatanodeManager().getStaleInterval());
|
|
|
for (DatanodeDescriptor datanode : chosen) {
|
|
|
- if (LOG.isTraceEnabled()) {
|
|
|
- LOG.trace("Block " + blockInfo.getBlockId() + ": added to " +
|
|
|
- "PENDING_CACHED on DataNode " + datanode.getDatanodeUuid());
|
|
|
- }
|
|
|
+ LOG.trace("Block {}: added to PENDING_CACHED on DataNode {}",
|
|
|
+ blockInfo.getBlockId(), datanode.getDatanodeUuid());
|
|
|
pendingCached.add(datanode);
|
|
|
boolean added = datanode.getPendingCached().add(cachedBlock);
|
|
|
assert added;
|
|
|
}
|
|
|
// We were unable to satisfy the requested replication factor
|
|
|
if (neededCached > chosen.size()) {
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
- LOG.debug("Block " + blockInfo.getBlockId() + ": we only have " +
|
|
|
- (cachedBlock.getReplication() - neededCached + chosen.size()) +
|
|
|
- " of " + cachedBlock.getReplication() + " cached replicas. " +
|
|
|
- outOfCapacity + " DataNodes have insufficient cache capacity.");
|
|
|
- }
|
|
|
+ LOG.debug("Block {}: we only have {} of {} cached replicas."
|
|
|
+ + " {} DataNodes have insufficient cache capacity.",
|
|
|
+ blockInfo.getBlockId(),
|
|
|
+ (cachedBlock.getReplication() - neededCached + chosen.size()),
|
|
|
+ cachedBlock.getReplication(), outOfCapacity
|
|
|
+ );
|
|
|
}
|
|
|
}
|
|
|
|