|
@@ -64,7 +64,9 @@ import org.apache.hadoop.hdfs.server.common.AutoCloseDataSetLock;
|
|
|
import org.apache.hadoop.hdfs.server.common.DataNodeLockManager;
|
|
|
import org.apache.hadoop.hdfs.server.common.DataNodeLockManager.LockLevel;
|
|
|
import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector;
|
|
|
+import org.apache.hadoop.hdfs.server.datanode.DataNodeLayoutSubLockStrategy;
|
|
|
import org.apache.hadoop.hdfs.server.datanode.DataSetLockManager;
|
|
|
+import org.apache.hadoop.hdfs.server.datanode.DataSetSubLockStrategy;
|
|
|
import org.apache.hadoop.hdfs.server.datanode.FileIoProvider;
|
|
|
import org.apache.hadoop.hdfs.server.datanode.FinalizedReplica;
|
|
|
import org.apache.hadoop.hdfs.server.datanode.LocalReplica;
|
|
@@ -198,8 +200,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
@Override // FsDatasetSpi
|
|
|
public Block getStoredBlock(String bpid, long blkid)
|
|
|
throws IOException {
|
|
|
- try (AutoCloseableLock lock = lockManager.readLock(LockLevel.BLOCK_POOl,
|
|
|
- bpid)) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.readLock(LockLevel.DIR,
|
|
|
+ bpid, getReplicaInfo(bpid, blkid).getStorageUuid(),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(blkid))) {
|
|
|
ReplicaInfo r = volumeMap.get(bpid, blkid);
|
|
|
if (r == null) {
|
|
|
return null;
|
|
@@ -288,6 +291,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
private long lastDirScannerNotifyTime;
|
|
|
private volatile long lastDirScannerFinishTime;
|
|
|
|
|
|
+ private final DataSetSubLockStrategy datasetSubLockStrategy;
|
|
|
+
|
|
|
/**
|
|
|
* An FSDataset has a directory where it loads its data files.
|
|
|
*/
|
|
@@ -392,6 +397,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_MAX_NOTIFY_COUNT_KEY,
|
|
|
DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_MAX_NOTIFY_COUNT_DEFAULT);
|
|
|
lastDirScannerNotifyTime = System.currentTimeMillis();
|
|
|
+ this.datasetSubLockStrategy = new DataNodeLayoutSubLockStrategy();
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -430,6 +436,12 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
FsVolumeReference ref) throws IOException {
|
|
|
for (String bp : volumeMap.getBlockPoolList()) {
|
|
|
lockManager.addLock(LockLevel.VOLUME, bp, ref.getVolume().getStorageID());
|
|
|
+ List<String> allSubDirNameForDataSetLock = datasetSubLockStrategy.getAllSubLockNames();
|
|
|
+ for (String dir : allSubDirNameForDataSetLock) {
|
|
|
+ lockManager.addLock(LockLevel.DIR, bp, ref.getVolume().getStorageID(), dir);
|
|
|
+ LOG.info("Added DIR lock for bpid:{}, volume storageid:{}, dir:{}",
|
|
|
+ bp, ref.getVolume().getStorageID(), dir);
|
|
|
+ }
|
|
|
}
|
|
|
DatanodeStorage dnStorage = storageMap.get(sd.getStorageUuid());
|
|
|
if (dnStorage != null) {
|
|
@@ -629,6 +641,12 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
for (String storageUuid : storageToRemove) {
|
|
|
storageMap.remove(storageUuid);
|
|
|
for (String bp : volumeMap.getBlockPoolList()) {
|
|
|
+ List<String> allSubDirNameForDataSetLock = datasetSubLockStrategy.getAllSubLockNames();
|
|
|
+ for (String dir : allSubDirNameForDataSetLock) {
|
|
|
+ lockManager.removeLock(LockLevel.DIR, bp, storageUuid, dir);
|
|
|
+ LOG.info("Removed DIR lock for bpid:{}, volume storageid:{}, dir:{}",
|
|
|
+ bp, storageUuid, dir);
|
|
|
+ }
|
|
|
lockManager.removeLock(LockLevel.VOLUME, bp, storageUuid);
|
|
|
}
|
|
|
}
|
|
@@ -819,8 +837,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
long seekOffset) throws IOException {
|
|
|
|
|
|
ReplicaInfo info;
|
|
|
- try (AutoCloseableLock lock = lockManager.readLock(LockLevel.BLOCK_POOl,
|
|
|
- b.getBlockPoolId())) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.readLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
info = volumeMap.get(b.getBlockPoolId(), b.getLocalBlock());
|
|
|
}
|
|
|
|
|
@@ -914,8 +933,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
@Override // FsDatasetSpi
|
|
|
public ReplicaInputStreams getTmpInputStreams(ExtendedBlock b,
|
|
|
long blkOffset, long metaOffset) throws IOException {
|
|
|
- try (AutoCloseDataSetLock l = lockManager.readLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), getStorageUuidForLock(b))) {
|
|
|
+ try (AutoCloseDataSetLock l = lockManager.readLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
ReplicaInfo info = getReplicaInfo(b);
|
|
|
FsVolumeReference ref = info.getVolume().obtainReference();
|
|
|
try {
|
|
@@ -1380,8 +1400,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
@Override // FsDatasetSpi
|
|
|
public ReplicaHandler append(ExtendedBlock b,
|
|
|
long newGS, long expectedBlockLen) throws IOException {
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), getStorageUuidForLock(b))) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
// If the block was successfully finalized because all packets
|
|
|
// were successfully processed at the Datanode but the ack for
|
|
|
// some of the packets were not received by the client. The client
|
|
@@ -1433,8 +1454,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
private ReplicaInPipeline append(String bpid,
|
|
|
ReplicaInfo replicaInfo, long newGS, long estimateBlockLen)
|
|
|
throws IOException {
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- bpid, replicaInfo.getStorageUuid())) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ bpid, replicaInfo.getStorageUuid(),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(replicaInfo.getBlockId()))) {
|
|
|
// If the block is cached, start uncaching it.
|
|
|
if (replicaInfo.getState() != ReplicaState.FINALIZED) {
|
|
|
throw new IOException("Only a Finalized replica can be appended to; "
|
|
@@ -1530,8 +1552,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
|
|
|
while (true) {
|
|
|
try {
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.BLOCK_POOl,
|
|
|
- b.getBlockPoolId())) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
ReplicaInfo replicaInfo = recoverCheck(b, newGS, expectedBlockLen);
|
|
|
FsVolumeReference ref = replicaInfo.getVolume().obtainReference();
|
|
|
ReplicaInPipeline replica;
|
|
@@ -1564,8 +1587,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
b, newGS, expectedBlockLen);
|
|
|
while (true) {
|
|
|
try {
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), getStorageUuidForLock(b))) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
// check replica's state
|
|
|
ReplicaInfo replicaInfo = recoverCheck(b, newGS, expectedBlockLen);
|
|
|
// bump the replica's GS
|
|
@@ -1650,8 +1674,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
}
|
|
|
|
|
|
ReplicaInPipeline newReplicaInfo;
|
|
|
- try (AutoCloseableLock l = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), v.getStorageID())) {
|
|
|
+ try (AutoCloseableLock l = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), v.getStorageID(),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
newReplicaInfo = v.createRbw(b);
|
|
|
if (newReplicaInfo.getReplicaInfo().getState() != ReplicaState.RBW) {
|
|
|
throw new IOException("CreateRBW returned a replica of state "
|
|
@@ -1681,8 +1706,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
try {
|
|
|
while (true) {
|
|
|
try {
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), getStorageUuidForLock(b))) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
ReplicaInfo replicaInfo =
|
|
|
getReplicaInfo(b.getBlockPoolId(), b.getBlockId());
|
|
|
// check the replica's state
|
|
@@ -1713,8 +1739,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
private ReplicaHandler recoverRbwImpl(ReplicaInPipeline rbw,
|
|
|
ExtendedBlock b, long newGS, long minBytesRcvd, long maxBytesRcvd)
|
|
|
throws IOException {
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), getStorageUuidForLock(b))) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
// check generation stamp
|
|
|
long replicaGenerationStamp = rbw.getGenerationStamp();
|
|
|
if (replicaGenerationStamp < b.getGenerationStamp() ||
|
|
@@ -1775,8 +1802,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
public ReplicaInPipeline convertTemporaryToRbw(
|
|
|
final ExtendedBlock b) throws IOException {
|
|
|
long startTimeMs = Time.monotonicNow();
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), getStorageUuidForLock(b))) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
final long blockId = b.getBlockId();
|
|
|
final long expectedGs = b.getGenerationStamp();
|
|
|
final long visible = b.getNumBytes();
|
|
@@ -1915,8 +1943,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
.getNumBytes());
|
|
|
FsVolumeImpl v = (FsVolumeImpl) ref.getVolume();
|
|
|
ReplicaInPipeline newReplicaInfo;
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), v.getStorageID())) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), v.getStorageID(),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
try {
|
|
|
newReplicaInfo = v.createTemporary(b);
|
|
|
LOG.debug("creating temporary for block: {} on volume: {}",
|
|
@@ -1973,8 +2002,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
ReplicaInfo replicaInfo = null;
|
|
|
ReplicaInfo finalizedReplicaInfo = null;
|
|
|
long startTimeMs = Time.monotonicNow();
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), getStorageUuidForLock(b))) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
if (Thread.interrupted()) {
|
|
|
// Don't allow data modifications from interrupted threads
|
|
|
throw new IOException("Cannot finalize block: " + b + " from Interrupted Thread");
|
|
@@ -2010,8 +2040,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
|
|
|
private ReplicaInfo finalizeReplica(String bpid, ReplicaInfo replicaInfo)
|
|
|
throws IOException {
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- bpid, replicaInfo.getStorageUuid())) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ bpid, replicaInfo.getStorageUuid(),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(replicaInfo.getBlockId()))) {
|
|
|
// Compare generation stamp of old and new replica before finalizing
|
|
|
if (volumeMap.get(bpid, replicaInfo.getBlockId()).getGenerationStamp()
|
|
|
> replicaInfo.getGenerationStamp()) {
|
|
@@ -2060,8 +2091,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
@Override // FsDatasetSpi
|
|
|
public void unfinalizeBlock(ExtendedBlock b) throws IOException {
|
|
|
long startTimeMs = Time.monotonicNow();
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME,
|
|
|
- b.getBlockPoolId(), getStorageUuidForLock(b))) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR,
|
|
|
+ b.getBlockPoolId(), getStorageUuidForLock(b),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(b.getBlockId()))) {
|
|
|
ReplicaInfo replicaInfo = volumeMap.get(b.getBlockPoolId(),
|
|
|
b.getLocalBlock());
|
|
|
if (replicaInfo != null &&
|
|
@@ -2459,7 +2491,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
final String bpid = block.getBlockPoolId();
|
|
|
final Block localBlock = block.getLocalBlock();
|
|
|
final long blockId = localBlock.getBlockId();
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.BLOCK_POOl, bpid)) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR, bpid, volume.getStorageID(),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(blockId))) {
|
|
|
final ReplicaInfo info = volumeMap.get(bpid, localBlock);
|
|
|
if (info == null) {
|
|
|
ReplicaInfo infoByBlockId = volumeMap.get(bpid, blockId);
|
|
@@ -2548,8 +2581,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
bpid + ": ReplicaInfo not found.");
|
|
|
return;
|
|
|
}
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME, bpid,
|
|
|
- info.getStorageUuid())) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR, bpid,
|
|
|
+ info.getStorageUuid(), datasetSubLockStrategy.blockIdToSubLock(blockId))) {
|
|
|
boolean success = false;
|
|
|
try {
|
|
|
info = volumeMap.get(bpid, blockId);
|
|
@@ -2746,7 +2779,8 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
lastDirScannerNotifyTime = startTimeMs;
|
|
|
}
|
|
|
String storageUuid = vol.getStorageID();
|
|
|
- try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.VOLUME, bpid, storageUuid)) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.writeLock(LockLevel.DIR, bpid,
|
|
|
+ vol.getStorageID(), datasetSubLockStrategy.blockIdToSubLock(blockId))) {
|
|
|
if (!storageMap.containsKey(storageUuid)) {
|
|
|
// Storage was already removed
|
|
|
return;
|
|
@@ -3231,8 +3265,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
@Override // FsDatasetSpi
|
|
|
public long getReplicaVisibleLength(final ExtendedBlock block)
|
|
|
throws IOException {
|
|
|
- try (AutoCloseableLock lock = lockManager.readLock(LockLevel.BLOCK_POOl,
|
|
|
- block.getBlockPoolId())) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.readLock(LockLevel.DIR,
|
|
|
+ block.getBlockPoolId(), getStorageUuidForLock(block),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(block.getBlockId()))) {
|
|
|
final Replica replica = getReplicaInfo(block.getBlockPoolId(),
|
|
|
block.getBlockId());
|
|
|
if (replica.getGenerationStamp() < block.getGenerationStamp()) {
|
|
@@ -3259,6 +3294,12 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
Set<String> vols = storageMap.keySet();
|
|
|
for (String v : vols) {
|
|
|
lockManager.addLock(LockLevel.VOLUME, bpid, v);
|
|
|
+ List<String> allSubDirNameForDataSetLock = datasetSubLockStrategy.getAllSubLockNames();
|
|
|
+ for (String dir : allSubDirNameForDataSetLock) {
|
|
|
+ lockManager.addLock(LockLevel.DIR, bpid, v, dir);
|
|
|
+ LOG.info("Added DIR lock for bpid:{}, volume storageid:{}, dir:{}",
|
|
|
+ bpid, v, dir);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
try {
|
|
@@ -3386,8 +3427,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
|
|
|
@Override // FsDatasetSpi
|
|
|
public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock block)
|
|
|
throws IOException {
|
|
|
- try (AutoCloseableLock lock = lockManager.readLock(LockLevel.BLOCK_POOl,
|
|
|
- block.getBlockPoolId())) {
|
|
|
+ try (AutoCloseableLock lock = lockManager.readLock(LockLevel.DIR,
|
|
|
+ block.getBlockPoolId(), getStorageUuidForLock(block),
|
|
|
+ datasetSubLockStrategy.blockIdToSubLock(block.getBlockId()))) {
|
|
|
final Replica replica = volumeMap.get(block.getBlockPoolId(),
|
|
|
block.getBlockId());
|
|
|
if (replica == null) {
|