소스 검색

HDFS-17388. [FGL] Client RPCs involving write process supports fine-grained lock (#6589)

ZanderXu 1 년 전
부모
커밋
7577f3ad61

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1584,7 +1584,7 @@ public class BlockManager implements BlockStatsMXBean {
       final boolean inSnapshot, FileEncryptionInfo feInfo,
       final boolean inSnapshot, FileEncryptionInfo feInfo,
       ErasureCodingPolicy ecPolicy)
       ErasureCodingPolicy ecPolicy)
       throws IOException {
       throws IOException {
-    assert namesystem.hasReadLock();
+    assert namesystem.hasReadLock(FSNamesystemLockMode.BM);
     if (blocks == null) {
     if (blocks == null) {
       return null;
       return null;
     } else if (blocks.length == 0) {
     } else if (blocks.length == 0) {

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirAppendOp.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
 import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp;
 import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.RecoverLeaseOp;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.RecoverLeaseOp;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion.Feature;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion.Feature;
+import org.apache.hadoop.hdfs.server.namenode.fgl.FSNamesystemLockMode;
 import org.apache.hadoop.ipc.RetriableException;
 import org.apache.hadoop.ipc.RetriableException;
 
 
 import org.apache.hadoop.util.Preconditions;
 import org.apache.hadoop.util.Preconditions;
@@ -82,7 +83,7 @@ final class FSDirAppendOp {
       final String srcArg, final FSPermissionChecker pc, final String holder,
       final String srcArg, final FSPermissionChecker pc, final String holder,
       final String clientMachine, final boolean newBlock,
       final String clientMachine, final boolean newBlock,
       final boolean logRetryCache) throws IOException {
       final boolean logRetryCache) throws IOException {
-    assert fsn.hasWriteLock();
+    assert fsn.hasWriteLock(FSNamesystemLockMode.GLOBAL);
 
 
     final LocatedBlock lb;
     final LocatedBlock lb;
     final FSDirectory fsd = fsn.getFSDirectory();
     final FSDirectory fsd = fsn.getFSDirectory();
@@ -180,7 +181,7 @@ final class FSDirAppendOp {
       final String clientMachine, final boolean newBlock,
       final String clientMachine, final boolean newBlock,
       final boolean writeToEditLog, final boolean logRetryCache)
       final boolean writeToEditLog, final boolean logRetryCache)
       throws IOException {
       throws IOException {
-    assert fsn.hasWriteLock();
+    assert fsn.hasWriteLock(FSNamesystemLockMode.GLOBAL);
 
 
     final INodeFile file = iip.getLastINode().asFile();
     final INodeFile file = iip.getLastINode().asFile();
     final QuotaCounts delta = verifyQuotaForUCBlock(fsn, file, iip);
     final QuotaCounts delta = verifyQuotaForUCBlock(fsn, file, iip);

+ 3 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirDeleteOp.java

@@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp;
 import org.apache.hadoop.hdfs.server.namenode.FSDirectory.DirOp;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INode.ReclaimContext;
 import org.apache.hadoop.hdfs.server.namenode.INode.ReclaimContext;
+import org.apache.hadoop.hdfs.server.namenode.fgl.FSNamesystemLockMode;
 import org.apache.hadoop.util.ChunkedArrayList;
 import org.apache.hadoop.util.ChunkedArrayList;
 
 
 import java.io.IOException;
 import java.io.IOException;
@@ -170,7 +171,8 @@ class FSDirDeleteOp {
   static BlocksMapUpdateInfo deleteInternal(
   static BlocksMapUpdateInfo deleteInternal(
       FSNamesystem fsn, INodesInPath iip, boolean logRetryCache)
       FSNamesystem fsn, INodesInPath iip, boolean logRetryCache)
       throws IOException {
       throws IOException {
-    assert fsn.hasWriteLock();
+    // Delete INode and modify BlockInfo
+    assert fsn.hasWriteLock(FSNamesystemLockMode.GLOBAL);
     if (NameNode.stateChangeLog.isDebugEnabled()) {
     if (NameNode.stateChangeLog.isDebugEnabled()) {
       NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + iip.getPath());
       NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + iip.getPath());
     }
     }

+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java

@@ -84,8 +84,8 @@ final class FSDirEncryptionZoneOp {
   private static EncryptedKeyVersion generateEncryptedDataEncryptionKey(
   private static EncryptedKeyVersion generateEncryptedDataEncryptionKey(
       final FSDirectory fsd, final String ezKeyName) throws IOException {
       final FSDirectory fsd, final String ezKeyName) throws IOException {
     // must not be holding lock during this operation
     // must not be holding lock during this operation
-    assert !fsd.getFSNamesystem().hasReadLock();
-    assert !fsd.getFSNamesystem().hasWriteLock();
+    assert !fsd.getFSNamesystem().hasReadLock(FSNamesystemLockMode.FS);
+    assert !fsd.getFSNamesystem().hasWriteLock(FSNamesystemLockMode.FS);
     if (ezKeyName == null) {
     if (ezKeyName == null) {
       return null;
       return null;
     }
     }
@@ -657,13 +657,13 @@ final class FSDirEncryptionZoneOp {
     Preconditions.checkNotNull(ezKeyName);
     Preconditions.checkNotNull(ezKeyName);
 
 
     // Generate EDEK while not holding the fsn lock.
     // Generate EDEK while not holding the fsn lock.
-    fsn.writeUnlock("getEncryptionKeyInfo");
+    fsn.writeUnlock(FSNamesystemLockMode.FS, "getEncryptionKeyInfo");
     try {
     try {
       EncryptionFaultInjector.getInstance().startFileBeforeGenerateKey();
       EncryptionFaultInjector.getInstance().startFileBeforeGenerateKey();
       return new EncryptionKeyInfo(protocolVersion, suite, ezKeyName,
       return new EncryptionKeyInfo(protocolVersion, suite, ezKeyName,
           generateEncryptedDataEncryptionKey(fsd, ezKeyName));
           generateEncryptedDataEncryptionKey(fsd, ezKeyName));
     } finally {
     } finally {
-      fsn.writeLock();
+      fsn.writeLock(FSNamesystemLockMode.FS);
       EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
       EncryptionFaultInjector.getInstance().startFileAfterGenerateKey();
     }
     }
   }
   }

+ 16 - 9
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java

@@ -18,6 +18,7 @@
 
 
 package org.apache.hadoop.hdfs.server.namenode;
 package org.apache.hadoop.hdfs.server.namenode;
 
 
+import org.apache.hadoop.hdfs.server.namenode.fgl.FSNamesystemLockMode;
 import org.apache.hadoop.util.Preconditions;
 import org.apache.hadoop.util.Preconditions;
 
 
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.ContentSummary;
@@ -444,16 +445,22 @@ class FSDirStatAndListingOp {
       if (isEncrypted) {
       if (isEncrypted) {
         feInfo = FSDirEncryptionZoneOp.getFileEncryptionInfo(fsd, iip);
         feInfo = FSDirEncryptionZoneOp.getFileEncryptionInfo(fsd, iip);
       }
       }
+      // Computing the file size and building the located blocks both need the BM lock.
       if (needLocation) {
       if (needLocation) {
-        final boolean inSnapshot = snapshot != Snapshot.CURRENT_STATE_ID;
-        final boolean isUc = !inSnapshot && fileNode.isUnderConstruction();
-        final long fileSize = !inSnapshot && isUc
-            ? fileNode.computeFileSizeNotIncludingLastUcBlock() : size;
-        loc = fsd.getBlockManager().createLocatedBlocks(
-            fileNode.getBlocks(snapshot), fileSize, isUc, 0L, size,
-            needBlockToken, inSnapshot, feInfo, ecPolicy);
-        if (loc == null) {
-          loc = new LocatedBlocks();
+        fsd.getFSNamesystem().readLock(FSNamesystemLockMode.BM);
+        try {
+          final boolean inSnapshot = snapshot != Snapshot.CURRENT_STATE_ID;
+          final boolean isUc = !inSnapshot && fileNode.isUnderConstruction();
+          final long fileSize = !inSnapshot && isUc
+              ? fileNode.computeFileSizeNotIncludingLastUcBlock() : size;
+          loc = fsd.getBlockManager().createLocatedBlocks(
+              fileNode.getBlocks(snapshot), fileSize, isUc, 0L, size,
+              needBlockToken, inSnapshot, feInfo, ecPolicy);
+          if (loc == null) {
+            loc = new LocatedBlocks();
+          }
+        } finally {
+          fsd.getFSNamesystem().readUnlock(FSNamesystemLockMode.BM, "createFileStatus");
         }
         }
       }
       }
     } else if (node.isDirectory()) {
     } else if (node.isDirectory()) {

+ 24 - 19
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java

@@ -364,7 +364,7 @@ class FSDirWriteFileOp {
       boolean shouldReplicate, String ecPolicyName, String storagePolicy,
       boolean shouldReplicate, String ecPolicyName, String storagePolicy,
       boolean logRetryEntry)
       boolean logRetryEntry)
       throws IOException {
       throws IOException {
-    assert fsn.hasWriteLock();
+    assert fsn.hasWriteLock(FSNamesystemLockMode.FS);
     boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
     boolean overwrite = flag.contains(CreateFlag.OVERWRITE);
     boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST);
     boolean isLazyPersist = flag.contains(CreateFlag.LAZY_PERSIST);
 
 
@@ -372,22 +372,27 @@ class FSDirWriteFileOp {
     FSDirectory fsd = fsn.getFSDirectory();
     FSDirectory fsd = fsn.getFSDirectory();
 
 
     if (iip.getLastINode() != null) {
     if (iip.getLastINode() != null) {
-      if (overwrite) {
-        List<INode> toRemoveINodes = new ChunkedArrayList<>();
-        List<Long> toRemoveUCFiles = new ChunkedArrayList<>();
-        long ret = FSDirDeleteOp.delete(fsd, iip, toRemoveBlocks,
-                                        toRemoveINodes, toRemoveUCFiles, now());
-        if (ret >= 0) {
-          iip = INodesInPath.replace(iip, iip.length() - 1, null);
-          FSDirDeleteOp.incrDeletedFileCount(ret);
-          fsn.removeLeasesAndINodes(toRemoveUCFiles, toRemoveINodes, true);
+      fsn.writeLock(FSNamesystemLockMode.BM);
+      try {
+        if (overwrite) {
+          List<INode> toRemoveINodes = new ChunkedArrayList<>();
+          List<Long> toRemoveUCFiles = new ChunkedArrayList<>();
+          long ret = FSDirDeleteOp.delete(fsd, iip, toRemoveBlocks,
+              toRemoveINodes, toRemoveUCFiles, now());
+          if (ret >= 0) {
+            iip = INodesInPath.replace(iip, iip.length() - 1, null);
+            FSDirDeleteOp.incrDeletedFileCount(ret);
+            fsn.removeLeasesAndINodes(toRemoveUCFiles, toRemoveINodes, true);
+          }
+        } else {
+          // If lease soft limit time is expired, recover the lease
+          fsn.recoverLeaseInternal(FSNamesystem.RecoverLeaseOp.CREATE_FILE, iip,
+              src, holder, clientMachine, false);
+          throw new FileAlreadyExistsException(src + " for client " +
+              clientMachine + " already exists");
         }
         }
-      } else {
-        // If lease soft limit time is expired, recover the lease
-        fsn.recoverLeaseInternal(FSNamesystem.RecoverLeaseOp.CREATE_FILE, iip,
-                                 src, holder, clientMachine, false);
-        throw new FileAlreadyExistsException(src + " for client " +
-            clientMachine + " already exists");
+      } finally {
+        fsn.writeUnlock(FSNamesystemLockMode.BM, "create");
       }
       }
     }
     }
     fsn.checkFsObjectLimit();
     fsn.checkFsObjectLimit();
@@ -597,7 +602,7 @@ class FSDirWriteFileOp {
       FSNamesystem fsn, INodesInPath iip, long fileId, String clientName,
       FSNamesystem fsn, INodesInPath iip, long fileId, String clientName,
       ExtendedBlock previous, LocatedBlock[] onRetryBlock)
       ExtendedBlock previous, LocatedBlock[] onRetryBlock)
       throws IOException {
       throws IOException {
-    assert fsn.hasReadLock();
+    assert fsn.hasReadLock(FSNamesystemLockMode.GLOBAL);
     String src = iip.getPath();
     String src = iip.getPath();
     checkBlock(fsn, previous);
     checkBlock(fsn, previous);
     onRetryBlock[0] = null;
     onRetryBlock[0] = null;
@@ -695,7 +700,7 @@ class FSDirWriteFileOp {
       FSNamesystem fsn, INodesInPath iip,
       FSNamesystem fsn, INodesInPath iip,
       String holder, Block last, long fileId)
       String holder, Block last, long fileId)
       throws IOException {
       throws IOException {
-    assert fsn.hasWriteLock();
+    assert fsn.hasWriteLock(FSNamesystemLockMode.GLOBAL);
     final String src = iip.getPath();
     final String src = iip.getPath();
     final INodeFile pendingFile;
     final INodeFile pendingFile;
     INode inode = null;
     INode inode = null;
@@ -779,7 +784,7 @@ class FSDirWriteFileOp {
   static void saveAllocatedBlock(FSNamesystem fsn, String src,
   static void saveAllocatedBlock(FSNamesystem fsn, String src,
       INodesInPath inodesInPath, Block newBlock, DatanodeStorageInfo[] targets,
       INodesInPath inodesInPath, Block newBlock, DatanodeStorageInfo[] targets,
       BlockType blockType) throws IOException {
       BlockType blockType) throws IOException {
-    assert fsn.hasWriteLock();
+    assert fsn.hasWriteLock(FSNamesystemLockMode.GLOBAL);
     BlockInfo b = addBlock(fsn.dir, src, inodesInPath, newBlock, targets,
     BlockInfo b = addBlock(fsn.dir, src, inodesInPath, newBlock, targets,
         blockType);
         blockType);
     logAllocatedBlock(src, b);
     logAllocatedBlock(src, b);

+ 50 - 46
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -2439,14 +2439,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
     try {
     try {
-      writeLock();
+      writeLock(FSNamesystemLockMode.FS);
       try {
       try {
         checkOperation(OperationCategory.WRITE);
         checkOperation(OperationCategory.WRITE);
         checkNameNodeSafeMode("Cannot create symlink " + link);
         checkNameNodeSafeMode("Cannot create symlink " + link);
         auditStat = FSDirSymlinkOp.createSymlinkInt(this, target, link,
         auditStat = FSDirSymlinkOp.createSymlinkInt(this, target, link,
             dirPerms, createParent, logRetryCache);
             dirPerms, createParent, logRetryCache);
       } finally {
       } finally {
-        writeUnlock(operationName,
+        writeUnlock(FSNamesystemLockMode.FS, operationName,
             getLockReportInfoSupplier(link, target, auditStat));
             getLockReportInfoSupplier(link, target, auditStat));
       }
       }
     } catch (AccessControlException e) {
     } catch (AccessControlException e) {
@@ -2796,7 +2796,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
 
 
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
-    writeLock();
+    writeLock(FSNamesystemLockMode.FS);
     try {
     try {
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
       checkNameNodeSafeMode("Cannot create file" + src);
       checkNameNodeSafeMode("Cannot create file" + src);
@@ -2858,7 +2858,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         dir.writeUnlock();
         dir.writeUnlock();
       }
       }
     } finally {
     } finally {
-      writeUnlock("create", getLockReportInfoSupplier(src, null, stat));
+      writeUnlock(FSNamesystemLockMode.FS, "create",
+          getLockReportInfoSupplier(src, null, stat));
       // There might be transactions logged while trying to recover the lease.
       // There might be transactions logged while trying to recover the lease.
       // They need to be sync'ed even when an exception was thrown.
       // They need to be sync'ed even when an exception was thrown.
       if (!skipSync) {
       if (!skipSync) {
@@ -2892,7 +2893,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     try {
     try {
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
       checkNameNodeSafeMode("Cannot recover the lease of " + src);
       checkNameNodeSafeMode("Cannot recover the lease of " + src);
@@ -2912,7 +2913,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       skipSync = true;
       skipSync = true;
       throw se;
       throw se;
     } finally {
     } finally {
-      writeUnlock("recoverLease");
+      writeUnlock(FSNamesystemLockMode.GLOBAL, operationName);
       // There might be transactions logged while trying to recover the lease.
       // There might be transactions logged while trying to recover the lease.
       // They need to be sync'ed even when an exception was thrown.
       // They need to be sync'ed even when an exception was thrown.
       if (!skipSync) {
       if (!skipSync) {
@@ -3032,7 +3033,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
       final FSPermissionChecker pc = getPermissionChecker();
       final FSPermissionChecker pc = getPermissionChecker();
       FSPermissionChecker.setOperationType(operationName);
       FSPermissionChecker.setOperationType(operationName);
-      writeLock();
+      writeLock(FSNamesystemLockMode.GLOBAL);
       try {
       try {
         checkOperation(OperationCategory.WRITE);
         checkOperation(OperationCategory.WRITE);
         checkNameNodeSafeMode("Cannot append to file" + srcArg);
         checkNameNodeSafeMode("Cannot append to file" + srcArg);
@@ -3042,7 +3043,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         skipSync = true;
         skipSync = true;
         throw se;
         throw se;
       } finally {
       } finally {
-        writeUnlock(operationName, getLockReportInfoSupplier(srcArg));
+        writeUnlock(FSNamesystemLockMode.GLOBAL, operationName, getLockReportInfoSupplier(srcArg));
         // There might be transactions logged while trying to recover the lease
         // There might be transactions logged while trying to recover the lease
         // They need to be sync'ed even when an exception was thrown.
         // They need to be sync'ed even when an exception was thrown.
         if (!skipSync) {
         if (!skipSync) {
@@ -3089,13 +3090,13 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
-    readLock();
+    readLock(FSNamesystemLockMode.GLOBAL);
     try {
     try {
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
       r = FSDirWriteFileOp.validateAddBlock(this, pc, src, fileId, clientName,
       r = FSDirWriteFileOp.validateAddBlock(this, pc, src, fileId, clientName,
                                             previous, onRetryBlock);
                                             previous, onRetryBlock);
     } finally {
     } finally {
-      readUnlock(operationName);
+      readUnlock(FSNamesystemLockMode.GLOBAL, operationName);
     }
     }
 
 
     if (r == null) {
     if (r == null) {
@@ -3108,14 +3109,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         blockManager, src, excludedNodes, favoredNodes, flags, r);
         blockManager, src, excludedNodes, favoredNodes, flags, r);
 
 
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     LocatedBlock lb;
     LocatedBlock lb;
     try {
     try {
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
       lb = FSDirWriteFileOp.storeAllocatedBlock(
       lb = FSDirWriteFileOp.storeAllocatedBlock(
           this, src, fileId, clientName, previous, targets);
           this, src, fileId, clientName, previous, targets);
     } finally {
     } finally {
-      writeUnlock(operationName);
+      writeUnlock(FSNamesystemLockMode.GLOBAL, operationName);
     }
     }
     getEditLog().logSync();
     getEditLog().logSync();
     return lb;
     return lb;
@@ -3141,7 +3142,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
-    readLock();
+    readLock(FSNamesystemLockMode.FS);
     try {
     try {
       // Changing this operation category to WRITE instead of making getAdditionalDatanode as a
       // Changing this operation category to WRITE instead of making getAdditionalDatanode as a
       // read method is aim to let Active NameNode to handle this RPC, because Active NameNode
       // read method is aim to let Active NameNode to handle this RPC, because Active NameNode
@@ -3166,7 +3167,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
           "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s",
           "src=%s, fileId=%d, blk=%s, clientName=%s, clientMachine=%s",
           src, fileId, blk, clientName, clientMachine));
           src, fileId, blk, clientName, clientMachine));
     } finally {
     } finally {
-      readUnlock("getAdditionalDatanode");
+      readUnlock(FSNamesystemLockMode.FS, operationName);
     }
     }
 
 
     if (clientnode == null) {
     if (clientnode == null) {
@@ -3193,7 +3194,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     try {
     try {
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
       checkNameNodeSafeMode("Cannot abandon block " + b + " for file" + src);
       checkNameNodeSafeMode("Cannot abandon block " + b + " for file" + src);
@@ -3201,7 +3202,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       NameNode.stateChangeLog.debug(
       NameNode.stateChangeLog.debug(
           "BLOCK* NameSystem.abandonBlock: {} is removed from pendingCreates", b);
           "BLOCK* NameSystem.abandonBlock: {} is removed from pendingCreates", b);
     } finally {
     } finally {
-      writeUnlock("abandonBlock");
+      writeUnlock(FSNamesystemLockMode.GLOBAL, operationName);
     }
     }
     getEditLog().logSync();
     getEditLog().logSync();
   }
   }
@@ -3216,7 +3217,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       throws LeaseExpiredException, FileNotFoundException {
       throws LeaseExpiredException, FileNotFoundException {
     String src = iip.getPath();
     String src = iip.getPath();
     INode inode = iip.getLastINode();
     INode inode = iip.getLastINode();
-    assert hasReadLock();
+    assert hasReadLock(FSNamesystemLockMode.FS);
     if (inode == null) {
     if (inode == null) {
       throw new FileNotFoundException("File does not exist: "
       throw new FileNotFoundException("File does not exist: "
           + leaseExceptionString(src, fileId, holder));
           + leaseExceptionString(src, fileId, holder));
@@ -3260,14 +3261,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     try {
     try {
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
       checkNameNodeSafeMode("Cannot complete file " + src);
       checkNameNodeSafeMode("Cannot complete file " + src);
       success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last,
       success = FSDirWriteFileOp.completeFile(this, pc, src, holder, last,
                                               fileId);
                                               fileId);
     } finally {
     } finally {
-      writeUnlock("completeFile");
+      writeUnlock(FSNamesystemLockMode.GLOBAL, operationName);
     }
     }
     getEditLog().logSync();
     getEditLog().logSync();
     if (success) {
     if (success) {
@@ -3282,6 +3283,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
    * @param blockType is the file under striping or contiguous layout?
    * @param blockType is the file under striping or contiguous layout?
    */
    */
   Block createNewBlock(BlockType blockType) throws IOException {
   Block createNewBlock(BlockType blockType) throws IOException {
+    // nextBlockId and nextGenerationStamp write to the edit log, so they need the FS lock.
     assert hasWriteLock(FSNamesystemLockMode.GLOBAL);
     assert hasWriteLock(FSNamesystemLockMode.GLOBAL);
     Block b = new Block(nextBlockId(blockType), 0, 0);
     Block b = new Block(nextBlockId(blockType), 0, 0);
     // Increment the generation stamp for every new block.
     // Increment the generation stamp for every new block.
@@ -3295,7 +3297,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
    * all blocks, otherwise check only penultimate block.
    * all blocks, otherwise check only penultimate block.
    */
    */
   boolean checkFileProgress(String src, INodeFile v, boolean checkall) {
   boolean checkFileProgress(String src, INodeFile v, boolean checkall) {
-    assert hasReadLock();
+    assert hasReadLock(FSNamesystemLockMode.GLOBAL);
     if (checkall) {
     if (checkall) {
       return checkBlocksComplete(src, true, v.getBlocks());
       return checkBlocksComplete(src, true, v.getBlocks());
     } else {
     } else {
@@ -3341,14 +3343,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
     try {
     try {
-      writeLock();
+      writeLock(FSNamesystemLockMode.FS);
       try {
       try {
         checkOperation(OperationCategory.WRITE);
         checkOperation(OperationCategory.WRITE);
         checkNameNodeSafeMode("Cannot rename " + src);
         checkNameNodeSafeMode("Cannot rename " + src);
         ret = FSDirRenameOp.renameToInt(dir, pc, src, dst, logRetryCache);
         ret = FSDirRenameOp.renameToInt(dir, pc, src, dst, logRetryCache);
       } finally {
       } finally {
         FileStatus status = ret != null ? ret.auditStat : null;
         FileStatus status = ret != null ? ret.auditStat : null;
-        writeUnlock(operationName,
+        writeUnlock(FSNamesystemLockMode.FS, operationName,
             getLockReportInfoSupplier(src, dst, status));
             getLockReportInfoSupplier(src, dst, status));
       }
       }
     } catch (AccessControlException e)  {
     } catch (AccessControlException e)  {
@@ -3373,7 +3375,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
     try {
     try {
-      writeLock();
+      writeLock(FSNamesystemLockMode.GLOBAL);
       try {
       try {
         checkOperation(OperationCategory.WRITE);
         checkOperation(OperationCategory.WRITE);
         checkNameNodeSafeMode("Cannot rename " + src);
         checkNameNodeSafeMode("Cannot rename " + src);
@@ -3381,7 +3383,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
             options);
             options);
       } finally {
       } finally {
         FileStatus status = res != null ? res.auditStat : null;
         FileStatus status = res != null ? res.auditStat : null;
-        writeUnlock(operationName,
+        writeUnlock(FSNamesystemLockMode.GLOBAL, operationName,
             getLockReportInfoSupplier(src, dst, status));
             getLockReportInfoSupplier(src, dst, status));
       }
       }
     } catch (AccessControlException e) {
     } catch (AccessControlException e) {
@@ -3416,7 +3418,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
     boolean ret = false;
     boolean ret = false;
     try {
     try {
-      writeLock();
+      writeLock(FSNamesystemLockMode.GLOBAL);
       try {
       try {
         checkOperation(OperationCategory.WRITE);
         checkOperation(OperationCategory.WRITE);
         checkNameNodeSafeMode("Cannot delete " + src);
         checkNameNodeSafeMode("Cannot delete " + src);
@@ -3424,7 +3426,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
             this, pc, src, recursive, logRetryCache);
             this, pc, src, recursive, logRetryCache);
         ret = toRemovedBlocks != null;
         ret = toRemovedBlocks != null;
       } finally {
       } finally {
-        writeUnlock(operationName, getLockReportInfoSupplier(src));
+        writeUnlock(FSNamesystemLockMode.GLOBAL, operationName, getLockReportInfoSupplier(src));
       }
       }
     } catch (AccessControlException e) {
     } catch (AccessControlException e) {
       logAuditEvent(false, operationName, src);
       logAuditEvent(false, operationName, src);
@@ -3454,7 +3456,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   void removeLeasesAndINodes(List<Long> removedUCFiles,
   void removeLeasesAndINodes(List<Long> removedUCFiles,
       List<INode> removedINodes,
       List<INode> removedINodes,
       final boolean acquireINodeMapLock) {
       final boolean acquireINodeMapLock) {
-    assert hasWriteLock();
+    assert hasWriteLock(FSNamesystemLockMode.FS);
     for(long i : removedUCFiles) {
     for(long i : removedUCFiles) {
       leaseManager.removeLease(i);
       leaseManager.removeLease(i);
     }
     }
@@ -3559,14 +3561,14 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
     try {
     try {
-      writeLock();
+      writeLock(FSNamesystemLockMode.FS);
       try {
       try {
         checkOperation(OperationCategory.WRITE);
         checkOperation(OperationCategory.WRITE);
         checkNameNodeSafeMode("Cannot create directory " + src);
         checkNameNodeSafeMode("Cannot create directory " + src);
         auditStat = FSDirMkdirOp.mkdirs(this, pc, src, permissions,
         auditStat = FSDirMkdirOp.mkdirs(this, pc, src, permissions,
             createParent);
             createParent);
       } finally {
       } finally {
-        writeUnlock(operationName,
+        writeUnlock(FSNamesystemLockMode.FS, operationName,
             getLockReportInfoSupplier(src, null, auditStat));
             getLockReportInfoSupplier(src, null, auditStat));
       }
       }
     } catch (AccessControlException e) {
     } catch (AccessControlException e) {
@@ -3707,7 +3709,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     final FSPermissionChecker pc = getPermissionChecker();
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     FSPermissionChecker.setOperationType(operationName);
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     try {
     try {
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
       checkNameNodeSafeMode("Cannot fsync file " + src);
       checkNameNodeSafeMode("Cannot fsync file " + src);
@@ -3720,7 +3722,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       }
       }
       FSDirWriteFileOp.persistBlocks(dir, src, pendingFile, false);
       FSDirWriteFileOp.persistBlocks(dir, src, pendingFile, false);
     } finally {
     } finally {
-      writeUnlock("fsync");
+      writeUnlock(FSNamesystemLockMode.GLOBAL, operationName);
     }
     }
     getEditLog().logSync();
     getEditLog().logSync();
   }
   }
@@ -3743,7 +3745,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       String recoveryLeaseHolder) throws IOException {
       String recoveryLeaseHolder) throws IOException {
     LOG.info("Recovering " + lease + ", src=" + src);
     LOG.info("Recovering " + lease + ", src=" + src);
     assert !isInSafeMode();
     assert !isInSafeMode();
-    assert hasWriteLock();
+    // finalizeINodeFileUnderConstruction needs global write lock.
+    assert hasWriteLock(FSNamesystemLockMode.GLOBAL);
 
 
     final INodeFile pendingFile = iip.getLastINode().asFile();
     final INodeFile pendingFile = iip.getLastINode().asFile();
     int nrBlocks = pendingFile.numBlocks();
     int nrBlocks = pendingFile.numBlocks();
@@ -3905,7 +3908,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
 
 
   private Lease reassignLease(Lease lease, String src, String newHolder,
   private Lease reassignLease(Lease lease, String src, String newHolder,
       INodeFile pendingFile) {
       INodeFile pendingFile) {
-    assert hasWriteLock();
+    assert hasWriteLock(FSNamesystemLockMode.FS);
     if(newHolder == null)
     if(newHolder == null)
       return lease;
       return lease;
     // The following transaction is not synced. Make sure it's sync'ed later.
     // The following transaction is not synced. Make sure it's sync'ed later.
@@ -3914,7 +3917,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   }
   }
   
   
   Lease reassignLeaseInternal(Lease lease, String newHolder, INodeFile pendingFile) {
   Lease reassignLeaseInternal(Lease lease, String newHolder, INodeFile pendingFile) {
-    assert hasWriteLock();
+    assert hasWriteLock(FSNamesystemLockMode.FS);
     pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
     pendingFile.getFileUnderConstructionFeature().setClientName(newHolder);
     return leaseManager.reassignLease(lease, pendingFile, newHolder);
     return leaseManager.reassignLease(lease, pendingFile, newHolder);
   }
   }
@@ -5888,6 +5891,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
    */
    */
   long nextGenerationStamp(boolean legacyBlock)
   long nextGenerationStamp(boolean legacyBlock)
       throws IOException {
       throws IOException {
+    // TODO: Use FSLock to make nextGenerationStamp thread safe.
     assert hasWriteLock(FSNamesystemLockMode.GLOBAL);
     assert hasWriteLock(FSNamesystemLockMode.GLOBAL);
     checkNameNodeSafeMode("Cannot get next generation stamp");
     checkNameNodeSafeMode("Cannot get next generation stamp");
 
 
@@ -5907,7 +5911,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
    * @param blockType is the file under striping or contiguous layout?
    * @param blockType is the file under striping or contiguous layout?
    */
    */
   private long nextBlockId(BlockType blockType) throws IOException {
   private long nextBlockId(BlockType blockType) throws IOException {
-    assert hasWriteLock();
+    assert hasWriteLock(FSNamesystemLockMode.GLOBAL);
     checkNameNodeSafeMode("Cannot get next block ID");
     checkNameNodeSafeMode("Cannot get next block ID");
     final long blockId = blockManager.nextBlockId(blockType);
     final long blockId = blockManager.nextBlockId(blockType);
     getEditLog().logAllocateBlockId(blockId);
     getEditLog().logAllocateBlockId(blockId);
@@ -5956,7 +5960,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
 
 
   private INodeFile checkUCBlock(ExtendedBlock block,
   private INodeFile checkUCBlock(ExtendedBlock block,
       String clientName) throws IOException {
       String clientName) throws IOException {
-    assert hasWriteLock();
+    assert hasWriteLock(FSNamesystemLockMode.GLOBAL);
     checkNameNodeSafeMode("Cannot get a new generation stamp and an "
     checkNameNodeSafeMode("Cannot get a new generation stamp and an "
         + "access token for block " + block);
         + "access token for block " + block);
     
     
@@ -6030,7 +6034,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       String clientName) throws IOException {
       String clientName) throws IOException {
     final LocatedBlock locatedBlock;
     final LocatedBlock locatedBlock;
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     try {
     try {
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
 
 
@@ -6064,7 +6068,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       blockManager.setBlockToken(locatedBlock,
       blockManager.setBlockToken(locatedBlock,
           BlockTokenIdentifier.AccessMode.WRITE);
           BlockTokenIdentifier.AccessMode.WRITE);
     } finally {
     } finally {
-      writeUnlock("bumpBlockGenerationStamp");
+      writeUnlock(FSNamesystemLockMode.GLOBAL, "bumpBlockGenerationStamp");
     }
     }
     // Ensure we record the new generation stamp
     // Ensure we record the new generation stamp
     getEditLog().logSync();
     getEditLog().logSync();
@@ -6093,7 +6097,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
              + ", newNodes=" + Arrays.asList(newNodes)
              + ", newNodes=" + Arrays.asList(newNodes)
              + ", client=" + clientName
              + ", client=" + clientName
              + ")");
              + ")");
-    writeLock();
+    writeLock(FSNamesystemLockMode.GLOBAL);
     try {
     try {
       checkOperation(OperationCategory.WRITE);
       checkOperation(OperationCategory.WRITE);
       checkNameNodeSafeMode("Pipeline not updated");
       checkNameNodeSafeMode("Pipeline not updated");
@@ -6102,7 +6106,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
       updatePipelineInternal(clientName, oldBlock, newBlock, newNodes,
           newStorageIDs, logRetryCache);
           newStorageIDs, logRetryCache);
     } finally {
     } finally {
-      writeUnlock("updatePipeline");
+      writeUnlock(FSNamesystemLockMode.GLOBAL, "updatePipeline");
     }
     }
     getEditLog().logSync();
     getEditLog().logSync();
     LOG.info("updatePipeline(" + oldBlock.getLocalBlock() + " => "
     LOG.info("updatePipeline(" + oldBlock.getLocalBlock() + " => "
@@ -6113,7 +6117,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
       ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
       ExtendedBlock newBlock, DatanodeID[] newNodes, String[] newStorageIDs,
       boolean logRetryCache)
       boolean logRetryCache)
       throws IOException {
       throws IOException {
-    assert hasWriteLock();
+    assert hasWriteLock(FSNamesystemLockMode.GLOBAL);
     // check the validity of the block and lease holder name
     // check the validity of the block and lease holder name
     final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
     final INodeFile pendingFile = checkUCBlock(oldBlock, clientName);
     final String src = pendingFile.getFullPathName();
     final String src = pendingFile.getFullPathName();
@@ -6409,7 +6413,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     long expiryTime;
     long expiryTime;
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     try {
     try {
-      writeLock();
+      writeLock(FSNamesystemLockMode.FS);
       try {
       try {
         checkOperation(OperationCategory.WRITE);
         checkOperation(OperationCategory.WRITE);
 
 
@@ -6426,7 +6430,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         getEditLog().logRenewDelegationToken(id, expiryTime);
         getEditLog().logRenewDelegationToken(id, expiryTime);
         tokenId = id.toStringStable();
         tokenId = id.toStringStable();
       } finally {
       } finally {
-        writeUnlock(operationName, getLockReportInfoSupplier(tokenId));
+        writeUnlock(FSNamesystemLockMode.FS, operationName, getLockReportInfoSupplier(tokenId));
       }
       }
     } catch (AccessControlException ace) {
     } catch (AccessControlException ace) {
       final DelegationTokenIdentifier id = DFSUtil.decodeDelegationToken(token);
       final DelegationTokenIdentifier id = DFSUtil.decodeDelegationToken(token);
@@ -6450,7 +6454,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
     String tokenId = null;
     String tokenId = null;
     checkOperation(OperationCategory.WRITE);
     checkOperation(OperationCategory.WRITE);
     try {
     try {
-      writeLock();
+      writeLock(FSNamesystemLockMode.FS);
       try {
       try {
         checkOperation(OperationCategory.WRITE);
         checkOperation(OperationCategory.WRITE);
         checkNameNodeSafeMode("Cannot cancel delegation token");
         checkNameNodeSafeMode("Cannot cancel delegation token");
@@ -6460,7 +6464,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         getEditLog().logCancelDelegationToken(id);
         getEditLog().logCancelDelegationToken(id);
         tokenId = id.toStringStable();
         tokenId = id.toStringStable();
       } finally {
       } finally {
-        writeUnlock(operationName, getLockReportInfoSupplier(tokenId));
+        writeUnlock(FSNamesystemLockMode.FS, operationName, getLockReportInfoSupplier(tokenId));
       }
       }
     } catch (AccessControlException ace) {
     } catch (AccessControlException ace) {
       final DelegationTokenIdentifier id = DFSUtil.decodeDelegationToken(token);
       final DelegationTokenIdentifier id = DFSUtil.decodeDelegationToken(token);
@@ -6535,7 +6539,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   
   
   private void logReassignLease(String leaseHolder, String src,
   private void logReassignLease(String leaseHolder, String src,
       String newHolder) {
       String newHolder) {
-    assert hasWriteLock();
+    assert hasWriteLock(FSNamesystemLockMode.FS);
     getEditLog().logReassignLease(leaseHolder, src, newHolder);
     getEditLog().logReassignLease(leaseHolder, src, newHolder);
   }
   }
   
   

+ 6 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java

@@ -971,6 +971,12 @@ public class INodeFile extends INodeWithAdditionalFields
 
 
   /**
   /**
    * Compute file size of the current file.
    * Compute file size of the current file.
+   *
+   * ComputeFileSize only needs the FSLock even though it involves blocks.
+   * BlockSize can only be changed by hsync, addBlock, commitBlockSynchronization,
+   * complete, updatePipeline and forceCompleteBlock; all these operations
+   * already hold the FSWriteLock.
+   * CompleteBlock also holds the FSWriteLock since it needs to update Quota.
    * 
    * 
    * @param includesLastUcBlock
    * @param includesLastUcBlock
    *          If the last block is under construction, should it be included?
    *          If the last block is under construction, should it be included?