Jelajahi Sumber

HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition ordering.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1156847 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 13 tahun lalu
induk
komit
1dd48b1aee

+ 3 - 0
hdfs/CHANGES.txt

@@ -954,6 +954,9 @@ Trunk (unreleased changes)
     HDFS-2245. Fix a NullPointerException in BlockManager.chooseTarget(..).
     (szetszwo)
 
+    HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition
+    ordering.  (szetszwo)
+
   BREAKDOWN OF HDFS-1073 SUBTASKS
 
     HDFS-1521. Persist transaction ID on disk between NN restarts.

+ 26 - 28
hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1829,39 +1829,37 @@ public class BlockManager {
    * over or under replicated. Place it into the respective queue.
    */
   public void processMisReplicatedBlocks() {
+    assert namesystem.hasWriteLock();
+
     long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
-    namesystem.writeLock();
-    try {
-      neededReplications.clear();
-      for (BlockInfo block : blocksMap.getBlocks()) {
-        INodeFile fileINode = block.getINode();
-        if (fileINode == null) {
-          // block does not belong to any file
-          nrInvalid++;
-          addToInvalidates(block);
-          continue;
-        }
-        // calculate current replication
-        short expectedReplication = fileINode.getReplication();
-        NumberReplicas num = countNodes(block);
-        int numCurrentReplica = num.liveReplicas();
-        // add to under-replicated queue if need to be
-        if (isNeededReplication(block, expectedReplication, numCurrentReplica)) {
-          if (neededReplications.add(block, numCurrentReplica, num
-              .decommissionedReplicas(), expectedReplication)) {
-            nrUnderReplicated++;
-          }
+    neededReplications.clear();
+    for (BlockInfo block : blocksMap.getBlocks()) {
+      INodeFile fileINode = block.getINode();
+      if (fileINode == null) {
+        // block does not belong to any file
+        nrInvalid++;
+        addToInvalidates(block);
+        continue;
+      }
+      // calculate current replication
+      short expectedReplication = fileINode.getReplication();
+      NumberReplicas num = countNodes(block);
+      int numCurrentReplica = num.liveReplicas();
+      // add to under-replicated queue if need to be
+      if (isNeededReplication(block, expectedReplication, numCurrentReplica)) {
+        if (neededReplications.add(block, numCurrentReplica, num
+            .decommissionedReplicas(), expectedReplication)) {
+          nrUnderReplicated++;
         }
+      }
 
-        if (numCurrentReplica > expectedReplication) {
-          // over-replicated block
-          nrOverReplicated++;
-          processOverReplicatedBlock(block, expectedReplication, null, null);
-        }
+      if (numCurrentReplica > expectedReplication) {
+        // over-replicated block
+        nrOverReplicated++;
+        processOverReplicatedBlock(block, expectedReplication, null, null);
       }
-    } finally {
-      namesystem.writeUnlock();
     }
+
     LOG.info("Total number of blocks            = " + blocksMap.size());
     LOG.info("Number of invalid blocks          = " + nrInvalid);
     LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);

+ 12 - 7
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -313,14 +313,19 @@ public class FSNamesystem implements RwLock, FSClusterStats,
    * Activate FSNamesystem daemons.
    */
   void activate(Configuration conf) throws IOException {
-    setBlockTotal();
-    blockManager.activate(conf);
-    this.lmthread = new Daemon(leaseManager.new Monitor());
-    lmthread.start();
-
-    this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
-    nnrmthread.start();
+    writeLock();
+    try {
+      setBlockTotal();
+      blockManager.activate(conf);
 
+      this.lmthread = new Daemon(leaseManager.new Monitor());
+      lmthread.start();
+      this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
+      nnrmthread.start();
+    } finally {
+      writeUnlock();
+    }
+    
     registerMXBean();
     DefaultMetricsSystem.instance().register(this);
   }