ソースを参照

HDFS-15651. Client could not obtain block when DN CommandProcessingThread exit. Contributed by Aiphago.

Reviewed-by: He Xiaoqiao <hexiaoqiao@apache.org>
Reviewed-by: Yiqun Lin <yqlin@apache.org>
He Xiaoqiao 4 年 前
コミット
3067a25fa1

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java

@@ -1312,6 +1312,10 @@ class BPServiceActor implements Runnable {
         processQueue();
       } catch (Throwable t) {
         LOG.error("{} encountered fatal exception and exit.", getName(), t);
+        runningState = RunningState.FAILED;
+      } finally {
+        LOG.warn("Ending command processor service for: " + this);
+        shouldServiceRun = false;
       }
     }
 
@@ -1327,6 +1331,7 @@ class BPServiceActor implements Runnable {
           dn.getMetrics().incrNumProcessedCommands();
         } catch (InterruptedException e) {
           LOG.error("{} encountered interrupt and exit.", getName());
+          Thread.currentThread().interrupt();
           // ignore unless thread was specifically interrupted.
           if (Thread.interrupted()) {
             break;
@@ -1398,4 +1403,11 @@ class BPServiceActor implements Runnable {
       dn.getMetrics().incrActorCmdQueueLength(1);
     }
   }
+
+  /**
+   * Interrupts the command processing thread, if one has been created.
+   * Does nothing when {@code commandProcessingThread} is null.
+   * Marked {@code @VisibleForTesting}: in this change it is invoked from
+   * TestBPOfferService#testCommandProcessingThreadExit.
+   */
+  @VisibleForTesting
+  void stopCommandProcessingThread() {
+    if (commandProcessingThread != null) {
+      commandProcessingThread.interrupt();
+    }
+  }
 }

+ 23 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java

@@ -1211,4 +1211,26 @@ public class TestBPOfferService {
       }
     }
   }
-}
+
+  /**
+   * Starts a MiniDFSCluster with a single DataNode, interrupts the command
+   * processing thread of the first BPServiceActor of the first
+   * BPOfferService, and then waits (via GenericTestUtils.waitFor) for
+   * {@code actor.isAlive()} to become false. The cluster is shut down in
+   * the finally block regardless of the outcome.
+   */
+  @Test(timeout = 5000)
+  public void testCommandProcessingThreadExit() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).
+        numDataNodes(1).build();
+    try {
+      List<DataNode> datanodes = cluster.getDataNodes();
+      DataNode dataNode = datanodes.get(0);
+      List<BPOfferService> allBpOs = dataNode.getAllBpOs();
+      BPOfferService bpos = allBpOs.get(0);
+      // Make sure the block pool service is initialized before touching
+      // its actors.
+      waitForInitialization(bpos);
+      BPServiceActor actor = bpos.getBPServiceActors().get(0);
+      // Stop and wait until actor exit.
+      actor.stopCommandProcessingThread();
+      GenericTestUtils.waitFor(() -> !actor.isAlive(), 100, 3000);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+}