Browse Source

HADOOP-11333. Fix deadlock in DomainSocketWatcher when the notification pipe is full (zhaoyunjiong via cmccabe)

Colin Patrick Mccabe 10 years ago
parent
commit
86e3993def

+ 3 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -473,6 +473,9 @@ Release 2.7.0 - UNRELEASED
     HADOOP-11300. KMS startup scripts must not display the keystore /
     truststore passwords. (Arun Suresh via wang)
 
+    HADOOP-11333. Fix deadlock in DomainSocketWatcher when the notification
+    pipe is full (zhaoyunjiong via cmccabe)
+
 Release 2.6.0 - 2014-11-18
 
   INCOMPATIBLE CHANGES

+ 15 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocketWatcher.java

@@ -103,6 +103,7 @@ public final class DomainSocketWatcher implements Closeable {
     public boolean handle(DomainSocket sock) {
       assert(lock.isHeldByCurrentThread());
       try {
+        kicked = false;
         if (LOG.isTraceEnabled()) {
           LOG.trace(this + ": NotificationHandler: doing a read on " +
             sock.fd);
@@ -228,6 +229,14 @@ public final class DomainSocketWatcher implements Closeable {
    * Whether or not this DomainSocketWatcher is closed.
    */
   private boolean closed = false;
+  
+  /**
+   * True if we have written a byte to the notification socket. We should not
+   * write anything else to the socket until the notification handler has had a
+   * chance to run. Otherwise, our thread might block, causing deadlock. 
+   * See HADOOP-11333 for details.
+   */
+  private boolean kicked = false;
 
   public DomainSocketWatcher(int interruptCheckPeriodMs) throws IOException {
     if (loadingFailureReason != null) {
@@ -348,8 +357,14 @@ public final class DomainSocketWatcher implements Closeable {
    */
   private void kick() {
     assert(lock.isHeldByCurrentThread());
+    
+    if (kicked) {
+      return;
+    }
+    
     try {
       notificationSockets[0].getOutputStream().write(0);
+      kicked = true;
     } catch (IOException e) {
       if (!closed) {
         LOG.error(this + ": error writing to notificationSockets[0]", e);