浏览代码

HADOOP-2092. Added a ping mechanism to the pipes' task to periodically check if the parent Java task is running, and exit if the parent isn't alive and responding. Contributed by Amareshwari Sri Ramadasu.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@615423 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy 17 年之前
父节点
当前提交
d644094a7b

+ 4 - 0
CHANGES.txt

@@ -581,6 +581,10 @@ Trunk (unreleased changes)
     HADOOP-2646. Fix SortValidator to work with fully-qualified 
     working directories.  (Arun C Murthy via nigel)
 
+    HADOOP-2092. Added a ping mechanism to the pipes' task to periodically
+    check if the parent Java task is running, and exit if the parent isn't
+    alive and responding. (Amareshwari Sri Ramadasu via acmurthy) 
+
 Release 0.15.3 - 2008-01-18
 
   BUG FIXES

文件差异内容过多而无法显示
+ 1635 - 1409
src/c++/pipes/configure


+ 1 - 0
src/c++/pipes/configure.ac

@@ -29,6 +29,7 @@ AC_CONFIG_FILES([Makefile])
 AC_PREFIX_DEFAULT(`pwd`/../install)
 
 USE_HADOOP_UTILS
+HADOOP_PIPES_SETUP
 CHECK_INSTALL_CFLAG
 
 # Checks for programs.

+ 63 - 1
src/c++/pipes/impl/HadoopPipes.cc

@@ -30,6 +30,7 @@
 #include <stdlib.h>
 #include <strings.h>
 #include <sys/socket.h>
+#include <pthread.h>
 
 using std::map;
 using std::string;
@@ -584,6 +585,7 @@ namespace HadoopPipes {
     Partitioner* partitioner;
     int numReduces;
     const Factory* factory;
+    pthread_mutex_t mutexDone;
 
   public:
 
@@ -607,6 +609,7 @@
       lastProgress = 0;
       progressFloat = 0.0f;
       hasTask = false;
+      pthread_mutex_init(&mutexDone, NULL);
     }
 
     void setProtocol(Protocol* _protocol, UpwardProtocol* _uplink) {
@@ -689,11 +692,16 @@
     }
     
     virtual bool isDone() {
-      return done;
+      pthread_mutex_lock(&mutexDone);
+      bool doneCopy = done;
+      pthread_mutex_unlock(&mutexDone);
+      return doneCopy;
     }
 
     virtual void close() {
+      pthread_mutex_lock(&mutexDone);
       done = true;
+      pthread_mutex_unlock(&mutexDone);
     }
 
     virtual void abort() {
@@ -717,7 +725,9 @@
         key = *newKey;
       } else {
         if (!reader->next(key, const_cast<string&>(*value))) {
+          pthread_mutex_lock(&mutexDone);
           done = true;
+          pthread_mutex_unlock(&mutexDone);
           return false;
         }
         progressFloat = reader->getProgress();
@@ -856,9 +866,58 @@
       delete reducer;
       delete writer;
       delete partitioner;
+      pthread_mutex_destroy(&mutexDone);
     }
   };
 
+  /**
+   * Ping the parent every 5 seconds to know if it is alive 
+   */
+  void* ping(void* ptr) {
+    TaskContextImpl* context = (TaskContextImpl*) ptr;
+    char* portStr = getenv("hadoop.pipes.command.port");
+    int MAX_RETRIES = 3;
+    int remaining_retries = MAX_RETRIES;
+    while (!context->isDone()) {
+      try{
+        sleep(5);
+        int sock = -1;
+        if (portStr) {
+          sock = socket(PF_INET, SOCK_STREAM, 0);
+          HADOOP_ASSERT(sock != - 1,
+                        string("problem creating socket: ") + strerror(errno));
+          sockaddr_in addr;
+          addr.sin_family = AF_INET;
+          addr.sin_port = htons(toInt(portStr));
+          addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+          HADOOP_ASSERT(connect(sock, (sockaddr*) &addr, sizeof(addr)) == 0,
+                        string("problem connecting command socket: ") +
+                        strerror(errno));
+
+        }
+        if (sock != -1) {
+          int result = shutdown(sock, SHUT_RDWR);
+          HADOOP_ASSERT(result == 0, "problem shutting socket");
+          result = close(sock);
+          HADOOP_ASSERT(result == 0, "problem closing socket");
+        }
+        remaining_retries = MAX_RETRIES;
+      } catch (Error& err) {
+        if (!context->isDone()) {
+          fprintf(stderr, "Hadoop Pipes Exception: in ping %s\n", 
+                err.getMessage().c_str());
+          remaining_retries -= 1;
+          if (remaining_retries == 0) {
+            exit(1);
+          }
+        } else {
+          return NULL;
+        }
+      }
+    }
+    return NULL;
+  }
+
   /**
    * Run the assigned task in the framework.
    * The user's main function should set the various functions using the 
@@ -914,12 +973,15 @@
         connection = new TextProtocol(stdin, context, stdout);
       }
       context->setProtocol(connection, connection->getUplink());
+      pthread_t pingThread;
+      pthread_create(&pingThread, NULL, ping, (void*)(context));
       context->waitForTask();
       while (!context->isDone()) {
         context->nextKey();
       }
       context->closeAll();
       connection->getUplink()->done();
+      pthread_join(pingThread,NULL);
       delete context;
       delete connection;
       if (stream != NULL) {

+ 6 - 0
src/c++/pipes/impl/config.h.in

@@ -10,12 +10,18 @@
 /* Define to 1 if you have the <inttypes.h> header file. */
 #undef HAVE_INTTYPES_H
 
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#undef HAVE_LIBPTHREAD
+
 /* Define to 1 if you have the <memory.h> header file. */
 #undef HAVE_MEMORY_H
 
 /* Define to 1 if you have the `mkdir' function. */
 #undef HAVE_MKDIR
 
+/* Define to 1 if you have the <pthread.h> header file. */
+#undef HAVE_PTHREAD_H
+
 /* Define to 1 if stdbool.h conforms to C99. */
 #undef HAVE_STDBOOL_H
 

+ 8 - 0
src/c++/utils/m4/hadoop_utils.m4

@@ -46,9 +46,17 @@ AC_ARG_WITH([hadoop-utils],
 AC_SUBST(HADOOP_UTILS_PREFIX)
 ])
 
+AC_DEFUN([HADOOP_PIPES_SETUP],[
+AC_CHECK_HEADERS([pthread.h], [], 
+  AC_MSG_ERROR(Please check if you have installed the pthread library)) 
+AC_CHECK_LIB([pthread], [pthread_create], [], 
+  AC_MSG_ERROR(Cannot find libpthread.so, please check))
+])
+
 # define a macro for using hadoop pipes
 AC_DEFUN([USE_HADOOP_PIPES],[
 AC_REQUIRE([USE_HADOOP_UTILS])
+AC_REQUIRE([HADOOP_PIPES_SETUP])
 AC_ARG_WITH([hadoop-pipes],
 AC_ARG_WITH([hadoop-pipes],
             AS_HELP_STRING([--with-hadoop-pipes=<dir>],
             AS_HELP_STRING([--with-hadoop-pipes=<dir>],
                            [directory to get hadoop pipes from]),
                            [directory to get hadoop pipes from]),

文件差异内容过多而无法显示
+ 672 - 1075
src/examples/pipes/configure


+ 6 - 0
src/examples/pipes/impl/config.h.in

@@ -13,6 +13,9 @@
 /* Define to 1 if you have the `nsl' library (-lnsl). */
 #undef HAVE_LIBNSL
 
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#undef HAVE_LIBPTHREAD
+
 /* Define to 1 if you have the `socket' library (-lsocket). */
 #undef HAVE_LIBSOCKET
 
@@ -22,6 +25,9 @@
 /* Define to 1 if you have the `mkdir' function. */
 #undef HAVE_MKDIR
 
+/* Define to 1 if you have the <pthread.h> header file. */
+#undef HAVE_PTHREAD_H
+
 /* Define to 1 if stdbool.h conforms to C99. */
 #undef HAVE_STDBOOL_H
 

部分文件因为文件数量过多而无法显示