浏览代码

HADOOP-15527. Improve delay check for stopping processes.
Contributed by Vinod Kumar Vavilapalli

Eric Yang 6 年之前
父节点
当前提交
108da85320

+ 33 - 1
hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh

@@ -2040,6 +2040,35 @@ function hadoop_start_secure_daemon_wrapper
   return 0
   return 0
 }
 }
 
 
+## @description  Wait till process dies or till timeout
+## @description  The process is probed with `kill -0` once per
+## @description  second; the function returns as soon as the probe
+## @description  fails or after `timeout` probes, whichever is first
+## @audience     private
+## @stability    evolving
+## @param        pid
+## @param        timeout
+function wait_process_to_die_or_timeout
+{
+  local pid=$1
+  local timeout=$2
+  # Keep the loop counter local so a caller's own "i" is not clobbered
+  local i
+
+  # Normalize timeout
+  # Round up or down to a whole number of seconds
+  timeout=$(printf "%.0f\n" "${timeout}")
+  if [[ ${timeout} -lt 1  ]]; then
+    # minimum 1 second
+    timeout=1
+  fi
+
+  # Wait to see if it's still alive
+  for (( i=0; i < timeout; i++ ))
+  do
+    if kill -0 "${pid}" > /dev/null 2>&1; then
+      sleep 1
+    else
+      # Probe failed: stop waiting early
+      break
+    fi
+  done
+}
+
 ## @description  Stop the non-privileged `command` daemon with that
 ## @description  Stop the non-privileged `command` daemon with that
 ## @description  that is running at `pidfile`.
 ## @description  that is running at `pidfile`.
 ## @audience     public
 ## @audience     public
@@ -2060,11 +2089,14 @@ function hadoop_stop_daemon
     pid=$(cat "$pidfile")
     pid=$(cat "$pidfile")
 
 
     kill "${pid}" >/dev/null 2>&1
     kill "${pid}" >/dev/null 2>&1
-    sleep "${HADOOP_STOP_TIMEOUT}"
+
+    wait_process_to_die_or_timeout "${pid}" "${HADOOP_STOP_TIMEOUT}"
+
     if kill -0 "${pid}" > /dev/null 2>&1; then
     if kill -0 "${pid}" > /dev/null 2>&1; then
       hadoop_error "WARNING: ${cmd} did not stop gracefully after ${HADOOP_STOP_TIMEOUT} seconds: Trying to kill with kill -9"
       hadoop_error "WARNING: ${cmd} did not stop gracefully after ${HADOOP_STOP_TIMEOUT} seconds: Trying to kill with kill -9"
       kill -9 "${pid}" >/dev/null 2>&1
       kill -9 "${pid}" >/dev/null 2>&1
     fi
     fi
+    wait_process_to_die_or_timeout "${pid}" "${HADOOP_STOP_TIMEOUT}"
     if ps -p "${pid}" > /dev/null 2>&1; then
     if ps -p "${pid}" > /dev/null 2>&1; then
       hadoop_error "ERROR: Unable to kill ${pid}"
       hadoop_error "ERROR: Unable to kill ${pid}"
     else
     else

+ 23 - 1
hadoop-common-project/hadoop-common/src/test/scripts/hadoop_stop_daemon.bats

@@ -15,7 +15,7 @@
 
 
 load hadoop-functions_test_helper
 load hadoop-functions_test_helper
 
 
-@test "hadoop_stop_daemon" {
+@test "hadoop_stop_daemon_changing_pid" {
   old_pid=12345
   old_pid=12345
   new_pid=54321
   new_pid=54321
   HADOOP_STOP_TIMEOUT=3
   HADOOP_STOP_TIMEOUT=3
@@ -29,3 +29,25 @@ load hadoop-functions_test_helper
   [ -f pidfile ]
   [ -f pidfile ]
   [ "$(cat pidfile)" = "${new_pid}" ]
   [ "$(cat pidfile)" = "${new_pid}" ]
 }
 }
+
+@test "hadoop_stop_daemon_force_kill" {
+
+  HADOOP_STOP_TIMEOUT=4
+
+  # Run the following in a sub-shell so that its termination doesn't affect the test
+  (sh "${TESTBINDIR}/process_with_sigterm_trap.sh" "${TMP}/pidfile" &)
+
+  # Wait for the process to go into tight loop
+  sleep 1
+
+  [ -f "${TMP}/pidfile" ]
+  pid=$(cat "${TMP}/pidfile")
+
+  run hadoop_stop_daemon my_command "${TMP}/pidfile" 2>&1
+
+  # The process should no longer be alive.
+  # A bare "! cmd" is exempt from errexit and therefore cannot fail the
+  # test when it is not the last command, so probe via run and check
+  # the exit status explicitly.
+  run kill -0 "${pid}"
+  [ "${status}" -ne 0 ]
+
+  # The PID file should be gone
+  [ ! -f "${TMP}/pidfile" ]
+}