소스 검색

HADOOP-14451. Deadlock in NativeIO (#6632)

Vinayakumar B 1 년 전
부모
커밋
0f51d2a4ec

+ 16 - 10
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java

@@ -220,6 +220,9 @@ public class NativeIO {
       }
     }
 
+    /** Initialize the JNI method ID and class ID cache. */
+    private static native void initNativePosix(boolean doThreadsafeWorkaround);
+
     /**
      * JNI wrapper of persist memory operations.
      */
@@ -331,11 +334,11 @@ public class NativeIO {
       if (NativeCodeLoader.isNativeCodeLoaded()) {
         try {
           Configuration conf = new Configuration();
-          workaroundNonThreadSafePasswdCalls = conf.getBoolean(
-            WORKAROUND_NON_THREADSAFE_CALLS_KEY,
-            WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT);
+          boolean workaroundNonThreadSafePasswdCalls = conf.getBoolean(
+              WORKAROUND_NON_THREADSAFE_CALLS_KEY,
+              WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT);
 
-          initNative();
+          initNativePosix(workaroundNonThreadSafePasswdCalls);
           nativeLoaded = true;
 
           cacheTimeout = conf.getLong(
@@ -679,9 +682,6 @@ public class NativeIO {
         throws IOException;
   }
 
-  private static boolean workaroundNonThreadSafePasswdCalls = false;
-
-
   public static class Windows {
     // Flags for CreateFile() call on Windows
     public static final long GENERIC_READ = 0x80000000L;
@@ -833,7 +833,9 @@ public class NativeIO {
     static {
       if (NativeCodeLoader.isNativeCodeLoaded()) {
         try {
-          initNative();
+          initNativeWindows(false);
+          // As of now there is no change between initNative()
+          // and initNativeWindows() native impls.
           nativeLoaded = true;
         } catch (Throwable t) {
           // This can happen if the user has an older version of libhadoop.so
@@ -843,6 +845,10 @@ public class NativeIO {
         }
       }
     }
+
+    /** Initialize the JNI method ID and class ID cache. */
+    private static native void initNativeWindows(
+        boolean doThreadsafeWorkaround);
   }
 
   private static final Logger LOG = LoggerFactory.getLogger(NativeIO.class);
@@ -852,7 +858,7 @@ public class NativeIO {
   static {
     if (NativeCodeLoader.isNativeCodeLoaded()) {
       try {
-        initNative();
+        initNative(false);
         nativeLoaded = true;
       } catch (Throwable t) {
         // This can happen if the user has an older version of libhadoop.so
@@ -871,7 +877,7 @@ public class NativeIO {
   }
 
   /** Initialize the JNI method ID and class ID cache */
-  private static native void initNative();
+  private static native void initNative(boolean doThreadsafeWorkaround);
 
   /**
    * Get the maximum number of bytes that can be locked into memory at any

+ 88 - 38
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c

@@ -103,24 +103,6 @@ extern void throw_ioe(JNIEnv* env, int errnum);
 static ssize_t get_pw_buflen();
 #endif
 
-/**
- * Returns non-zero if the user has specified that the system
- * has non-threadsafe implementations of getpwuid_r or getgrgid_r.
- **/
-static int workaround_non_threadsafe_calls(JNIEnv *env, jclass clazz) {
-  jboolean result;
-  jfieldID needs_workaround_field = (*env)->GetStaticFieldID(
-    env, clazz,
-    "workaroundNonThreadSafePasswdCalls",
-    "Z");
-  PASS_EXCEPTIONS_RET(env, 0);
-  assert(needs_workaround_field);
-
-  result = (*env)->GetStaticBooleanField(
-    env, clazz, needs_workaround_field);
-  return result;
-}
-
 /**
  * Sets a static boolean field to the specified value.
  */
@@ -201,10 +183,9 @@ static void consts_init(JNIEnv *env) {
 }
 #endif
 
-static void stat_init(JNIEnv *env, jclass nativeio_class) {
+static void stat_init(JNIEnv *env) {
   jclass clazz = NULL;
-  jclass obj_class = NULL;
-  jmethodID  obj_ctor = NULL;
+  if (stat_ctor2 != NULL) return; //Already inited
   // Init Stat
   clazz = (*env)->FindClass(env, NATIVE_IO_STAT_CLASS);
   if (!clazz) {
@@ -224,6 +205,20 @@ static void stat_init(JNIEnv *env, jclass nativeio_class) {
   if (!stat_ctor2) {
     return; // exception has been raised
   }
+}
+
+static void stat_deinit(JNIEnv *env) {
+  if (stat_clazz != NULL) {
+    (*env)->DeleteGlobalRef(env, stat_clazz);
+    stat_clazz = NULL;
+  }
+}
+
+static void workaround_non_threadsafe_calls_init(JNIEnv *env){
+  jclass obj_class = NULL;
+  jmethodID  obj_ctor = NULL;
+  if (pw_lock_object != NULL) return; // Already inited
+
   obj_class = (*env)->FindClass(env, "java/lang/Object");
   if (!obj_class) {
     return; // exception has been raised
@@ -233,21 +228,13 @@ static void stat_init(JNIEnv *env, jclass nativeio_class) {
   if (!obj_ctor) {
     return; // exception has been raised
   }
-
-  if (workaround_non_threadsafe_calls(env, nativeio_class)) {
-    pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor);
-    PASS_EXCEPTIONS(env);
-    pw_lock_object = (*env)->NewGlobalRef(env, pw_lock_object);
-
-    PASS_EXCEPTIONS(env);
-  }
+  pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor);
+  PASS_EXCEPTIONS(env);
+  pw_lock_object = (*env)->NewGlobalRef(env, pw_lock_object);
+  PASS_EXCEPTIONS(env);
 }
 
-static void stat_deinit(JNIEnv *env) {
-  if (stat_clazz != NULL) {  
-    (*env)->DeleteGlobalRef(env, stat_clazz);
-    stat_clazz = NULL;
-  }
+static void workaround_non_threadsafe_calls_deinit(JNIEnv *env){
   if (pw_lock_object != NULL) {
     (*env)->DeleteGlobalRef(env, pw_lock_object);
     pw_lock_object = NULL;
@@ -255,6 +242,7 @@ static void stat_deinit(JNIEnv *env) {
 }
 
 static void nioe_init(JNIEnv *env) {
+  if (nioe_ctor != NULL) return; // Already inited
   // Init NativeIOException
   nioe_clazz = (*env)->FindClass(
     env, "org/apache/hadoop/io/nativeio/NativeIOException");
@@ -349,17 +337,53 @@ static void pmem_region_deinit(JNIEnv *env) {
  */
 JNIEXPORT void JNICALL
 Java_org_apache_hadoop_io_nativeio_NativeIO_initNative(
-  JNIEnv *env, jclass clazz) {
+  JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) {
+  nioe_init(env);
+  PASS_EXCEPTIONS_GOTO(env, error);
+  fd_init(env);
+  PASS_EXCEPTIONS_GOTO(env, error);
+#ifdef UNIX
+  errno_enum_init(env);
+  PASS_EXCEPTIONS_GOTO(env, error);
+#endif
+  if (doThreadsafeWorkaround) {
+    workaround_non_threadsafe_calls_init(env);
+    PASS_EXCEPTIONS_GOTO(env, error);
+  }
+  return;
+error:
+  // these are all idempotent and safe to call even if the
+  // class wasn't inited yet
+  nioe_deinit(env);
+  fd_deinit(env);
+#ifdef UNIX
+  errno_enum_deinit(env);
+#endif
+  if (doThreadsafeWorkaround) {
+    workaround_non_threadsafe_calls_deinit(env);
+  }
+}
+
+/*
+ * private static native void initNativePosix();
+ */
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_io_nativeio_NativeIO_00024POSIX_initNativePosix(
+  JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) {
 #ifdef UNIX
   consts_init(env);
   PASS_EXCEPTIONS_GOTO(env, error);
 #endif
-  stat_init(env, clazz);
+  stat_init(env);
   PASS_EXCEPTIONS_GOTO(env, error);
   nioe_init(env);
   PASS_EXCEPTIONS_GOTO(env, error);
   fd_init(env);
   PASS_EXCEPTIONS_GOTO(env, error);
+  if (doThreadsafeWorkaround) {
+    workaround_non_threadsafe_calls_init(env);
+    PASS_EXCEPTIONS_GOTO(env, error);
+  }
 #ifdef UNIX
   errno_enum_init(env);
   PASS_EXCEPTIONS_GOTO(env, error);
@@ -373,17 +397,43 @@ Java_org_apache_hadoop_io_nativeio_NativeIO_initNative(
 error:
   // these are all idempodent and safe to call even if the
   // class wasn't initted yet
-#ifdef UNIX
   stat_deinit(env);
 #ifdef HADOOP_PMDK_LIBRARY
   pmem_region_deinit(env);
-#endif
 #endif
   nioe_deinit(env);
   fd_deinit(env);
 #ifdef UNIX
   errno_enum_deinit(env);
 #endif
+  if (doThreadsafeWorkaround) {
+    workaround_non_threadsafe_calls_deinit(env);
+  }
+}
+
+/*
+ * private static native void initNativeWindows();
+ */
+JNIEXPORT void JNICALL
+Java_org_apache_hadoop_io_nativeio_NativeIO_00024Windows_initNativeWindows(
+  JNIEnv *env, jclass clazz, jboolean doThreadsafeWorkaround) {
+  nioe_init(env);
+  PASS_EXCEPTIONS_GOTO(env, error);
+  fd_init(env);
+  PASS_EXCEPTIONS_GOTO(env, error);
+  if (doThreadsafeWorkaround) {
+    workaround_non_threadsafe_calls_init(env);
+    PASS_EXCEPTIONS_GOTO(env, error);
+  }
+  return;
+error:
+  // these are all idempodent and safe to call even if the
+  // class wasn't initted yet
+  nioe_deinit(env);
+  fd_deinit(env);
+  if (doThreadsafeWorkaround) {
+    workaround_non_threadsafe_calls_deinit(env);
+  }
 }
 
 /*

+ 87 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIoInit.java

@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io.nativeio;
+
+import static org.junit.Assume.assumeTrue;
+
+import java.io.IOException;
+
+import org.apache.hadoop.fs.Path;
+import org.junit.Test;
+
+/**
+ * Separate class to ensure forked Tests load the static blocks again.
+ */
+public class TestNativeIoInit {
+
+  /**
+   * Refer HADOOP-14451
+   * Scenario:
+   * 1. One thread calls a static method of NativeIO, which loads static block
+   * of NativeIo.
+   * 2. Second thread calls a static method of NativeIo.POSIX, which loads a
+   * static block of NativeIO.POSIX class
+   * <p>
+   * Expected: Loading these two static blocks separately should not result in
+   * deadlock.
+   */
+  @Test(timeout = 10000)
+  public void testDeadlockLinux() throws Exception {
+    Thread one = new Thread() {
+      @Override
+      public void run() {
+        NativeIO.isAvailable();
+      }
+    };
+    Thread two = new Thread() {
+      @Override
+      public void run() {
+        NativeIO.POSIX.isAvailable();
+      }
+    };
+    two.start();
+    one.start();
+    one.join();
+    two.join();
+  }
+
+  @Test(timeout = 10000)
+  public void testDeadlockWindows() throws Exception {
+    assumeTrue("Expected windows", Path.WINDOWS);
+    Thread one = new Thread() {
+      @Override
+      public void run() {
+        NativeIO.isAvailable();
+      }
+    };
+    Thread two = new Thread() {
+      @Override
+      public void run() {
+        try {
+          NativeIO.Windows.extendWorkingSetSize(100);
+        } catch (IOException e) {
+          //igored
+        }
+      }
+    };
+    two.start();
+    one.start();
+    one.join();
+    two.join();
+  }
+}