Explorar o código

HDFS-15273. CacheReplicationMonitor hold lock for long time and lead to NN out of service. Contributed by Xiaoqiao He.

Wei-Chiu Chuang hai 1 ano
pai
achega
821ed83873

+ 12 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -192,6 +192,18 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
       "dfs.namenode.path.based.cache.block.map.allocation.percent";
   public static final float    DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT = 0.25f;
 
+  public static final String DFS_NAMENODE_CRM_CHECKLOCKTIME_ENABLE =
+      "dfs.namenode.crm.checklocktime.enable";
+  public static final boolean DFS_NAMENODE_CRM_CHECKLOCKTIME_DEFAULT = false;
+
+  public static final String DFS_NAMENODE_CRM_MAXLOCKTIME_MS =
+      "dfs.namenode.crm.maxlocktime.ms";
+  public static final long DFS_NAMENODE_CRM_MAXLOCKTIME_MS_DEFAULT = 1000;
+
+  public static final String DFS_NAMENODE_CRM_SLEEP_TIME_MS =
+      "dfs.namenode.crm.sleeptime.ms";
+  public static final long DFS_NAMENODE_CRM_SLEEP_TIME_MS_DEFAULT = 300;
+
   public static final int     DFS_NAMENODE_HTTP_PORT_DEFAULT =
       HdfsClientConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT;
   public static final String  DFS_NAMENODE_HTTP_ADDRESS_KEY =

+ 31 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java

@@ -140,6 +140,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
    */
   private long scannedBlocks;
 
+  /**
+   * Avoid holding the global lock for a long time.
+   */
+  private long lastScanTimeMs;
+
   public CacheReplicationMonitor(FSNamesystem namesystem,
       CacheManager cacheManager, long intervalMs, ReentrantLock lock) {
     this.namesystem = namesystem;
@@ -284,6 +289,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
   private void rescan() throws InterruptedException {
     scannedDirectives = 0;
     scannedBlocks = 0;
+    lastScanTimeMs = Time.monotonicNow();
     try {
       namesystem.writeLock();
       try {
@@ -315,6 +321,19 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     }
   }
 
+  private void reacquireLock(long last) {
+    long now = Time.monotonicNow();
+    if (now - last > cacheManager.getMaxLockTimeMs()) {
+      try {
+        namesystem.writeUnlock();
+        Thread.sleep(cacheManager.getSleepTimeMs());
+      } catch (InterruptedException e) {
+      } finally {
+        namesystem.writeLock();
+      }
+    }
+  }
+
   /**
    * Scan all CacheDirectives.  Use the information to figure out
    * what cache replication factor each block should have.
@@ -447,6 +466,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     if (cachedTotal == neededTotal) {
       directive.addFilesCached(1);
     }
+    if (cacheManager.isCheckLockTimeEnable()) {
+      reacquireLock(lastScanTimeMs);
+      lastScanTimeMs = Time.monotonicNow();
+    }
     LOG.debug("Directive {}: caching {}: {}/{} bytes", directive.getId(),
         file.getFullPathName(), cachedTotal, neededTotal);
   }
@@ -518,6 +541,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
         }
       }
     }
+    if (cacheManager.isCheckLockTimeEnable()) {
+      reacquireLock(lastScanTimeMs);
+      lastScanTimeMs = Time.monotonicNow();
+    }
     for (Iterator<CachedBlock> cbIter = cachedBlocks.iterator();
         cbIter.hasNext(); ) {
       scannedBlocks++;
@@ -603,6 +630,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
         );
         cbIter.remove();
       }
+      if (cacheManager.isCheckLockTimeEnable()) {
+        reacquireLock(lastScanTimeMs);
+        lastScanTimeMs = Time.monotonicNow();
+      }
     }
   }
 

+ 28 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java

@@ -17,6 +17,12 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_CHECKLOCKTIME_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_CHECKLOCKTIME_ENABLE;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_MAXLOCKTIME_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_MAXLOCKTIME_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_SLEEP_TIME_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CRM_SLEEP_TIME_MS_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
@@ -194,6 +200,9 @@ public class CacheManager {
    * The CacheReplicationMonitor.
    */
   private CacheReplicationMonitor monitor;
+  private boolean isCheckLockTimeEnable;
+  private long maxLockTimeMs;
+  private long sleepTimeMs;
 
   public static final class PersistState {
     public final CacheManagerSection section;
@@ -235,12 +244,31 @@ public class CacheManager {
     this.cachedBlocks = enabled ? new LightWeightGSet<CachedBlock, CachedBlock>(
           LightWeightGSet.computeCapacity(cachedBlocksPercent,
               "cachedBlocks")) : new LightWeightGSet<>(0);
+    this.isCheckLockTimeEnable = conf.getBoolean(
+        DFS_NAMENODE_CRM_CHECKLOCKTIME_ENABLE,
+        DFS_NAMENODE_CRM_CHECKLOCKTIME_DEFAULT);
+    this.maxLockTimeMs = conf.getLong(DFS_NAMENODE_CRM_MAXLOCKTIME_MS,
+        DFS_NAMENODE_CRM_MAXLOCKTIME_MS_DEFAULT);
+    this.sleepTimeMs = conf.getLong(DFS_NAMENODE_CRM_SLEEP_TIME_MS,
+        DFS_NAMENODE_CRM_SLEEP_TIME_MS_DEFAULT);
   }
 
   public boolean isEnabled() {
     return enabled;
   }
 
+  public boolean isCheckLockTimeEnable() {
+    return isCheckLockTimeEnable;
+  }
+
+  public long getMaxLockTimeMs() {
+    return this.maxLockTimeMs;
+  }
+
+  public long getSleepTimeMs() {
+    return this.sleepTimeMs;
+  }
+
   /**
    * Resets all tracked directives and pools. Called during 2NN checkpointing to
    * reset FSNamesystem state. See {@link FSNamesystem#clear()}.

+ 27 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -2940,6 +2940,33 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.crm.checklocktime.enable</name>
+  <value>false</value>
+  <description>
+    Set to true to enable the CacheManager to check the amount of time it
+    holds the global rwlock.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.crm.maxlocktime.ms</name>
+  <value>1000</value>
+  <description>
+    The maximum amount of time that the CacheManager should hold the global
+    rwlock. This configuration takes effect only when
+    `dfs.namenode.crm.checklocktime.enable` is set to true.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.crm.sleeptime.ms</name>
+  <value>300</value>
+  <description>
+    The amount of time that the CacheManager should sleep after releasing the
+    global rwlock. This configuration takes effect only when
+    `dfs.namenode.crm.checklocktime.enable` is set to true.
+  </description>
+</property>
+
 <property>
   <name>dfs.datanode.max.locked.memory</name>
   <value>0</value>