浏览代码

HDFS-6046. add dfs.client.mmap.enabled (cmccabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1573887 13f79535-47bb-0310-9956-ffa450edef68
Colin McCabe 11 年之前
父节点
当前提交
c94e43c6df

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -373,6 +373,8 @@ Release 2.4.0 - UNRELEASED
     HDFS-5950. The DFSClient and DataNode should use shared memory segments to
     HDFS-5950. The DFSClient and DataNode should use shared memory segments to
     communicate short-circuit information. (cmccabe)
     communicate short-circuit information. (cmccabe)
 
 
+    HDFS-6046. add dfs.client.mmap.enabled (cmccabe)
+
   OPTIMIZATIONS
   OPTIMIZATIONS
 
 
     HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
     HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -279,6 +279,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory {
     final long shortCircuitStreamsCacheExpiryMs; 
     final long shortCircuitStreamsCacheExpiryMs; 
     final int shortCircuitSharedMemoryWatcherInterruptCheckMs;
     final int shortCircuitSharedMemoryWatcherInterruptCheckMs;
     
     
+    final boolean shortCircuitMmapEnabled;
     final int shortCircuitMmapCacheSize;
     final int shortCircuitMmapCacheSize;
     final long shortCircuitMmapCacheExpiryMs;
     final long shortCircuitMmapCacheExpiryMs;
     final long shortCircuitMmapCacheRetryTimeout;
     final long shortCircuitMmapCacheRetryTimeout;
@@ -398,6 +399,9 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory {
       shortCircuitStreamsCacheExpiryMs = conf.getLong(
       shortCircuitStreamsCacheExpiryMs = conf.getLong(
           DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
           DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY,
           DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT);
           DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT);
+      shortCircuitMmapEnabled = conf.getBoolean(
+          DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED,
+          DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED_DEFAULT);
       shortCircuitMmapCacheSize = conf.getInt(
       shortCircuitMmapCacheSize = conf.getInt(
           DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE,
           DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE,
           DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT);
           DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT);

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -429,6 +429,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024;
   public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024;
   public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic";
   public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic";
   public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false;
   public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false;
+  public static final String DFS_CLIENT_MMAP_ENABLED= "dfs.client.mmap.enabled";
+  public static final boolean DFS_CLIENT_MMAP_ENABLED_DEFAULT = true;
   public static final String DFS_CLIENT_MMAP_CACHE_SIZE = "dfs.client.mmap.cache.size";
   public static final String DFS_CLIENT_MMAP_CACHE_SIZE = "dfs.client.mmap.cache.size";
   public static final int DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT = 256;
   public static final int DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT = 256;
   public static final String DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS = "dfs.client.mmap.cache.timeout.ms";
   public static final String DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS = "dfs.client.mmap.cache.timeout.ms";

+ 4 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java

@@ -1571,7 +1571,10 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
             "at position " + pos);
             "at position " + pos);
       }
       }
     }
     }
-    ByteBuffer buffer = tryReadZeroCopy(maxLength, opts);
+    ByteBuffer buffer = null;
+    if (dfsClient.getConf().shortCircuitMmapEnabled) {
+      buffer = tryReadZeroCopy(maxLength, opts);
+    }
     if (buffer != null) {
     if (buffer != null) {
       return buffer;
       return buffer;
     }
     }

+ 13 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml

@@ -1520,26 +1520,34 @@
   </description>
   </description>
 </property>
 </property>
 
 
+<property>
+  <name>dfs.client.mmap.enabled</name>
+  <value>true</value>
+  <description>
+    If this is set to false, the client won't attempt to perform memory-mapped reads.
+  </description>
+</property>
+
 <property>
 <property>
   <name>dfs.client.mmap.cache.size</name>
   <name>dfs.client.mmap.cache.size</name>
-  <value>1024</value>
+  <value>256</value>
   <description>
   <description>
     When zero-copy reads are used, the DFSClient keeps a cache of recently used
     When zero-copy reads are used, the DFSClient keeps a cache of recently used
     memory mapped regions.  This parameter controls the maximum number of
     memory mapped regions.  This parameter controls the maximum number of
     entries that we will keep in that cache.
     entries that we will keep in that cache.
 
 
-    If this is set to 0, we will not allow mmap.
-
     The larger this number is, the more file descriptors we will potentially
     The larger this number is, the more file descriptors we will potentially
     use for memory-mapped files.  mmaped files also use virtual address space.
     use for memory-mapped files.  mmaped files also use virtual address space.
     You may need to increase your ulimit virtual address space limits before
     You may need to increase your ulimit virtual address space limits before
     increasing the client mmap cache size.
     increasing the client mmap cache size.
+
+    Note that you can still do zero-copy reads when this size is set to 0.
   </description>
   </description>
 </property>
 </property>
 
 
 <property>
 <property>
   <name>dfs.client.mmap.cache.timeout.ms</name>
   <name>dfs.client.mmap.cache.timeout.ms</name>
-  <value>900000</value>
+  <value>3600000</value>
   <description>
   <description>
     The minimum length of time that we will keep an mmap entry in the cache
     The minimum length of time that we will keep an mmap entry in the cache
     between uses.  If an entry is in the cache longer than this, and nobody
     between uses.  If an entry is in the cache longer than this, and nobody
@@ -1558,7 +1566,7 @@
 
 
 <property>
 <property>
   <name>dfs.client.short.circuit.replica.stale.threshold.ms</name>
   <name>dfs.client.short.circuit.replica.stale.threshold.ms</name>
-  <value>3000000</value>
+  <value>1800000</value>
   <description>
   <description>
     The maximum amount of time that we will consider a short-circuit replica to
     The maximum amount of time that we will consider a short-circuit replica to
     be valid, if there is no communication from the DataNode.  After this time
     be valid, if there is no communication from the DataNode.  After this time

+ 61 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestEnhancedByteBufferAccess.java

@@ -21,6 +21,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_ENABLED;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE;
 
 
 import java.io.File;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileInputStream;
@@ -698,4 +700,63 @@ public class TestEnhancedByteBufferAccess {
       }
       }
     }, 10, 60000);
     }, 10, 60000);
   }
   }
+  
+  @Test
+  public void testClientMmapDisable() throws Exception { // mmap disabled => zero-copy read must fail; mmap enabled with cache size 0 => read must succeed
+    HdfsConfiguration conf = initZeroCopyTest();
+    conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, false); // phase 1 runs with client mmap switched off
+    MiniDFSCluster cluster = null;
+    final Path TEST_PATH = new Path("/a");
+    final int TEST_FILE_LENGTH = 16385;
+    final int RANDOM_SEED = 23453;
+    final String CONTEXT = "testClientMmapDisable";
+    FSDataInputStream fsIn = null;
+    DistributedFileSystem fs = null;
+    conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT);
+
+    try {
+      // Phase 1: with DFS_CLIENT_MMAP_ENABLED set to false, we should not do
+      // memory mapped reads, so the zero-copy read below is required to throw.
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      cluster.waitActive();
+      fs = cluster.getFileSystem();
+      DFSTestUtil.createFile(fs, TEST_PATH,
+          TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
+      DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
+      fsIn = fs.open(TEST_PATH);
+      try {
+        fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
+        Assert.fail("expected zero-copy read to fail when client mmaps " +
+            "were disabled.");
+      } catch (UnsupportedOperationException e) { // expected outcome; intentionally ignored so the test proceeds to phase 2
+      }
+    } finally {
+      if (fsIn != null) fsIn.close();
+      if (fs != null) fs.close();
+      if (cluster != null) cluster.shutdown();
+    }
+
+    fsIn = null; // clear the handles so the second finally block only closes phase-2 resources
+    fs = null;
+    cluster = null;
+    try {
+      // Phase 2: now try again with mmap enabled and DFS_CLIENT_MMAP_CACHE_SIZE == 0.
+      // The read should work; the test passes if no exception is thrown.
+      conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, true);
+      conf.setInt(DFS_CLIENT_MMAP_CACHE_SIZE, 0);
+      conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT + ".1"); // NOTE(review): presumably a distinct context name keeps phase 2 from reusing phase-1 client state -- confirm
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+      cluster.waitActive();
+      fs = cluster.getFileSystem();
+      DFSTestUtil.createFile(fs, TEST_PATH,
+          TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
+      DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
+      fsIn = fs.open(TEST_PATH);
+      ByteBuffer buf = fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
+      fsIn.releaseBuffer(buf); // return the buffer to the stream before it is closed in the finally block
+    } finally {
+      if (fsIn != null) fsIn.close();
+      if (fs != null) fs.close();
+      if (cluster != null) cluster.shutdown();
+    }
+  }
 }
 }