
HADOOP-13680. fs.s3a.readahead.range to use getLongBytes. Contributed by Abhishek Modi.

Steve Loughran · 8 years ago · commit a1761a841e

+ 12 - 6
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -1055,8 +1055,10 @@
 
 <property>
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
-  <description>How big (in bytes) to split upload or copy operations up into.</description>
+  <value>100M</value>
+  <description>How big (in bytes) to split upload or copy operations up into.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>
 
 <property>
@@ -1064,7 +1066,8 @@
   <value>2147483647</value>
   <description>How big (in bytes) to split upload or copy operations up into.
     This also controls the partition size in renamed files, as rename() involves
-    copying the source file(s)
+    copying the source file(s).
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
   </description>
 </property>
 
@@ -1120,8 +1123,9 @@
 
 <property>
   <name>fs.s3a.block.size</name>
-  <value>33554432</value>
+  <value>32M</value>
   <description>Block size to use when reading files using s3a: file system.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
   </description>
 </property>
 
@@ -1183,10 +1187,12 @@
 
 <property>
   <name>fs.s3a.readahead.range</name>
-  <value>65536</value>
+  <value>64K</value>
   <description>Bytes to read ahead during a seek() before closing and
   re-opening the S3 HTTP connection. This option will be overridden if
-  any call to setReadahead() is made to an open stream.</description>
+  any call to setReadahead() is made to an open stream.
+  A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>
 
 <property>
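
Aside: for anyone unfamiliar with the new suffix handling, the sketch below shows how `Configuration.getLongBytes()` (the method this patch switches to) interprets such values. The property names are taken from the diff above; the standalone class and `main` wrapper are illustrative only.

```java
import org.apache.hadoop.conf.Configuration;

public class SuffixDemo {
  public static void main(String[] args) {
    // Empty Configuration: skip loading default resources for the demo.
    Configuration conf = new Configuration(false);

    // The new default written by this patch.
    conf.set("fs.s3a.multipart.size", "100M");
    // getLongBytes understands the binary suffixes K, M, G, T, P.
    System.out.println(conf.getLongBytes("fs.s3a.multipart.size", 0));
    // prints 104857600, i.e. 100 * 1024 * 1024

    // Plain numeric values still parse, so existing configs keep working.
    conf.set("fs.s3a.readahead.range", "65536");
    System.out.println(conf.getLongBytes("fs.s3a.readahead.range", 0));
    // prints 65536
  }
}
```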

+ 14 - 3
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

@@ -183,10 +183,11 @@ public class S3AFileSystem extends FileSystem {
           MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD);
 
       //check but do not store the block size
-      longOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
+      longBytesOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
       enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);
 
-      readAhead = longOption(conf, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE, 0);
+      readAhead = longBytesOption(conf, READAHEAD_RANGE,
+          DEFAULT_READAHEAD_RANGE, 0);
       storageStatistics = (S3AStorageStatistics)
           GlobalStorageStatistics.INSTANCE
               .put(S3AStorageStatistics.NAME,
@@ -357,6 +358,15 @@ public class S3AFileSystem extends FileSystem {
     return s3;
   }
 
+  /**
+   * Returns the read ahead range value used by this filesystem.
+   * @return the readahead range, in bytes
+   */
+  @VisibleForTesting
+  long getReadAheadRange() {
+    return readAhead;
+  }
+
   /**
    * Get the input policy for this FS instance.
    * @return the input policy
@@ -1883,7 +1894,7 @@ public class S3AFileSystem extends FileSystem {
    */
   @Deprecated
   public long getDefaultBlockSize() {
-    return getConf().getLong(FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE);
+    return getConf().getLongBytes(FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE);
   }
 
   @Override
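
The switch from `getLong()` to `getLongBytes()` above is what makes the suffixed defaults safe: plain `getLong()` cannot parse them. A minimal demonstration (the standalone class is illustrative, not part of the patch):

```java
import org.apache.hadoop.conf.Configuration;

public class GetLongVsGetLongBytes {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.set("fs.s3a.block.size", "32M");

    // Parses the suffix: prints 33554432.
    System.out.println(conf.getLongBytes("fs.s3a.block.size", 0));

    try {
      // getLong() feeds "32M" straight to Long.parseLong and fails.
      conf.getLong("fs.s3a.block.size", 0);
    } catch (NumberFormatException expected) {
      System.out.println("getLong cannot parse suffixed values: " + expected);
    }
  }
}
```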

+ 22 - 1
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java

@@ -509,6 +509,27 @@ public final class S3AUtils {
     return v;
   }
 
+  /**
+   * Get a long option >= the minimum allowed value, supporting memory
+   * prefixes K,M,G,T,P.
+   * @param conf configuration
+   * @param key key to look up
+   * @param defVal default value
+   * @param min minimum value
+   * @return the value
+   * @throws IllegalArgumentException if the value is below the minimum
+   */
+  static long longBytesOption(Configuration conf,
+      String key,
+      long defVal,
+      long min) {
+    long v = conf.getLongBytes(key, defVal);
+    Preconditions.checkArgument(v >= min,
+        String.format("Value of %s: %d is below the minimum value %d",
+            key, v, min));
+    return v;
+  }
+
   /**
    * Get a size property from the configuration: this property must
    * be at least equal to {@link Constants#MULTIPART_MIN_SIZE}.
@@ -521,7 +542,7 @@ public final class S3AUtils {
    */
   public static long getMultipartSizeProperty(Configuration conf,
       String property, long defVal) {
-    long partSize = conf.getLong(property, defVal);
+    long partSize = conf.getLongBytes(property, defVal);
     if (partSize < MULTIPART_MIN_SIZE) {
       LOG.warn("{} must be at least 5 MB; configured value is {}",
           property, partSize);
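
Usage sketch for the new helper (hypothetical caller; note that `longBytesOption` is package-private, so this assumes code living in `org.apache.hadoop.fs.s3a`):

```java
package org.apache.hadoop.fs.s3a;

import org.apache.hadoop.conf.Configuration;

public class LongBytesOptionDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.set("fs.s3a.readahead.range", "64K");

    // Parses the suffix and enforces the minimum in one step: prints 65536.
    System.out.println(
        S3AUtils.longBytesOption(conf, "fs.s3a.readahead.range", 65536, 0));

    conf.set("fs.s3a.readahead.range", "-1");
    try {
      S3AUtils.longBytesOption(conf, "fs.s3a.readahead.range", 65536, 0);
    } catch (IllegalArgumentException expected) {
      // "Value of fs.s3a.readahead.range: -1 is below the minimum value 0"
      System.out.println(expected.getMessage());
    }
  }
}
```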

+ 13 - 9
hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

@@ -762,16 +762,20 @@ from placing its declaration on the command line.
 
     <property>
       <name>fs.s3a.multipart.size</name>
-      <value>104857600</value>
+      <value>100M</value>
       <description>How big (in bytes) to split upload or copy operations up into.
-      This also controls the partition size in renamed files, as rename() involves
-      copying the source file(s)</description>
+        A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+      </description>
     </property>
 
     <property>
       <name>fs.s3a.multipart.threshold</name>
       <value>2147483647</value>
-      <description>Threshold before uploads or copies use parallel multipart operations.</description>
+      <description>How big (in bytes) to split upload or copy operations up into.
+        This also controls the partition size in renamed files, as rename() involves
+        copying the source file(s).
+        A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+      </description>
     </property>
 
     <property>
@@ -825,7 +829,7 @@ from placing its declaration on the command line.
 
     <property>
       <name>fs.s3a.block.size</name>
-      <value>33554432</value>
+      <value>32M</value>
       <description>Block size to use when reading files using s3a: file system.
       </description>
     </property>
@@ -859,7 +863,7 @@ from placing its declaration on the command line.
 
     <property>
       <name>fs.s3a.readahead.range</name>
-      <value>65536</value>
+      <value>64K</value>
       <description>Bytes to read ahead during a seek() before closing and
       re-opening the S3 HTTP connection. This option will be overridden if
       any call to setReadahead() is made to an open stream.</description>
@@ -1029,9 +1033,9 @@ S3 endpoints, as disks are not used for intermediate data storage.
 
 <property>
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
-  <description>
-  How big (in bytes) to split upload or copy operations up into.
+  <value>100M</value>
+  <description>How big (in bytes) to split upload or copy operations up into.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
   </description>
 </property>
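
To make the two multipart settings concrete, the arithmetic below walks through when an upload switches to multipart and how many parts result. The decision logic shown is a simplification of what the AWS transfer machinery actually does; the values are the documented defaults.

```java
public class MultipartMath {
  public static void main(String[] args) {
    long partSize = 100L * 1024 * 1024;       // fs.s3a.multipart.size = 100M
    long threshold = 2147483647L;             // fs.s3a.multipart.threshold
    long fileSize = 5L * 1024 * 1024 * 1024;  // a 5 GB upload

    if (fileSize >= threshold) {
      long parts = (fileSize + partSize - 1) / partSize;  // ceiling division
      System.out.println("multipart upload, " + parts + " parts");  // 52 parts
    } else {
      System.out.println("single PUT");
    }
  }
}
```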
 

+ 12 - 1
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java

@@ -380,7 +380,7 @@ public class ITestS3AConfiguration {
       byte[] file = ContractTestUtils.toAsciiByteArray("test file");
       ContractTestUtils.writeAndRead(fs,
           new Path("/path/style/access/testFile"), file, file.length,
-          conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
+          (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
     } catch (final AWSS3IOException e) {
       LOG.error("Caught exception: ", e);
       // Catch/pass standard path style access behaviour when live bucket
@@ -451,6 +451,16 @@
         tmp1.getParent(), tmp2.getParent());
   }
 
+  @Test
+  public void testReadAheadRange() throws Exception {
+    conf = new Configuration();
+    conf.set(Constants.READAHEAD_RANGE, "300K");
+    fs = S3ATestUtils.createTestFileSystem(conf);
+    assertNotNull(fs);
+    long readAheadRange = fs.getReadAheadRange();
+    assertEquals("Read Ahead Range Incorrect.", 300 * 1024, readAheadRange);
+  }
+
+
   @Test
   public void testUsernameFromUGI() throws Throwable {
     final String alice = "alice";
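
The new `testReadAheadRange` above needs a live S3 bucket, like the rest of `ITestS3AConfiguration`. The suffix parsing on its own can be verified offline with a plain unit test along these lines (hypothetical test class, not part of the patch):

```java
import static org.junit.Assert.assertEquals;

import org.apache.hadoop.conf.Configuration;
import org.junit.Test;

public class TestReadAheadSuffix {
  @Test
  public void suffixParsesToBytes() {
    Configuration conf = new Configuration(false);
    conf.set("fs.s3a.readahead.range", "300K");
    // Same arithmetic the integration test asserts via the live filesystem.
    assertEquals(300 * 1024, conf.getLongBytes("fs.s3a.readahead.range", 0));
  }
}
```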