Browse Source

MAPREDUCE-6147. Support mapreduce.input.fileinputformat.split.maxsize. (Zhihai Xu via kasha)

Karthik Kambatla 10 years ago
parent
commit
ebec913e20

+ 3 - 0
CHANGES.txt

@@ -61,6 +61,9 @@ Release 1.3.0 - unreleased
     HADOOP-10614. CBZip2InputStream is not threadsafe (Xiangrui Meng via Sandy
     Ryza)
 
+    MAPREDUCE-6147. Support mapreduce.input.fileinputformat.split.maxsize.
+    (Zhihai Xu via kasha)
+
   BUG FIXES
 
     MAPREDUCE-4490. Fixed LinuxTaskController to re-initialize user log

+ 18 - 2
src/mapred/org/apache/hadoop/mapred/lib/CombineFileInputFormat.java

@@ -141,7 +141,23 @@ public abstract class CombineFileInputFormat<K, V>
     }
     return codec instanceof SplittableCompressionCodec;
   }
-  
+
+  /**
+   * First get "mapred.max.split.size".
+   * If "mapred.max.split.size" is not set,
+   * then get "mapreduce.input.fileinputformat.split.maxsize".
+   * If "mapreduce.input.fileinputformat.split.maxsize" is not set,
+   * then return 0.
+   */
+  long getConfiguredMaxSplitSize(JobConf job) {
+    long maxSize = job.getLong("mapred.max.split.size", -1L);
+    if (maxSize == -1L) {
+      maxSize = job.getLong("mapreduce.input.fileinputformat.split.maxsize",
+          0);
+    }
+    return maxSize;
+  }
+
   /**
    * default constructor
    */
@@ -171,7 +187,7 @@ public abstract class CombineFileInputFormat<K, V>
     if (maxSplitSize != 0) {
       maxSize = maxSplitSize;
     } else {
-      maxSize = job.getLong("mapred.max.split.size", 0);
+      maxSize = getConfiguredMaxSplitSize(job);
     }
     if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
       throw new IOException("Minimum split size pernode " + minSizeNode +

+ 22 - 0
src/test/org/apache/hadoop/mapred/lib/TestCombineFileInputFormat.java

@@ -898,6 +898,28 @@ public class TestCombineFileInputFormat extends TestCase{
     }
   }
 
+  public void testGetConfiguredMaxSplitSize() throws Throwable {
+    JobConf conf = new JobConf();
+    DummyInputFormat inFormat = new DummyInputFormat();
+    // Neither mapred.max.split.size nor
+    // mapreduce.input.fileinputformat.split.maxsize is set: expect 0.
+    long maxSize = inFormat.getConfiguredMaxSplitSize(conf);
+    assertEquals(0L, maxSize);
+
+    // Only mapreduce.input.fileinputformat.split.maxsize is set:
+    // expect the value of mapreduce.input.fileinputformat.split.maxsize.
+    conf.setLong("mapreduce.input.fileinputformat.split.maxsize", 100L);
+    maxSize = inFormat.getConfiguredMaxSplitSize(conf);
+    assertEquals(100L, maxSize);
+
+    // Both mapred.max.split.size and
+    // mapreduce.input.fileinputformat.split.maxsize are set:
+    // expect mapred.max.split.size to take precedence.
+    conf.setLong("mapred.max.split.size", 1000L);
+    maxSize = inFormat.getConfiguredMaxSplitSize(conf);
+    assertEquals(1000L, maxSize);
+  }
+
   static class TestFilter implements PathFilter {
     private Path p;