
Fix for HADOOP-93. Convert min split size from int to long, and permit its specification in the config.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@387587 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting, 19 years ago
parent commit 7b5ab5946f
2 changed files with 17 additions and 4 deletions
  1. conf/hadoop-default.xml (+8 −0)
  2. src/java/org/apache/hadoop/mapred/InputFormatBase.java (+9 −4)

+ 8 - 0
conf/hadoop-default.xml

@@ -221,6 +221,14 @@
   be executed in parallel.</description>
 </property>
 
+<property>
+  <name>mapred.min.split.size</name>
+  <value>0</value>
+  <description>The minimum size chunk that map input should be split
+  into.  Note that some file formats may have minimum split sizes that
+  take priority over this setting.</description>
+</property>
+
 
 <!-- ipc properties -->
 

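For context, a job can override this new default programmatically as well as through the site configuration. A minimal sketch, assuming the JobConf API of this era; the 64 MB value is purely illustrative and not part of this commit:

    JobConf job = new JobConf();
    // Ask for splits of at least 64 MB; an InputFormat's own minimum
    // (set via setMinSplitSize, below) still takes priority if it is larger.
    job.set("mapred.min.split.size", Long.toString(64L * 1024 * 1024));
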
+ 9 - 4
src/java/org/apache/hadoop/mapred/InputFormatBase.java

@@ -33,9 +33,9 @@ public abstract class InputFormatBase implements InputFormat {
 
   private static final double SPLIT_SLOP = 0.1;   // 10% slop
 
-  private int minSplitSize = 1;
+  private long minSplitSize = 1;
 
-  protected void setMinSplitSize(int minSplitSize) {
+  protected void setMinSplitSize(long minSplitSize) {
     this.minSplitSize = minSplitSize;
   }
 
@@ -112,8 +112,11 @@ public abstract class InputFormatBase implements InputFormat {
       bytesPerSplit = fsBlockSize;
     }
 
-    if (bytesPerSplit < minSplitSize) {           // no smaller than min size
-      bytesPerSplit = minSplitSize;
+    long configuredMinSplitSize = job.getLong("mapred.min.split.size", 0);
+    if (configuredMinSplitSize < minSplitSize)
+      configuredMinSplitSize = minSplitSize;
+    if (bytesPerSplit < configuredMinSplitSize) { // no smaller than min size
+      bytesPerSplit = configuredMinSplitSize;
     }
 
     long maxPerSplit = bytesPerSplit + (long)(bytesPerSplit*SPLIT_SLOP);
@@ -135,7 +138,9 @@ public abstract class InputFormatBase implements InputFormat {
       if (bytesRemaining != 0) {
         splits.add(new FileSplit(file, length-bytesRemaining, bytesRemaining));
       }
+      //LOG.info( "Generating splits for " + i + "th file: " + file.getName() );
     }
+    //LOG.info( "Total # of splits: " + splits.size() );
     return (FileSplit[])splits.toArray(new FileSplit[splits.size()]);
   }
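
Net effect of the change above: the configured mapred.min.split.size can raise, but never lower, a format's programmatic minimum. An equivalent restatement of the clamping logic, with illustrative values that are not from this commit:

    long formatFloor = 2048L;                                      // set via setMinSplitSize()
    long configured  = job.getLong("mapred.min.split.size", 0);    // 0 by default after this commit
    long effectiveMin = Math.max(formatFloor, configured);         // config cannot lower the floor
    bytesPerSplit = Math.max(bytesPerSplit, effectiveMin);         // no split smaller than this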