Explorar el Código

HADOOP-3954. Disable record skipping by default. Contributed by Sharad Agarwal.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@691099 13f79535-47bb-0310-9956-ffa450edef68
Christopher Douglas hace 17 años
padre
commit
1fbc087cf9

+ 3 - 0
CHANGES.txt

@@ -440,6 +440,9 @@ Trunk (unreleased changes)
     HADOOP-3910. Remove unused ClusterTestDFSNamespaceLogging and
     ClusterTestDFS. (Tsz Wo (Nicholas), SZE via cdouglas)
 
+    HADOOP-3954. Disable record skipping by default. (Sharad Agarwal via
+    cdouglas)
+
 Release 0.18.1 - Unreleased
 
   BUG FIXES

+ 45 - 0
conf/hadoop-default.xml

@@ -1129,6 +1129,51 @@ creations/deletions), or "all".</description>
     <description> Number of lines per split in NLineInputFormat.
     </description>
   </property>
+  
+  <property>
+    <name>mapred.skip.mode.enabled</name>
+    <value>false</value>
+    <description> Indicates whether skipping of bad records is enabled or not.
+    If enabled, the framework will try to find bad records and skip
+    them on further attempts.
+    </description>
+  </property>
+  
+  <property>
+    <name>mapred.skip.attempts.to.start.skipping</name>
+    <value>2</value>
+    <description> The number of Task attempts AFTER which skip mode 
+    will be kicked off. When skip mode is kicked off, the 
+    task reports the range of records which it will process 
+    next to the TaskTracker, so that on failures the TT knows which 
+    ones are possibly the bad records. On further executions, 
+    those are skipped.
+    </description>
+  </property>
+  
+  <property>
+    <name>mapred.skip.map.auto.incr.proc.count</name>
+    <value>true</value>
+    <description> If this flag is set to true, 
+    Counters.Application.MAP_PROCESSED_RECORDS is incremented 
+    by MapRunner after invoking the map function. This value must be set to 
+    false for applications which process the records asynchronously 
+    or buffer the input records (for example, streaming). 
+    In such cases applications should increment this counter on their own.
+    </description>
+  </property>
+  
+  <property>
+    <name>mapred.skip.reduce.auto.incr.proc.count</name>
+    <value>true</value>
+    <description> If this flag is set to true, 
+    Counters.Application.REDUCE_PROCESSED_RECORDS is incremented 
+    by the framework after invoking the reduce function. This value must be set to 
+    false for applications which process the records asynchronously 
+    or buffer the input records (for example, streaming). 
+    In such cases applications should increment this counter on their own.
+    </description>
+  </property>
 
 <!-- ipc properties -->
 

+ 1 - 0
src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreamingBadRecords.java

@@ -174,6 +174,7 @@ public class TestStreamingBadRecords extends ClusterMapReduceTestCase
       "-jobconf", "mapred.skip.attempts.to.start.skipping="+attSkip,
       "-jobconf", "mapred.map.max.attempts="+mapperAttempts,
       "-jobconf", "mapred.reduce.max.attempts="+reducerAttempts,
+      "-jobconf", "mapred.skip.mode.enabled=true",
       "-jobconf", "mapred.map.tasks=1",
       "-jobconf", "mapred.reduce.tasks=1",
       "-jobconf", "mapred.task.timeout=30000",

+ 1 - 1
src/mapred/org/apache/hadoop/mapred/SkipBadRecords.java

@@ -46,7 +46,7 @@ public class SkipBadRecords {
    *         <code>false</code> otherwise.
    */
   public static boolean getEnabled(Configuration conf) {
-    return conf.getBoolean(ENABLED, true);
+    return conf.getBoolean(ENABLED, false);
   }
   
   /**

+ 1 - 0
src/test/org/apache/hadoop/mapred/TestBadRecords.java

@@ -72,6 +72,7 @@ public class TestBadRecords extends ClusterMapReduceTestCase {
     conf.setNumMapTasks(1);
     conf.setNumReduceTasks(1);
     conf.setInt("mapred.task.timeout", 30*1000);
+    SkipBadRecords.setEnabled(conf, true);
     
     SkipBadRecords.setAttemptsToStartSkipping(conf,0);
     //the no of attempts to successfully complete the task depends