Explorar o código

MAPREDUCE-2632. Avoid calling the partitioner when the numReduceTasks is 1. (Ravi Teja Ch N V and Sunil G via kasha)

Karthik Kambatla %!s(int64=10) %!d(string=hai) anos
pai
achega
bdbd10fde1

+ 3 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -109,6 +109,9 @@ Trunk (Unreleased)
     MAPREDUCE-6057. Remove obsolete entries from mapred-default.xml
     (Ray Chiang via aw)
 
+    MAPREDUCE-2632. Avoid calling the partitioner when the numReduceTasks is 1.
+    (Ravi Teja Ch N V and Sunil G via kasha)
+
   BUG FIXES
 
     MAPREDUCE-6191. Improve clearing stale state of Java serialization

+ 7 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapFileOutputFormat.java

@@ -95,7 +95,13 @@ extends FileOutputFormat<WritableComparable, Writable> {
                                   Partitioner<K, V> partitioner,
                                   K key,
                                   V value) throws IOException {
-    int part = partitioner.getPartition(key, value, readers.length);
+    int readerLength = readers.length;
+    int part;
+    if (readerLength <= 1) {
+      part = 0;
+    } else {
+      part = partitioner.getPartition(key, value, readers.length);
+    }
     return readers[part].get(key, value);
   }
 

+ 3 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Partitioner.java

@@ -30,6 +30,9 @@ import org.apache.hadoop.classification.InterfaceStability;
  * is the same as the number of reduce tasks for the job. Hence this controls
  * which of the <code>m</code> reduce tasks the intermediate key (and hence the 
  * record) is sent for reduction.</p>
+ *
+ * <p>Note: A <code>Partitioner</code> is created only when there are multiple
+ * reducers.</p>
  * 
  * @see Reducer
  */

+ 6 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Partitioner.java

@@ -31,9 +31,12 @@ import org.apache.hadoop.conf.Configurable;
  * is the same as the number of reduce tasks for the job. Hence this controls
  * which of the <code>m</code> reduce tasks the intermediate key (and hence the 
  * record) is sent for reduction.</p>
- * 
- * Note: If you require your Partitioner class to obtain the Job's configuration
- * object, implement the {@link Configurable} interface.
+ *
+ * <p>Note: A <code>Partitioner</code> is created only when there are multiple
+ * reducers.</p>
+ *
+ * <p>Note: If you require your Partitioner class to obtain the Job's
+ * configuration object, implement the {@link Configurable} interface.</p>
  * 
  * @see Reducer
  */

+ 7 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MapFileOutputFormat.java

@@ -114,7 +114,13 @@ public class MapFileOutputFormat
   public static <K extends WritableComparable<?>, V extends Writable>
       Writable getEntry(MapFile.Reader[] readers, 
       Partitioner<K, V> partitioner, K key, V value) throws IOException {
-    int part = partitioner.getPartition(key, value, readers.length);
+    int readerLength = readers.length;
+    int part;
+    if (readerLength <= 1) {
+      part = 0;
+    } else {
+      part = partitioner.getPartition(key, value, readers.length);
+    }
     return readers[part].get(key, value);
   }
 }

+ 70 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestMapFileOutputFormat.java

@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.MapFile.Reader;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+public class TestMapFileOutputFormat {
+
+  @SuppressWarnings("static-access")
+  @Test
+  public void testPartitionerShouldNotBeCalledWhenOneReducerIsPresent()
+      throws Exception {
+    MapFileOutputFormat outputFormat = new MapFileOutputFormat();
+    Reader reader = Mockito.mock(Reader.class);
+    Reader[] readers = new Reader[]{reader};
+    outputFormat.getEntry(readers, new MyPartitioner(), new Text(), new Text());
+    assertTrue(!MyPartitioner.isGetPartitionCalled());
+  }
+
+  protected void tearDown() throws Exception {
+    MyPartitioner.setGetPartitionCalled(false);
+  };
+  private static class MyPartitioner
+      implements
+        Partitioner<WritableComparable, Writable> {
+    private static boolean getPartitionCalled = false;
+
+    @Override
+    public int getPartition(WritableComparable key, Writable value,
+        int numPartitions) {
+      setGetPartitionCalled(true);
+      return -1;
+    }
+
+    public static boolean isGetPartitionCalled() {
+      return getPartitionCalled;
+    }
+
+    @Override
+    public void configure(JobConf job) {
+    }
+
+    public static void setGetPartitionCalled(boolean getPartitionCalled) {
+      MyPartitioner.getPartitionCalled = getPartitionCalled;
+    }
+  }
+}

+ 65 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestMapFileOutputFormat.java

@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.output;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.MapFile.Reader;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+public class TestMapFileOutputFormat {
+
+  @SuppressWarnings("static-access")
+  @Test
+  public void testPartitionerShouldNotBeCalledWhenOneReducerIsPresent()
+      throws Exception {
+    MapFileOutputFormat outputFormat = new MapFileOutputFormat();
+    Reader reader = Mockito.mock(Reader.class);
+    Reader[] readers = new Reader[]{reader};
+    outputFormat.getEntry(readers, new MyPartitioner(), new Text(), new Text());
+    assertTrue(!MyPartitioner.isGetPartitionCalled());
+  }
+
+  public void tearDown() throws Exception {
+    MyPartitioner.setGetPartitionCalled(false);
+  }
+  private static class MyPartitioner
+      extends
+        Partitioner<WritableComparable, Writable> {
+    private static boolean getPartitionCalled = false;
+
+    public static boolean isGetPartitionCalled() {
+      return getPartitionCalled;
+    }
+    @Override
+    public int getPartition(WritableComparable key, Writable value,
+        int numPartitions) {
+      setGetPartitionCalled(true);
+      return -1;
+    }
+    public static void setGetPartitionCalled(boolean getPartitionCalled) {
+      MyPartitioner.getPartitionCalled = getPartitionCalled;
+    }
+  }
+}