Ver código fonte

MAPREDUCE-5808. Port output replication factor configurable for terasort to Hadoop 1.x. Contributed by Chuan Liu.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1@1580983 13f79535-47bb-0310-9956-ffa450edef68
Chris Nauroth 11 anos atrás
pai
commit
47c5cd7190

+ 3 - 0
CHANGES.txt

@@ -202,6 +202,9 @@ Release 1.3.0 - unreleased
     HDFS-5516. WebHDFS does not require user name when anonymous http requests
     are disallowed. (Miodrag Radulovic via cnauroth)
 
+    MAPREDUCE-5808. Port output replication factor configurable for terasort to
+    Hadoop 1.x. (Chuan Liu via cnauroth)
+
 Release 1.2.2 - unreleased
 
   INCOMPATIBLE CHANGES

+ 6 - 1
src/examples/org/apache/hadoop/examples/terasort/TeraSort.java

@@ -49,6 +49,7 @@ import org.apache.hadoop.util.ToolRunner;
  */
 public class TeraSort extends Configured implements Tool {
   private static final Log LOG = LogFactory.getLog(TeraSort.class);
+  private static final String OUTPUT_REPLICATION = "mapreduce.terasort.output.replication";
 
   /**
    * A partitioner that splits text keys into roughly equal partitions
@@ -222,6 +223,10 @@ public class TeraSort extends Configured implements Tool {
     }
     
   }
+
+  public static int getOutputReplication(JobConf job) {
+    return job.getInt(OUTPUT_REPLICATION, 1);
+  }
   
   public int run(String[] args) throws Exception {
     LOG.info("starting");
@@ -243,7 +248,7 @@ public class TeraSort extends Configured implements Tool {
     TeraInputFormat.writePartitionFile(job, partitionFile);
     DistributedCache.addCacheFile(partitionUri, job);
     DistributedCache.createSymlink(job);
-    job.setInt("dfs.replication", 1);
+    job.setInt("dfs.replication", getOutputReplication(job));
     TeraOutputFormat.setFinalSync(job, true);
     JobClient.runJob(job);
     LOG.info("done");