TestMapReduceLazyOutput.java

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapreduce;

import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Arrays;
import java.util.List;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRCluster;
import org.apache.hadoop.mapred.Utils;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
 * A JUnit test for the MapReduce framework's feature of creating a part
 * file only when a task explicitly writes output (via Context#write).
 * This prevents empty, zero-byte part files from being created.
 */
public class TestMapReduceLazyOutput extends TestCase {
  private static final int NUM_HADOOP_SLAVES = 3;
  private static final int NUM_MAPS_PER_NODE = 2;
  private static final Path INPUT = new Path("/testlazy/input");

  private static final List<String> input =
      Arrays.asList("All", "Roads", "Lead", "To", "Hadoop");
  public static class TestMapper
      extends Mapper<LongWritable, Text, LongWritable, Text> {
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String id = context.getTaskAttemptID().toString();
      // Map task 0 (attempt 0) emits nothing, so with lazy output
      // enabled it should not create a part file.
      if (!id.endsWith("0_0")) {
        context.write(key, value);
      }
    }
  }
  public static class TestReducer
      extends Reducer<LongWritable, Text, LongWritable, Text> {
    public void reduce(LongWritable key, Iterable<Text> values,
        Context context) throws IOException, InterruptedException {
      String id = context.getTaskAttemptID().toString();
      // Reduce task 0 (attempt 0) emits nothing, so with lazy output
      // enabled it should not create a part file.
      if (!id.endsWith("0_0")) {
        for (Text val : values) {
          context.write(key, val);
        }
      }
    }
  }
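  /**
   * Runs one identity-style job over INPUT, writing to the given output
   * directory with the given number of reducers, and with the output
   * format either wrapped in LazyOutputFormat or set directly on the job.
   */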
  private static void runTestLazyOutput(Configuration conf, Path output,
      int numReducers, boolean createLazily) throws Exception {
    Job job = new Job(conf, "Test-Lazy-Output");

    FileInputFormat.setInputPaths(job, INPUT);
    FileOutputFormat.setOutputPath(job, output);

    job.setJarByClass(TestMapReduceLazyOutput.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(numReducers);

    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);
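    // LazyOutputFormat wraps the real output format and defers creating
    // the part file (and its RecordWriter) until the first record is
    // written, so a task that never writes produces no file at all.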
    if (createLazily) {
      LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    } else {
      job.setOutputFormatClass(TextOutputFormat.class);
    }

    assertTrue(job.waitForCompletion(true));
  }
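  /**
   * Writes one small text file per expected map task under INPUT, each
   * containing the same few words, one per line.
   */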
  public void createInput(FileSystem fs, int numMappers) throws Exception {
    for (int i = 0; i < numMappers; i++) {
      OutputStream os = fs.create(new Path(INPUT, "text" + i + ".txt"));
      Writer wr = new OutputStreamWriter(os);
      for (String inp : input) {
        wr.write(inp + "\n");
      }
      wr.close();
    }
  }
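  /**
   * Brings up mini DFS and MR clusters, runs three jobs, and checks the
   * number of part files each job leaves in its output directory.
   */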
  public void testLazyOutput() throws Exception {
    MiniDFSCluster dfs = null;
    MiniMRCluster mr = null;
    FileSystem fileSys = null;
    try {
      Configuration conf = new Configuration();

      // Start the mini-DFS and mini-MR clusters.
      dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
      fileSys = dfs.getFileSystem();
      mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);

      int numReducers = 2;
      int numMappers = NUM_HADOOP_SLAVES * NUM_MAPS_PER_NODE;

      createInput(fileSys, numMappers);
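      // The three jobs below read the same input; they differ only in the
      // number of reducers and whether lazy output is enabled.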
      // Test 1: lazy output enabled, 2 reducers. Reducer 0 writes nothing,
      // so we expect one part file fewer than the number of reducers.
      Path output1 = new Path("/testlazy/output1");
      runTestLazyOutput(mr.createJobConf(), output1, numReducers, true);

      Path[] fileList =
          FileUtil.stat2Paths(fileSys.listStatus(output1,
              new Utils.OutputFileUtils.OutputFilesFilter()));
      for (int i = 0; i < fileList.length; ++i) {
        System.out.println("Test1 File list[" + i + "]: " + fileList[i]);
      }
      assertEquals(numReducers - 1, fileList.length);
      // Test 2: lazy output enabled, 0 reducers; maps write directly to the
      // output files. Mapper 0 writes nothing, so expect numMappers - 1 files.
      Path output2 = new Path("/testlazy/output2");
      runTestLazyOutput(mr.createJobConf(), output2, 0, true);

      fileList =
          FileUtil.stat2Paths(fileSys.listStatus(output2,
              new Utils.OutputFileUtils.OutputFilesFilter()));
      for (int i = 0; i < fileList.length; ++i) {
        System.out.println("Test2 File list[" + i + "]: " + fileList[i]);
      }
      assertEquals(numMappers - 1, fileList.length);
      // Test 3: 0 reducers, but lazy output disabled; every map task creates
      // a part file even if it writes nothing, so expect numMappers files.
      Path output3 = new Path("/testlazy/output3");
      runTestLazyOutput(mr.createJobConf(), output3, 0, false);

      fileList =
          FileUtil.stat2Paths(fileSys.listStatus(output3,
              new Utils.OutputFileUtils.OutputFilesFilter()));
      for (int i = 0; i < fileList.length; ++i) {
        System.out.println("Test3 File list[" + i + "]: " + fileList[i]);
      }
      assertEquals(numMappers, fileList.length);
    } finally {
      if (dfs != null) { dfs.shutdown(); }
      if (mr != null) { mr.shutdown(); }
    }
  }
}