1
0

TestMultipleTextOutputFormat.java 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
  18. package org.apache.hadoop.mapred;
  19. import java.io.*;
  20. import junit.framework.TestCase;
  21. import org.apache.hadoop.fs.*;
  22. import org.apache.hadoop.io.*;
  23. import org.apache.hadoop.mapred.lib.*;
  24. public class TestMultipleTextOutputFormat extends TestCase {
  25. private static JobConf defaultConf = new JobConf();
  26. private static FileSystem localFs = null;
  27. static {
  28. try {
  29. localFs = FileSystem.getLocal(defaultConf);
  30. } catch (IOException e) {
  31. throw new RuntimeException("init failure", e);
  32. }
  33. }
  34. // A random task attempt id for testing.
  35. private static String attempt = "attempt_200707121733_0001_m_000000_0";
  36. private static Path workDir =
  37. new Path(new Path(
  38. new Path(System.getProperty("test.build.data", "."),
  39. "data"),
  40. FileOutputCommitter.TEMP_DIR_NAME), "_" + attempt);
  41. private static void writeData(RecordWriter<Text, Text> rw) throws IOException {
  42. for (int i = 10; i < 40; i++) {
  43. String k = "" + i;
  44. String v = "" + i;
  45. rw.write(new Text(k), new Text(v));
  46. }
  47. }
  48. static class KeyBasedMultipleTextOutputFormat extends MultipleTextOutputFormat<Text, Text> {
  49. protected String generateFileNameForKeyValue(Text key, Text v, String name) {
  50. return key.toString().substring(0, 1) + "-" + name;
  51. }
  52. }
  53. private static void test1(JobConf job) throws IOException {
  54. FileSystem fs = FileSystem.getLocal(job);
  55. String name = "part-00000";
  56. KeyBasedMultipleTextOutputFormat theOutputFormat = new KeyBasedMultipleTextOutputFormat();
  57. RecordWriter<Text, Text> rw = theOutputFormat.getRecordWriter(fs, job, name, null);
  58. writeData(rw);
  59. rw.close(null);
  60. }
  61. private static void test2(JobConf job) throws IOException {
  62. FileSystem fs = FileSystem.getLocal(job);
  63. String name = "part-00000";
  64. //pretend that we have input file with 1/2/3 as the suffix
  65. job.set("map.input.file", "1/2/3");
  66. // we use the last two legs of the input file as the output file
  67. job.set("mapred.outputformat.numOfTrailingLegs", "2");
  68. MultipleTextOutputFormat<Text, Text> theOutputFormat = new MultipleTextOutputFormat<Text, Text>();
  69. RecordWriter<Text, Text> rw = theOutputFormat.getRecordWriter(fs, job, name, null);
  70. writeData(rw);
  71. rw.close(null);
  72. }
  73. public void testFormat() throws Exception {
  74. JobConf job = new JobConf();
  75. job.set("mapred.task.id", attempt);
  76. FileOutputFormat.setOutputPath(job, workDir.getParent().getParent());
  77. FileOutputFormat.setWorkOutputPath(job, workDir);
  78. FileSystem fs = workDir.getFileSystem(job);
  79. if (!fs.mkdirs(workDir)) {
  80. fail("Failed to create output directory");
  81. }
  82. //System.out.printf("workdir: %s\n", workDir.toString());
  83. TestMultipleTextOutputFormat.test1(job);
  84. TestMultipleTextOutputFormat.test2(job);
  85. String file_11 = "1-part-00000";
  86. File expectedFile_11 = new File(new Path(workDir, file_11).toString());
  87. //System.out.printf("expectedFile_11: %s\n", new Path(workDir, file_11).toString());
  88. StringBuffer expectedOutput = new StringBuffer();
  89. for (int i = 10; i < 20; i++) {
  90. expectedOutput.append(""+i).append('\t').append(""+i).append("\n");
  91. }
  92. String output = UtilsForTests.slurp(expectedFile_11);
  93. //System.out.printf("File_2 output: %s\n", output);
  94. assertEquals(output, expectedOutput.toString());
  95. String file_12 = "2-part-00000";
  96. File expectedFile_12 = new File(new Path(workDir, file_12).toString());
  97. //System.out.printf("expectedFile_12: %s\n", new Path(workDir, file_12).toString());
  98. expectedOutput = new StringBuffer();
  99. for (int i = 20; i < 30; i++) {
  100. expectedOutput.append(""+i).append('\t').append(""+i).append("\n");
  101. }
  102. output = UtilsForTests.slurp(expectedFile_12);
  103. //System.out.printf("File_2 output: %s\n", output);
  104. assertEquals(output, expectedOutput.toString());
  105. String file_13 = "3-part-00000";
  106. File expectedFile_13 = new File(new Path(workDir, file_13).toString());
  107. //System.out.printf("expectedFile_13: %s\n", new Path(workDir, file_13).toString());
  108. expectedOutput = new StringBuffer();
  109. for (int i = 30; i < 40; i++) {
  110. expectedOutput.append(""+i).append('\t').append(""+i).append("\n");
  111. }
  112. output = UtilsForTests.slurp(expectedFile_13);
  113. //System.out.printf("File_2 output: %s\n", output);
  114. assertEquals(output, expectedOutput.toString());
  115. String file_2 = "2/3";
  116. File expectedFile_2 = new File(new Path(workDir, file_2).toString());
  117. //System.out.printf("expectedFile_2: %s\n", new Path(workDir, file_2).toString());
  118. expectedOutput = new StringBuffer();
  119. for (int i = 10; i < 40; i++) {
  120. expectedOutput.append(""+i).append('\t').append(""+i).append("\n");
  121. }
  122. output = UtilsForTests.slurp(expectedFile_2);
  123. //System.out.printf("File_2 output: %s\n", output);
  124. assertEquals(output, expectedOutput.toString());
  125. }
  126. public static void main(String[] args) throws Exception {
  127. new TestMultipleTextOutputFormat().testFormat();
  128. }
  129. }