فهرست منبع

HADOOP-4466. Ensure that SequenceFileOutputFormat isn't tied to Writables and can be used with other Serialization frameworks. Contributed by Chris Wensel.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@707262 13f79535-47bb-0310-9956-ffa450edef68
Arun Murthy 16 سال پیش
والد
کامیت
14553758cb

+ 4 - 0
CHANGES.txt

@@ -1018,6 +1018,10 @@ Release 0.19.0 - Unreleased
     HADOOP-4387. TestHDFSFileSystemContract fails on windows nightly builds.
     (Raghu Angadi)
 
+    HADOOP-4466. Ensure that SequenceFileOutputFormat isn't tied to Writables
+    and can be used with other Serialization frameworks. (Chris Wensel via
+    acmurthy)
+
 Release 0.18.2 - Unreleased
 
   BUG FIXES

+ 2 - 4
src/mapred/org/apache/hadoop/mapred/SequenceFileOutputFormat.java

@@ -26,8 +26,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileUtil;
 
 import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.DefaultCodec;
@@ -58,8 +56,8 @@ public class SequenceFileOutputFormat <K,V> extends FileOutputFormat<K, V> {
     }
     final SequenceFile.Writer out = 
       SequenceFile.createWriter(fs, job, file,
-                                job.getOutputKeyClass().asSubclass(WritableComparable.class),
-                                job.getOutputValueClass().asSubclass(Writable.class),
+                                job.getOutputKeyClass(),
+                                job.getOutputValueClass(),
                                 compressionType,
                                 codec,
                                 progress);

+ 42 - 0
src/test/org/apache/hadoop/mapred/TestJavaSerialization.java

@@ -106,4 +106,46 @@ public class TestJavaSerialization extends ClusterMapReduceTestCase {
     reader.close();
   }
 
+  /**
+   * HADOOP-4466:
+   * This test verifies that the JavaSerialization impl can write to SequenceFiles because
+   * SequenceFileOutputFormat is not coupled to Writable types; if it were, the job would fail.
+   *
+   */
+  public void testWriteToSequencefile() throws Exception {
+    OutputStream os = getFileSystem().create(new Path(getInputDir(),
+        "text.txt"));
+    Writer wr = new OutputStreamWriter(os);
+    wr.write("b a\n");
+    wr.close();
+
+    JobConf conf = createJobConf();
+    conf.setJobName("JavaSerialization");
+
+    conf.set("io.serializations",
+    "org.apache.hadoop.io.serializer.JavaSerialization," +
+    "org.apache.hadoop.io.serializer.WritableSerialization");
+
+    conf.setInputFormat(TextInputFormat.class);
+    conf.setOutputFormat(SequenceFileOutputFormat.class); // test we can write to sequence files
+
+    conf.setOutputKeyClass(String.class);
+    conf.setOutputValueClass(Long.class);
+    conf.setOutputKeyComparatorClass(JavaSerializationComparator.class);
+
+    conf.setMapperClass(WordCountMapper.class);
+    conf.setReducerClass(SumReducer.class);
+
+    FileInputFormat.setInputPaths(conf, getInputDir());
+
+    FileOutputFormat.setOutputPath(conf, getOutputDir());
+
+    JobClient.runJob(conf);
+
+    Path[] outputFiles = FileUtil.stat2Paths(
+                           getFileSystem().listStatus(getOutputDir(),
+                           new OutputLogFilter()));
+    assertEquals(1, outputFiles.length);
+}
+
 }