浏览代码

HADOOP-819. Change LineRecordWriter to not insert a tab between key and value when either is null, and to print nothing when both are null. Contributed by Runping Qi.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@527632 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 年之前
父节点
当前提交
19a3dc4082

+ 4 - 0
CHANGES.txt

@@ -165,6 +165,10 @@ Trunk (unreleased changes)
 50. HADOOP-1189.  Fix 'No space left on device' exceptions on datanodes.
     (Raghu Angadi via tomwhite)
 
 
+51. HADOOP-819.  Change LineRecordWriter to not insert a tab between
+    key and value when either is null, and to print nothing when both
+    are null.  (Runping Qi via cutting)
+
 
 
Release 0.12.3 - 2007-04-06
 
 

+ 16 - 5
src/java/org/apache/hadoop/mapred/TextOutputFormat.java

@@ -42,15 +42,26 @@ public class TextOutputFormat extends OutputFormatBase {
     }
     }
     
     
     public synchronized void write(WritableComparable key, Writable value)
     public synchronized void write(WritableComparable key, Writable value)
-    throws IOException {
-      out.write(key.toString().getBytes("UTF-8"));
-      out.writeByte('\t');
-      out.write(value.toString().getBytes("UTF-8"));
+        throws IOException {
+
+      if (key == null && value == null) {
+        return;
+      }
+      if (key != null) {
+        out.write(key.toString().getBytes("UTF-8"));
+      }
+      if (key != null && value != null) {
+        out.write("\t".getBytes("UTF-8"));
+      }
+      if (value != null) {
+        out.write(value.toString().getBytes("UTF-8"));
+      }
       out.writeByte('\n');
       out.writeByte('\n');
     }
     }
+
     public synchronized void close(Reporter reporter) throws IOException {
     public synchronized void close(Reporter reporter) throws IOException {
       out.close();
       out.close();
-    }   
+    }
   }
   }
   
   
   public RecordWriter getRecordWriter(FileSystem ignored, JobConf job,
   public RecordWriter getRecordWriter(FileSystem ignored, JobConf job,

+ 90 - 0
src/test/org/apache/hadoop/mapred/TestTextOutputFormat.java

@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.*;
+import java.util.*;
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.io.compress.*;
+import org.apache.hadoop.util.ReflectionUtils;
+
+public class TestTextOutputFormat extends TestCase {
+  private static final Log LOG = LogFactory.getLog(TestTextOutputFormat.class
+      .getName());
+
+  private static JobConf defaultConf = new JobConf();
+
+  private static FileSystem localFs = null;
+  static {
+    try {
+      localFs = FileSystem.getLocal(defaultConf);
+    } catch (IOException e) {
+      throw new RuntimeException("init failure", e);
+    }
+  }
+
+  private static Path workDir = new Path(new Path(System.getProperty(
+      "test.build.data", "."), "data"), "TestTextOutputFormat");
+
+  public void testFormat() throws Exception {
+    JobConf job = new JobConf();
+    job.setOutputPath(workDir);
+    String file = "test.txt";
+    
+    // A reporter that does nothing
+    Reporter reporter = Reporter.NULL;
+
+    TextOutputFormat theOutputFormat = new TextOutputFormat();
+    RecordWriter theRecodWriter = theOutputFormat.getRecordWriter(localFs, job,
+        file, reporter);
+
+    Text key1 = new Text("key1");
+    Text key2 = new Text("key2");
+    Text val1 = new Text("val1");
+    Text val2 = new Text("val2");
+
+    try {
+      theRecodWriter.write(key1, val1);
+      theRecodWriter.write(null, val1);
+      theRecodWriter.write(key1, null);
+      theRecodWriter.write(null, null);
+      theRecodWriter.write(key2, val2);
+
+    } finally {
+      theRecodWriter.close(reporter);
+    }
+    File expectedFile = new File(new Path(workDir, file).toString()); 
+    StringBuffer expectedOutput = new StringBuffer();
+    expectedOutput.append(key1).append('\t').append(val1).append("\n");
+    expectedOutput.append(val1).append("\n");
+    expectedOutput.append(key1).append("\n");
+    expectedOutput.append(key2).append('\t').append(val2).append("\n");
+    String output = UtilsForTests.slurp(expectedFile);
+    assertEquals(output, expectedOutput.toString());
+    
+  }
+
+  public static void main(String[] args) throws Exception {
+    new TestTextOutputFormat().testFormat();
+  }
+}

+ 192 - 0
src/test/org/apache/hadoop/mapred/UtilsForTests.java

@@ -0,0 +1,192 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.text.DecimalFormat;
+import java.io.*;
+import java.net.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.List;
+import java.util.jar.*;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+
+/** 
+ * Utilities used in unit test.
+ *  
+ */
+public class UtilsForTests {
+
+  final static long KB = 1024L * 1;
+  final static long MB = 1024L * KB;
+  final static long GB = 1024L * MB;
+  final static long TB = 1024L * GB;
+  final static long PB = 1024L * TB;
+
+  static DecimalFormat dfm = new DecimalFormat("####.000");
+  static DecimalFormat ifm = new DecimalFormat("###,###,###,###,###");
+
+  public static String dfmt(double d) {
+    return dfm.format(d);
+  }
+
+  public static String ifmt(double d) {
+    return ifm.format(d);
+  }
+
+  public static String formatBytes(long numBytes) {
+    StringBuffer buf = new StringBuffer();
+    boolean bDetails = true;
+    double num = numBytes;
+
+    if (numBytes < KB) {
+      buf.append(numBytes + " B");
+      bDetails = false;
+    } else if (numBytes < MB) {
+      buf.append(dfmt(num / KB) + " KB");
+    } else if (numBytes < GB) {
+      buf.append(dfmt(num / MB) + " MB");
+    } else if (numBytes < TB) {
+      buf.append(dfmt(num / GB) + " GB");
+    } else if (numBytes < PB) {
+      buf.append(dfmt(num / TB) + " TB");
+    } else {
+      buf.append(dfmt(num / PB) + " PB");
+    }
+    if (bDetails) {
+      buf.append(" (" + ifmt(numBytes) + " bytes)");
+    }
+    return buf.toString();
+  }
+
+  public static String formatBytes2(long numBytes) {
+    StringBuffer buf = new StringBuffer();
+    long u = 0;
+    if (numBytes >= TB) {
+      u = numBytes / TB;
+      numBytes -= u * TB;
+      buf.append(u + " TB ");
+    }
+    if (numBytes >= GB) {
+      u = numBytes / GB;
+      numBytes -= u * GB;
+      buf.append(u + " GB ");
+    }
+    if (numBytes >= MB) {
+      u = numBytes / MB;
+      numBytes -= u * MB;
+      buf.append(u + " MB ");
+    }
+    if (numBytes >= KB) {
+      u = numBytes / KB;
+      numBytes -= u * KB;
+      buf.append(u + " KB ");
+    }
+    buf.append(u + " B"); //even if zero
+    return buf.toString();
+  }
+
+  static final String regexpSpecials = "[]()?*+|.!^-\\~@";
+
+  public static String regexpEscape(String plain) {
+    StringBuffer buf = new StringBuffer();
+    char[] ch = plain.toCharArray();
+    int csup = ch.length;
+    for (int c = 0; c < csup; c++) {
+      if (regexpSpecials.indexOf(ch[c]) != -1) {
+        buf.append("\\");
+      }
+      buf.append(ch[c]);
+    }
+    return buf.toString();
+  }
+
+  public static String safeGetCanonicalPath(File f) {
+    try {
+      String s = f.getCanonicalPath();
+      return (s == null) ? f.toString() : s;
+    } catch (IOException io) {
+      return f.toString();
+    }
+  }
+
+  static String slurp(File f) throws IOException {
+    int len = (int) f.length();
+    byte[] buf = new byte[len];
+    FileInputStream in = new FileInputStream(f);
+    String contents = null;
+    try {
+      in.read(buf, 0, len);
+      contents = new String(buf, "UTF-8");
+    } finally {
+      in.close();
+    }
+    return contents;
+  }
+
+  static String slurpHadoop(Path p, FileSystem fs) throws IOException {
+    int len = (int) fs.getLength(p);
+    byte[] buf = new byte[len];
+    InputStream in = fs.open(p);
+    String contents = null;
+    try {
+      in.read(buf, 0, len);
+      contents = new String(buf, "UTF-8");
+    } finally {
+      in.close();
+    }
+    return contents;
+  }
+
+  public static String rjustify(String s, int width) {
+    if (s == null) s = "null";
+    if (width > s.length()) {
+      s = getSpace(width - s.length()) + s;
+    }
+    return s;
+  }
+
+  public static String ljustify(String s, int width) {
+    if (s == null) s = "null";
+    if (width > s.length()) {
+      s = s + getSpace(width - s.length());
+    }
+    return s;
+  }
+
+  static char[] space;
+  static {
+    space = new char[300];
+    Arrays.fill(space, '\u0020');
+  }
+
+  public static String getSpace(int len) {
+    if (len > space.length) {
+      space = new char[Math.max(len, 2 * space.length)];
+      Arrays.fill(space, '\u0020');
+    }
+    return new String(space, 0, len);
+  }
+}