17 years ago · 7e9ca88886
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -67,6 +67,9 @@ Trunk (unreleased changes)
 
															     the TaskTracker, refactor Hadoop Metrics as an implementation of the api.
														
 
															     (Ari Rabkin via acmurthy) 
														
 
															+    HADOOP-2302. Provides a comparator for numerical sorting of key fields.
														
 
															+    (ddas)
														
 
															+
														
 
															   IMPROVEMENTS
														
 
															     HADOOP-3732. Delay intialization of datanode block verification till
														
--- a/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapRed.java
+++ b/src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapRed.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.mapred.Reporter;
 
															 import org.apache.hadoop.mapred.OutputCollector;
														
 
															 import org.apache.hadoop.mapred.LineRecordReader.LineReader;
														
 
															 import org.apache.hadoop.util.StringUtils;
														
 
															+import org.apache.hadoop.util.UTF8ByteArrayUtils;
														
 
															 import org.apache.hadoop.io.Text;
														
 
															 import org.apache.hadoop.io.BytesWritable;
														
@@ -332,7 +333,7 @@ public abstract class PipeMapRed {
 
															         key.set(line, 0, length);
														
 
															         val.set("");
														
 
															       } else {
														
 
															-        UTF8ByteArrayUtils.splitKeyVal(line, 0, length, key, val, pos, separator.length);
														
 
															+        StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos, separator.length);
														
 
															       }
														
 
															     } catch (CharacterCodingException e) {
														
 
															       LOG.warn(StringUtils.stringifyException(e));
														
--- a/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamKeyValUtil.java
+++ b/src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamKeyValUtil.java
@@ -0,0 +1,141 @@
 
															+/**
														
 
															+ * Licensed to the Apache Software Foundation (ASF) under one
														
 
															+ * or more contributor license agreements.  See the NOTICE file
														
 
															+ * distributed with this work for additional information
														
 
															+ * regarding copyright ownership.  The ASF licenses this file
														
 
															+ * to you under the Apache License, Version 2.0 (the
														
 
															+ * "License"); you may not use this file except in compliance
														
 
															+ * with the License.  You may obtain a copy of the License at
														
 
															+ *
														
 
															+ *     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+
														
 
															+package org.apache.hadoop.streaming;
														
 
															+
														
 
															+import java.io.IOException;
														
 
															+
														
 
															+import org.apache.hadoop.io.Text;
														
 
															+import org.apache.hadoop.mapred.LineRecordReader.LineReader;
														
 
															+
														
 
															+public class StreamKeyValUtil {
														
 
															+
														
 
															+  /**
														
 
															+   * Find the first occurrence of a tab in a UTF-8 encoded string
														
 
															+   * @param utf a byte array containing a UTF-8 encoded string
														
 
															+   * @param start starting offset
														
 
															+   * @param length no. of bytes
														
 
															+   * @return position of the first tab, or -1 if there is none
														
 
															+   */
														
 
															+  public static int findTab(byte [] utf, int start, int length) {
														
 
															+    for(int i=start; i<(start+length); i++) {
														
 
															+      if (utf[i]==(byte)'\t') {
														
 
															+        return i;
														
 
															+      }
														
 
															+    }
														
 
															+    return -1;      
														
 
															+  }
														
 
															+  /**
														
 
															+   * Find the first occurrence of a tab in a UTF-8 encoded string
														
 
															+   * @param utf a byte array containing a UTF-8 encoded string
														
 
															+   * @return position of the first tab, or -1 if there is none
														
 
															+   */
														
 
															+  public static int findTab(byte [] utf) {
														
 
															+    return org.apache.hadoop.util.UTF8ByteArrayUtils.findNthByte(utf, 0, 
														
 
															+        utf.length, (byte)'\t', 1);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+   * split a UTF-8 byte array into key and value 
														
 
															+   * assuming that the delimiter is at splitPos.
														
 
															+   * @param utf utf-8 encoded string
														
 
															+   * @param start starting offset
														
 
															+   * @param length no. of bytes
														
 
															+   * @param key set to the key bytes when the method returns
														
 
															+   * @param val set to the value bytes when the method returns
														
 
															+   * @param splitPos the split pos
														
 
															+   * @param separatorLength the length of the separator between key and value
														
 
															+   * @throws IOException
														
 
															+   */
														
 
															+  public static void splitKeyVal(byte[] utf, int start, int length, 
														
 
															+                                 Text key, Text val, int splitPos,
														
 
															+                                 int separatorLength) throws IOException {
														
 
															+    if (splitPos<start || splitPos >= (start+length))
														
 
															+      throw new IllegalArgumentException("splitPos must be in the range " +
														
 
															+                                         "[" + start + ", " + (start+length) + "]: " + splitPos);
														
 
															+    int keyLen = (splitPos-start);
														
 
															+    byte [] keyBytes = new byte[keyLen];
														
 
															+    System.arraycopy(utf, start, keyBytes, 0, keyLen);
														
 
															+    int valLen = (start+length)-splitPos-separatorLength;
														
 
															+    byte [] valBytes = new byte[valLen];
														
 
															+    System.arraycopy(utf, splitPos+separatorLength, valBytes, 0, valLen);
														
 
															+    key.set(keyBytes);
														
 
															+    val.set(valBytes);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+   * split a UTF-8 byte array into key and value 
														
 
															+   * assuming that the delimiter is at splitPos.
														
 
															+   * @param utf utf-8 encoded string
														
 
															+   * @param start starting offset
														
 
															+   * @param length no. of bytes
														
 
															+   * @param key set to the key bytes when the method returns
														
 
															+   * @param val set to the value bytes when the method returns
														
 
															+   * @param splitPos the split pos
														
 
															+   * @throws IOException
														
 
															+   */
														
 
															+  public static void splitKeyVal(byte[] utf, int start, int length, 
														
 
															+                                 Text key, Text val, int splitPos) throws IOException {
														
 
															+    splitKeyVal(utf, start, length, key, val, splitPos, 1);
														
 
															+  }
														
 
															+  
														
 
															+
														
 
															+  /**
														
 
															+   * split a UTF-8 byte array into key and value 
														
 
															+   * assuming that the delimiter is at splitPos.
														
 
															+   * @param utf utf-8 encoded string
														
 
															+   * @param key set to the key bytes when the method returns
														
 
															+   * @param val set to the value bytes when the method returns
														
 
															+   * @param splitPos the split pos
														
 
															+   * @param separatorLength the length of the separator between key and value
														
 
															+   * @throws IOException
														
 
															+   */
														
 
															+  public static void splitKeyVal(byte[] utf, Text key, Text val, int splitPos, 
														
 
															+                                 int separatorLength) 
														
 
															+    throws IOException {
														
 
															+    splitKeyVal(utf, 0, utf.length, key, val, splitPos, separatorLength);
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+   * split a UTF-8 byte array into key and value 
														
 
															+   * assuming that the delimiter is at splitPos.
														
 
															+   * @param utf utf-8 encoded string
														
 
															+   * @param key set to the key bytes when the method returns
														
 
															+   * @param val set to the value bytes when the method returns
														
 
															+   * @param splitPos the split pos
														
 
															+   * @throws IOException
														
 
															+   */
														
 
															+  public static void splitKeyVal(byte[] utf, Text key, Text val, int splitPos) 
														
 
															+    throws IOException {
														
 
															+    splitKeyVal(utf, 0, utf.length, key, val, splitPos, 1);
														
 
															+  }
														
 
															+  
														
 
															+  /**
														
 
															+   * Read a utf8 encoded line from a data input stream. 
														
 
															+   * @param lineReader LineReader to read the line from.
														
 
															+   * @param out Text to read into
														
 
															+   * @return number of bytes read 
														
 
															+   * @throws IOException
														
 
															+   */
														
 
															+  public static int readLine(LineReader lineReader, Text out) 
														
 
															+  throws IOException {
														
 
															+    out.clear();
														
 
															+    return lineReader.readLine(out);
														
 
															+  }
														
 
															+
														
 
															+}
														
--- a/src/contrib/streaming/src/java/org/apache/hadoop/streaming/UTF8ByteArrayUtils.java
+++ b/src/contrib/streaming/src/java/org/apache/hadoop/streaming/UTF8ByteArrayUtils.java
@@ -21,10 +21,13 @@ package org.apache.hadoop.streaming;
 
															 import java.io.IOException;
														
 
															 import org.apache.hadoop.io.Text;
														
 
															+import org.apache.hadoop.mapred.LineRecordReader;
														
 
															 import org.apache.hadoop.mapred.LineRecordReader.LineReader;
														
 
															 /**
														
 
															  * General utils for byte array containing UTF-8 encoded strings
														
 
															+ * @deprecated use {@link org.apache.hadoop.util.UTF8ByteArrayUtils} and
														
 
															+ * {@link StreamKeyValUtil} instead
														
 
															  */
														
 
															 public class UTF8ByteArrayUtils {
														
@@ -34,14 +37,11 @@ public class UTF8ByteArrayUtils {
 
															    * @param start starting offset
														
 
															    * @param length no. of bytes
														
 
															    * @return position that first tab occures otherwise -1
														
 
															+   * @deprecated use {@link StreamKeyValUtil#findTab(byte[], int, int)}
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static int findTab(byte [] utf, int start, int length) {
														
 
															-    for(int i=start; i<(start+length); i++) {
														
 
															-      if (utf[i]==(byte)'\t') {
														
 
															-        return i;
														
 
															-      }
														
 
															-    }
														
 
															-    return -1;      
														
 
															+    return StreamKeyValUtil.findTab(utf, start, length);      
														
 
															   }
														
 
															   /**
														
@@ -51,14 +51,13 @@ public class UTF8ByteArrayUtils {
 
															    * @param end ending position
														
 
															    * @param b the byte to find
														
 
															    * @return position that first byte occures otherwise -1
														
 
															+   * @deprecated use 
														
 
															+   * {@link org.apache.hadoop.util.UTF8ByteArrayUtils#findByte(byte[], int,
														
 
															+   *  int, byte)}
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static int findByte(byte [] utf, int start, int end, byte b) {
														
 
															-    for(int i=start; i<end; i++) {
														
 
															-      if (utf[i]==b) {
														
 
															-        return i;
														
 
															-      }
														
 
															-    }
														
 
															-    return -1;      
														
 
															+    return org.apache.hadoop.util.UTF8ByteArrayUtils.findByte(utf, start, end, b);
														
 
															   }
														
 
															   /**
														
@@ -68,22 +67,13 @@ public class UTF8ByteArrayUtils {
 
															    * @param end ending position
														
 
															    * @param b the bytes to find
														
 
															    * @return position that first byte occures otherwise -1
														
 
															+   * @deprecated use 
														
 
															+   * {@link org.apache.hadoop.util.UTF8ByteArrayUtils#findBytes(byte[], int, 
														
 
															+   * int, byte[])}
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static int findBytes(byte [] utf, int start, int end, byte[] b) {
														
 
															-    int matchEnd = end - b.length;
														
 
															-    for(int i=start; i<=matchEnd; i++) {
														
 
															-      boolean matched = true;
														
 
															-      for(int j=0; j<b.length; j++) {
														
 
															-        if (utf[i+j] != b[j]) {
														
 
															-          matched = false;
														
 
															-          break;
														
 
															-        }
														
 
															-      }
														
 
															-      if (matched) {
														
 
															-        return i;
														
 
															-      }
														
 
															-    }
														
 
															-    return -1;      
														
 
															+    return org.apache.hadoop.util.UTF8ByteArrayUtils.findBytes(utf, start, end, b);      
														
 
															   }
														
 
															   /**
														
@@ -94,18 +84,14 @@ public class UTF8ByteArrayUtils {
 
															    * @param b the byte to find
														
 
															    * @param n the desired occurrence of the given byte
														
 
															    * @return position that nth occurrence of the given byte if exists; otherwise -1
														
 
															+   * @deprecated use 
														
 
															+   * {@link org.apache.hadoop.util.UTF8ByteArrayUtils#findNthByte(byte[], int, 
														
 
															+   * int, byte, int)}
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static int findNthByte(byte [] utf, int start, int length, byte b, int n) {
														
 
															-    int pos = -1;
														
 
															-    int nextStart = start;
														
 
															-    for (int i = 0; i < n; i++) {
														
 
															-      pos = findByte(utf, nextStart, length, b);
														
 
															-      if (pos < 0) {
														
 
															-        return pos;
														
 
															-      }
														
 
															-      nextStart = pos + 1;
														
 
															-    }
														
 
															-    return pos;      
														
 
															+    return org.apache.hadoop.util.UTF8ByteArrayUtils.findNthByte(utf, start,
														
 
															+        length, b, n);
														
 
															   }
														
 
															   /**
														
@@ -114,18 +100,24 @@ public class UTF8ByteArrayUtils {
 
															    * @param b the byte to find
														
 
															    * @param n the desired occurrence of the given byte
														
 
															    * @return position that nth occurrence of the given byte if exists; otherwise -1
														
 
															+   * @deprecated use 
														
 
															+   * {@link org.apache.hadoop.util.UTF8ByteArrayUtils#findNthByte(byte[], 
														
 
															+   * byte, int)}
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static int findNthByte(byte [] utf, byte b, int n) {
														
 
															-    return findNthByte(utf, 0, utf.length, b, n);      
														
 
															+    return org.apache.hadoop.util.UTF8ByteArrayUtils.findNthByte(utf, b, n);      
														
 
															   }
														
 
															   /**
														
 
															    * Find the first occured tab in a UTF-8 encoded string
														
 
															    * @param utf a byte array containing a UTF-8 encoded string
														
 
															    * @return position that first tab occures otherwise -1
														
 
															+   * @deprecated use {@link StreamKeyValUtil#findTab(byte[])}
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static int findTab(byte [] utf) {
														
 
															-    return findNthByte(utf, 0, utf.length, (byte)'\t', 1);
														
 
															+    return StreamKeyValUtil.findTab(utf);
														
 
															   }
														
 
															   /**
														
@@ -138,22 +130,17 @@ public class UTF8ByteArrayUtils {
 
															    * @param val contains value upon the method is returned
														
 
															    * @param splitPos the split pos
														
 
															    * @param separatorLength the length of the separator between key and value
														
 
															+   * @deprecated use 
														
 
															+   * {@link StreamKeyValUtil#splitKeyVal(byte[], int, int, Text, Text, 
														
 
															+   * int, int)}
														
 
															    * @throws IOException
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static void splitKeyVal(byte[] utf, int start, int length, 
														
 
															                                  Text key, Text val, int splitPos,
														
 
															                                  int separatorLength) throws IOException {
														
 
															-    if (splitPos<start || splitPos >= (start+length))
														
 
															-      throw new IllegalArgumentException("splitPos must be in the range " +
														
 
															-                                         "[" + start + ", " + (start+length) + "]: " + splitPos);
														
 
															-    int keyLen = (splitPos-start);
														
 
															-    byte [] keyBytes = new byte[keyLen];
														
 
															-    System.arraycopy(utf, start, keyBytes, 0, keyLen);
														
 
															-    int valLen = (start+length)-splitPos-separatorLength;
														
 
															-    byte [] valBytes = new byte[valLen];
														
 
															-    System.arraycopy(utf, splitPos+separatorLength, valBytes, 0, valLen);
														
 
															-    key.set(keyBytes);
														
 
															-    val.set(valBytes);
														
 
															+    StreamKeyValUtil.splitKeyVal(utf, start, 
														
 
															+        length, key, val, splitPos, separatorLength);
														
 
															   }
														
 
															   /**
														
@@ -165,11 +152,14 @@ public class UTF8ByteArrayUtils {
 
															    * @param key contains key upon the method is returned
														
 
															    * @param val contains value upon the method is returned
														
 
															    * @param splitPos the split pos
														
 
															+   * @deprecated use 
														
 
															+   * {@link StreamKeyValUtil#splitKeyVal(byte[], int, int, Text, Text, int)}
														
 
															    * @throws IOException
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static void splitKeyVal(byte[] utf, int start, int length, 
														
 
															                                  Text key, Text val, int splitPos) throws IOException {
														
 
															-    splitKeyVal(utf, start, length, key, val, splitPos, 1);
														
 
															+    StreamKeyValUtil.splitKeyVal(utf, start, length, key, val, splitPos);
														
 
															   }
														
@@ -181,12 +171,15 @@ public class UTF8ByteArrayUtils {
 
															    * @param val contains value upon the method is returned
														
 
															    * @param splitPos the split pos
														
 
															    * @param separatorLength the length of the separator between key and value
														
 
															+   * @deprecated use 
														
 
															+   * {@link StreamKeyValUtil#splitKeyVal(byte[], Text, Text, int, int)}
														
 
															    * @throws IOException
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static void splitKeyVal(byte[] utf, Text key, Text val, int splitPos, 
														
 
															                                  int separatorLength) 
														
 
															     throws IOException {
														
 
															-    splitKeyVal(utf, 0, utf.length, key, val, splitPos, separatorLength);
														
 
															+    StreamKeyValUtil.splitKeyVal(utf, key, val, splitPos, separatorLength);
														
 
															   }
														
 
															   /**
														
@@ -196,23 +189,28 @@ public class UTF8ByteArrayUtils {
 
															    * @param key contains key upon the method is returned
														
 
															    * @param val contains value upon the method is returned
														
 
															    * @param splitPos the split pos
														
 
															+   * @deprecated use 
														
 
															+   * {@link StreamKeyValUtil#splitKeyVal(byte[], Text, Text, int)}
														
 
															    * @throws IOException
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static void splitKeyVal(byte[] utf, Text key, Text val, int splitPos) 
														
 
															     throws IOException {
														
 
															-    splitKeyVal(utf, 0, utf.length, key, val, splitPos, 1);
														
 
															+    StreamKeyValUtil.splitKeyVal(utf, key, val, splitPos);
														
 
															   }
														
 
															   /**
														
 
															    * Read a utf8 encoded line from a data input stream. 
														
 
															    * @param lineReader LineReader to read the line from.
														
 
															    * @param out Text to read into
														
 
															-   * @return number of bytes read 
														
 
															+   * @return number of bytes read
														
 
															+   * @deprecated use 
														
 
															+   * {@link StreamKeyValUtil#readLine(LineRecordReader.LineReader, Text)} 
														
 
															    * @throws IOException
														
 
															    */
														
 
															+  @Deprecated
														
 
															   public static int readLine(LineReader lineReader, Text out) 
														
 
															   throws IOException {
														
 
															-    out.clear();
														
 
															-    return lineReader.readLine(out);
														
 
															+    return StreamKeyValUtil.readLine(lineReader, out);
														
 
															   }
														
 
															 }
														
--- a/src/core/org/apache/hadoop/util/UTF8ByteArrayUtils.java
+++ b/src/core/org/apache/hadoop/util/UTF8ByteArrayUtils.java
@@ -0,0 +1,98 @@
 
															+/**
														
 
															+ * Licensed to the Apache Software Foundation (ASF) under one
														
 
															+ * or more contributor license agreements.  See the NOTICE file
														
 
															+ * distributed with this work for additional information
														
 
															+ * regarding copyright ownership.  The ASF licenses this file
														
 
															+ * to you under the Apache License, Version 2.0 (the
														
 
															+ * "License"); you may not use this file except in compliance
														
 
															+ * with the License.  You may obtain a copy of the License at
														
 
															+ *
														
 
															+ *     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+
														
 
															+package org.apache.hadoop.util;
														
 
															+
														
 
															+public class UTF8ByteArrayUtils {
														
 
															+  /**
														
 
															+   * Find the first occurrence of the given byte b in a UTF-8 encoded string
														
 
															+   * @param utf a byte array containing a UTF-8 encoded string
														
 
															+   * @param start starting offset
														
 
															+   * @param end ending position
														
 
															+   * @param b the byte to find
														
 
															+   * @return position of the first occurrence of b, or -1 if not found
														
 
															+   */
														
 
															+  public static int findByte(byte [] utf, int start, int end, byte b) {
														
 
															+    for(int i=start; i<end; i++) {
														
 
															+      if (utf[i]==b) {
														
 
															+        return i;
														
 
															+      }
														
 
															+    }
														
 
															+    return -1;      
														
 
															+  }
														
 
															+
														
 
															+  /**
														
 
															+   * Find the first occurrence of the given bytes b in a UTF-8 encoded string
														
 
															+   * @param utf a byte array containing a UTF-8 encoded string
														
 
															+   * @param start starting offset
														
 
															+   * @param end ending position
														
 
															+   * @param b the bytes to find
														
 
															+   * @return starting position of the first occurrence of the bytes b, or -1 if not found
														
 
															+   */
														
 
															+  public static int findBytes(byte [] utf, int start, int end, byte[] b) {
														
 
															+    int matchEnd = end - b.length;
														
 
															+    for(int i=start; i<=matchEnd; i++) {
														
 
															+      boolean matched = true;
														
 
															+      for(int j=0; j<b.length; j++) {
														
 
															+        if (utf[i+j] != b[j]) {
														
 
															+          matched = false;
														
 
															+          break;
														
 
															+        }
														
 
															+      }
														
 
															+      if (matched) {
														
 
															+        return i;
														
 
															+      }
														
 
															+    }
														
 
															+    return -1;      
														
 
															+  }
														
 
															+    
														
 
															+  /**
														
 
															+   * Find the nth occurrence of the given byte b in a UTF-8 encoded string
														
 
															+   * @param utf a byte array containing a UTF-8 encoded string
														
 
															+   * @param start starting offset
														
 
															+   * @param length the length of byte array
														
 
															+   * @param b the byte to find
														
 
															+   * @param n the desired occurrence of the given byte
														
 
															+   * @return position that nth occurrence of the given byte if exists; otherwise -1
														
 
															+   */
														
 
															+  public static int findNthByte(byte [] utf, int start, int length, byte b, int n) {
														
 
															+    int pos = -1;
														
 
															+    int nextStart = start;
														
 
															+    for (int i = 0; i < n; i++) {
														
 
															+      pos = findByte(utf, nextStart, length, b);
														
 
															+      if (pos < 0) {
														
 
															+        return pos;
														
 
															+      }
														
 
															+      nextStart = pos + 1;
														
 
															+    }
														
 
															+    return pos;      
														
 
															+  }
														
 
															+  
														
 
															+  /**
														
 
															+   * Find the nth occurrence of the given byte b in a UTF-8 encoded string
														
 
															+   * @param utf a byte array containing a UTF-8 encoded string
														
 
															+   * @param b the byte to find
														
 
															+   * @param n the desired occurrence of the given byte
														
 
															+   * @return position that nth occurrence of the given byte if exists; otherwise -1
														
 
															+   */
														
 
															+  public static int findNthByte(byte [] utf, byte b, int n) {
														
 
															+    return findNthByte(utf, 0, utf.length, b, n);      
														
 
															+  }
														
 
															+
														
 
															+}
														
 
															+
														
--- a/src/mapred/org/apache/hadoop/mapred/JobConf.java
+++ b/src/mapred/org/apache/hadoop/mapred/JobConf.java
@@ -39,6 +39,8 @@ import org.apache.hadoop.io.compress.CompressionCodec;
 
															 import org.apache.hadoop.mapred.lib.IdentityMapper;
														
 
															 import org.apache.hadoop.mapred.lib.IdentityReducer;
														
 
															 import org.apache.hadoop.mapred.lib.HashPartitioner;
														
 
															+import org.apache.hadoop.mapred.lib.KeyFieldBasedComparator;
														
 
															+import org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner;
														
 
															 import org.apache.hadoop.util.ReflectionUtils;
														
 
															 import org.apache.hadoop.util.Tool;
														
@@ -510,6 +512,58 @@ public class JobConf extends Configuration {
 
															              theClass, RawComparator.class);
														
 
															   }
														
 
															+  /**
															+   * Set the {@link KeyFieldBasedComparator} options used to compare keys.
															+   * This also installs {@link KeyFieldBasedComparator} as the output key
															+   * comparator class and stores the specification under
															+   * <code>mapred.text.key.comparator.options</code>.
															+   * 
															+   * @param keySpec the key specification of the form -k pos1[,pos2], where,
															+   *  pos is of the form f[.c][opts], where f is the number
															+   *  of the key field to use, and c is the number of the first character from
															+   *  the beginning of the field. Fields and character positions are numbered 
															+   *  starting with 1; a character position of zero in pos2 indicates the
															+   *  field's last character. If '.c' is omitted from pos1, it defaults to 1
															+   *  (the beginning of the field); if omitted from pos2, it defaults to 0 
															+   *  (the end of the field). opts are ordering options. The supported options
															+   *  are:
															+   *    -n, (Sort numerically)
															+   *    -r, (Reverse the result of comparison)                 
															+   */
															+  public void setKeyFieldComparatorOptions(String keySpec) {
															+    setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
															+    set("mapred.text.key.comparator.options", keySpec);
															+  }
														
 
															+  
														
 
															+  /**
															+   * Get the {@link KeyFieldBasedComparator} options.
															+   *
															+   * @return the value stored under
															+   *  <code>mapred.text.key.comparator.options</code> (presumably
															+   *  <code>null</code> when no options were set -- confirm against
															+   *  Configuration.get)
															+   */
															+  public String getKeyFieldComparatorOption() {
															+    return get("mapred.text.key.comparator.options");
															+  }
														
 
															+
														
 
															+  /**
															+   * Set the {@link KeyFieldBasedPartitioner} options used for 
															+   * {@link Partitioner}. This also installs
															+   * {@link KeyFieldBasedPartitioner} as the partitioner class and stores
															+   * the specification under
															+   * <code>mapred.text.key.partitioner.options</code>.
															+   * 
															+   * @param keySpec the key specification of the form -k pos1[,pos2], where,
															+   *  pos is of the form f[.c][opts], where f is the number
															+   *  of the key field to use, and c is the number of the first character from
															+   *  the beginning of the field. Fields and character positions are numbered 
															+   *  starting with 1; a character position of zero in pos2 indicates the
															+   *  field's last character. If '.c' is omitted from pos1, it defaults to 1
															+   *  (the beginning of the field); if omitted from pos2, it defaults to 0 
															+   *  (the end of the field).
															+   */
															+  public void setKeyFieldPartitionerOptions(String keySpec) {
															+    setPartitionerClass(KeyFieldBasedPartitioner.class);
															+    set("mapred.text.key.partitioner.options", keySpec);
															+  }
														
 
															+  
														
 
															+  /**
															+   * Get the {@link KeyFieldBasedPartitioner} options.
															+   *
															+   * @return the value stored under
															+   *  <code>mapred.text.key.partitioner.options</code> (presumably
															+   *  <code>null</code> when no options were set -- confirm against
															+   *  Configuration.get)
															+   */
															+  public String getKeyFieldPartitionerOption() {
															+    return get("mapred.text.key.partitioner.options");
															+  }
														
 
															+
														
 
															   /** 
														
 
															    * Get the user defined {@link WritableComparable} comparator for 
														
 
															    * grouping keys of inputs to the reduce.
														
@@ -1261,6 +1315,5 @@ public class JobConf extends Configuration {
 
															     }
														
 
															     return null;
														
 
															   }
														
 
															-
														
 
															 }
														
--- a/src/mapred/org/apache/hadoop/mapred/lib/KeyFieldBasedComparator.java
+++ b/src/mapred/org/apache/hadoop/mapred/lib/KeyFieldBasedComparator.java
@@ -0,0 +1,328 @@
 
															+/**
														
 
															+ * Licensed to the Apache Software Foundation (ASF) under one
														
 
															+ * or more contributor license agreements.  See the NOTICE file
														
 
															+ * distributed with this work for additional information
														
 
															+ * regarding copyright ownership.  The ASF licenses this file
														
 
															+ * to you under the Apache License, Version 2.0 (the
														
 
															+ * "License"); you may not use this file except in compliance
														
 
															+ * with the License.  You may obtain a copy of the License at
														
 
															+ *
														
 
															+ *     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+
														
 
															+package org.apache.hadoop.mapred.lib;
														
 
															+
														
 
															+import java.util.List;
														
 
															+
														
 
															+import org.apache.hadoop.io.WritableComparator;
														
 
															+import org.apache.hadoop.io.WritableUtils;
														
 
															+import org.apache.hadoop.mapred.JobConf;
														
 
															+import org.apache.hadoop.mapred.JobConfigurable;
														
 
															+import org.apache.hadoop.mapred.lib.KeyFieldHelper.KeyDescription;
														
 
															+import org.apache.hadoop.io.Text;
														
 
															+
														
 
															+/**
														
 
															+ * This comparator implementation provides a subset of the features provided
														
 
															+ * by the Unix/GNU Sort. In particular, the supported features are:
														
 
															+ * -n, (Sort numerically)
														
 
															+ * -r, (Reverse the result of comparison)
														
 
															+ * -k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number
														
 
															+ *  of the field to use, and c is the number of the first character from the
														
 
															+ *  beginning of the field. Fields and character posns are numbered starting
														
 
															+ *  with 1; a character position of zero in pos2 indicates the field's last
														
 
															+ *  character. If '.c' is omitted from pos1, it defaults to 1 (the beginning
														
 
															+ *  of the field); if omitted from pos2, it defaults to 0 (the end of the
														
 
															+ *  field). opts are ordering options (any of 'nr' as described above). 
														
 
															+ * We assume that the fields in the key are separated by 
														
 
															+ * map.output.key.field.separator.
														
 
															+ */
														
 
															+
														
 
															+public class KeyFieldBasedComparator<K, V> extends WritableComparator 
														
 
															+implements JobConfigurable {
														
 
															+  private KeyFieldHelper keyFieldHelper = new KeyFieldHelper();
														
 
															+  private static final byte NEGATIVE = (byte)'-';
														
 
															+  private static final byte ZERO = (byte)'0';
														
 
															+  private static final byte DECIMAL = (byte)'.';
														
 
															+  
														
 
															+  /**
															+   * Reads the field separator and the comparator key specification from the
															+   * job and hands both to the key-field helper. The separator is installed
															+   * before the option string is parsed.
															+   *
															+   * @param job the job configuration to read from
															+   */
															+  public void configure(JobConf job) {
															+    keyFieldHelper.setKeyFieldSeparator(
															+        job.get("map.output.key.field.separator","\t"));
															+    keyFieldHelper.parseOption(job.getKeyFieldComparatorOption());
															+  }
														
 
															+  
														
 
															+  // Registers Text as the key class with the WritableComparator base class.
															+  public KeyFieldBasedComparator() {
															+    super(Text.class);
															+  }
														
 
															+    
														
 
															+
														
 
															+  /**
															+   * Compares two serialized keys according to the configured key specs.
															+   * With no key specs the key bytes after the leading vint are compared as
															+   * a whole; otherwise each spec is applied in order and the first non-zero
															+   * result wins.
															+   *
															+   * @param b1 buffer holding the first key
															+   * @param s1 offset of the first key in b1
															+   * @param l1 length of the first key, including its leading vint
															+   * @param b2 buffer holding the second key
															+   * @param s2 offset of the second key in b2
															+   * @param l2 length of the second key, including its leading vint
															+   * @return negative, zero or positive as the first key sorts before, with,
															+   *         or after the second
															+   */
															+  public int compare(byte[] b1, int s1, int l1,
															+      byte[] b2, int s2, int l2) {
															+    // size of the vint that precedes the key bytes in each buffer
															+    final int prefixLen1 = WritableUtils.decodeVIntSize(b1[s1]);
															+    final int prefixLen2 = WritableUtils.decodeVIntSize(b2[s2]);
															+    final List <KeyDescription> specs = keyFieldHelper.keySpecs();
															+    final int textStart1 = s1 + prefixLen1;
															+    final int textStart2 = s2 + prefixLen2;
															+    if (specs.isEmpty()) {
															+      return compareBytes(b1, textStart1, l1 - prefixLen1,
															+          b2, textStart2, l2 - prefixLen2);
															+    }
															+    final int limit1 = s1 + l1;
															+    final int limit2 = s2 + l2;
															+    final int[] fieldLengths1 = keyFieldHelper.getWordLengths(b1, textStart1, limit1);
															+    final int[] fieldLengths2 = keyFieldHelper.getWordLengths(b2, textStart2, limit2);
															+    for (KeyDescription spec : specs) {
															+      final int from1 = keyFieldHelper.getStartOffset(b1, textStart1, limit1,
															+          fieldLengths1, spec);
															+      final int to1 = keyFieldHelper.getEndOffset(b1, textStart1, limit1,
															+          fieldLengths1, spec);
															+      final int from2 = keyFieldHelper.getStartOffset(b2, textStart2, limit2,
															+          fieldLengths2, spec);
															+      final int to2 = keyFieldHelper.getEndOffset(b2, textStart2, limit2,
															+          fieldLengths2, spec);
															+      final int outcome = compareByteSequence(b1, from1, to1, b2, from2, to2, spec);
															+      if (outcome != 0) {
															+        return outcome;
															+      }
															+    }
															+    return 0;
															+  }
														
 
															+  
														
 
															+  private int compareByteSequence(byte[] first, int start1, int end1, 
														
 
															+      byte[] second, int start2, int end2, KeyDescription key) {
														
 
															+    if (start1 == -1) {
														
 
															+      if (key.reverse) {
														
 
															+        return 1;
														
 
															+      }
														
 
															+      return -1;
														
 
															+    }
														
 
															+    if (start2 == -1) {
														
 
															+      if (key.reverse) {
														
 
															+        return -1; 
														
 
															+      }
														
 
															+      return 1;
														
 
															+    }
														
 
															+    int compareResult = 0;
														
 
															+    if (!key.numeric) {
														
 
															+      compareResult = compareBytes(first, start1, end1, second, start2, end2);
														
 
															+    }
														
 
															+    if (key.numeric) {
														
 
															+      compareResult = numericalCompare (first, start1, end1, second, start2, end2);
														
 
															+    }
														
 
															+    if (key.reverse) {
														
 
															+      return -compareResult;
														
 
															+    }
														
 
															+    return compareResult;
														
 
															+  }
														
 
															+  
														
 
															+  // Compares the byte ranges a[start1..end1] and b[start2..end2] (both ends
															+  // inclusive) as textual decimal numbers without parsing them: an optional
															+  // leading '-', then digits, then an optional '.' and fraction digits.
															+  // Returns a negative, zero or positive value.
															+  // NOTE(review): a[start1] and b[start2] are read unconditionally, so this
															+  // assumes both ranges are non-empty -- confirm against the callers.
															+  private int numericalCompare (byte[] a, int start1, int end1, 
															+      byte[] b, int start2, int end2) {
															+    int i = start1;
															+    int j = start2;
															+    int mul = 1; // becomes -1 when both numbers are negative (order flips)
															+    byte first_a = a[i];
															+    byte first_b = b[j];
															+    if (first_a == NEGATIVE) {
															+      if (first_b != NEGATIVE) {
															+        //check for cases like -0.0 and 0.0 (they should be declared equal)
															+        return oneNegativeCompare(a,start1+1,end1,b,start2,end2);
															+      }
															+      i++;
															+    }
															+    if (first_b == NEGATIVE) {
															+      if (first_a != NEGATIVE) {
															+        //check for cases like 0.0 and -0.0 (they should be declared equal)
															+        return -oneNegativeCompare(b,start2+1,end2,a,start1,end1);
															+      }
															+      j++;
															+    }
															+    if (first_b == NEGATIVE && first_a == NEGATIVE) {
															+      mul = -1;
															+    }
															+
															+    //skip over ZEROs
															+    while (i <= end1) {
															+      if (a[i] != ZERO) {
															+        break;
															+      }
															+      i++;
															+    }
															+    while (j <= end2) {
															+      if (b[j] != ZERO) {
															+        break;
															+      }
															+      j++;
															+    }
															+    
															+    //skip over equal characters and stopping at the first nondigit char
															+    //The nondigit character could be '.'
															+    while (i <= end1 && j <= end2) {
															+      if (!isdigit(a[i]) || a[i] != b[j]) {
															+        break;
															+      }
															+      i++; j++;
															+    }
															+    // If a range is already exhausted here, first_a/first_b keep the byte
															+    // that was read at the very start of the field.
															+    if (i <= end1) {
															+      first_a = a[i];
															+    }
															+    if (j <= end2) {
															+      first_b = b[j];
															+    }
															+    //store the result of the difference. This could be final result if the
															+    //number of digits in the mantissa is the same in both the numbers 
															+    int firstResult = first_a - first_b;
															+    
															+    //check whether we hit a decimal in the earlier scan
															+    if ((first_a == DECIMAL && (!isdigit(first_b) || j > end2)) ||
															+            (first_b == DECIMAL && (!isdigit(first_a) || i > end1))) {
															+      return ((mul < 0) ? -decimalCompare(a,i,end1,b,j,end2) : 
															+        decimalCompare(a,i,end1,b,j,end2));
															+    }
															+    //check the number of digits in the mantissa of the numbers
															+    int numRemainDigits_a = 0;
															+    int numRemainDigits_b = 0;
															+    while (i <= end1) {
															+      //if we encounter a non-digit treat the corresponding number as being 
															+      //smaller      
															+      if (isdigit(a[i++])) {
															+        numRemainDigits_a++;
															+      } else break;
															+    }
															+    while (j <= end2) {
															+      //if we encounter a non-digit treat the corresponding number as being 
															+      //smaller
															+      if (isdigit(b[j++])) {
															+        numRemainDigits_b++;
															+      } else break;
															+    }
															+    // More remaining integer digits means a larger magnitude; with equal
															+    // counts the first differing byte decides. mul flips the sign when both
															+    // numbers are negative.
															+    int ret = numRemainDigits_a - numRemainDigits_b;
															+    if (ret == 0) { 
															+      return ((mul < 0) ? -firstResult : firstResult);
															+    } else {
															+      return ((mul < 0) ? -ret : ret);
															+    }
															+  }
														
 
															+  // True when b is an ASCII decimal digit ('0' through '9').
															+  private boolean isdigit(byte b) {
															+    return b >= '0' && b <= '9';
															+  }
														
 
															+  // Compares the parts of two numbers from the current scan positions i and
															+  // j onwards, once numericalCompare has seen a '.' in at least one of them.
															+  // end1/end2 are inclusive. Returns a negative, zero or positive value for
															+  // the unsigned magnitudes; the caller applies the sign.
															+  private int decimalCompare(byte[] a, int i, int end1, 
															+                             byte[] b, int j, int end2) {
															+    if (i > end1) {
															+      //if a[] has nothing remaining
															+      // ++j steps over b[j] (presumably the '.') before checking b's
															+      // remaining digits for a non-zero
															+      return -decimalCompare1(b, ++j, end2);
															+    }
															+    if (j > end2) {
															+      //if b[] has nothing remaining
															+      return decimalCompare1(a, ++i, end1);
															+    }
															+    if (a[i] == DECIMAL && b[j] == DECIMAL) {
															+      // both have a fraction: scan in lock step to the first difference
															+      while (i <= end1 && j <= end2) {
															+        if (a[i] != b[j]) {
															+          if (isdigit(a[i]) && isdigit(b[j])) {
															+            return a[i] - b[j];
															+          }
															+          // a digit on one side only makes that side the larger one
															+          if (isdigit(a[i])) {
															+            return 1;
															+          }
															+          if (isdigit(b[j])) {
															+            return -1;
															+          }
															+          return 0;
															+        }
															+        i++; j++;
															+      }
															+      if (i > end1 && j > end2) {
															+        return 0;
															+      }
															+        
															+      if (i > end1) {
															+        //check whether there is a non-ZERO digit after potentially
															+        //a number of ZEROs (e.g., a=.4444, b=.444400004)
															+        return -decimalCompare1(b, j, end2);
															+      }
															+      if (j > end2) {
															+        //check whether there is a non-ZERO digit after potentially
															+        //a number of ZEROs (e.g., b=.4444, a=.444400004)
															+        return decimalCompare1(a, i, end1);
															+      }
															+    }
															+    else if (a[i] == DECIMAL) {
															+      // only a[] has a fraction: it is larger iff that fraction is non-zero
															+      return decimalCompare1(a, ++i, end1);
															+    }
															+    else if (b[j] == DECIMAL) {
															+      // only b[] has a fraction: it is larger iff that fraction is non-zero
															+      return -decimalCompare1(b, ++j, end2);
															+    }
															+    return 0;
															+  }
														
 
															+  
														
 
															+  // Skips any run of '0' bytes in a[i..end] (end inclusive) and reports what
															+  // follows: 1 if the next byte is a digit (necessarily non-zero), 0 if it is
															+  // a non-digit or the range is exhausted.
															+  private int decimalCompare1(byte[] a, int i, int end) {
															+    for (int idx = i; idx <= end; idx++) {
															+      if (a[idx] != ZERO) {
															+        return isdigit(a[idx]) ? 1 : 0;
															+      }
															+    }
															+    return 0;
															+  }
														
 
															+  
														
 
															+  private int oneNegativeCompare(byte[] a, int start1, int end1, 
														
 
															+      byte[] b, int start2, int end2) {
														
 
															+    //here a[] is negative and b[] is positive
														
 
															+    //We have to ascertain whether the number contains any digits.
														
 
															+    //If it does, then it is a smaller number for sure. If not,
														
 
															+    //then we need to scan b[] to find out whether b[] has a digit
														
 
															+    //If b[] does contain a digit, then b[] is certainly
														
 
															+    //greater. If not, that is, both a[] and b[] don't contain
														
 
															+    //digits then they should be considered equal.
														
 
															+    if (!isZero(a, start1, end1)) {
														
 
															+      return -1;
														
 
															+    }
														
 
															+    //reached here - this means that a[] is a ZERO
														
 
															+    if (!isZero(b, start2, end2)) {
														
 
															+      return -1;
														
 
															+    }
														
 
															+    //reached here - both numbers are basically ZEROs and hence
														
 
															+    //they should compare equal
														
 
															+    return 0;
														
 
															+  }
														
 
															+  
														
 
															+  // Reports whether a[start..end] (end inclusive) denotes zero: only '0'
															+  // digits before an optional '.', and only '0' digits after it. Scanning
															+  // of either part stops at the first non-digit byte, which is treated as
															+  // zero.
															+  private boolean isZero(byte a[], int start, int end) {
															+    int pos = start;
															+    // integer part: leading zeros up to the first other byte
															+    for (; pos <= end; pos++) {
															+      if (a[pos] == ZERO) {
															+        continue;
															+      }
															+      if (isdigit(a[pos])) {
															+        return false;
															+      }
															+      break;
															+    }
															+    if (pos == (end+1) || a[pos] != DECIMAL) {
															+      // ran off the end, or stopped on a non-digit that is not '.'
															+      return true;
															+    }
															+    // fractional part: same scan, starting just past the '.'
															+    for (pos++; pos <= end; pos++) {
															+      if (a[pos] != ZERO) {
															+        return !isdigit(a[pos]);
															+      }
															+    }
															+    return true;
															+  }
														
 
															+}
														
--- a/src/mapred/org/apache/hadoop/mapred/lib/KeyFieldBasedPartitioner.java
+++ b/src/mapred/org/apache/hadoop/mapred/lib/KeyFieldBasedPartitioner.java
@@ -18,37 +18,83 @@
 
															 package org.apache.hadoop.mapred.lib;
														
 
															+import java.io.UnsupportedEncodingException;
														
 
															+import java.util.List;
														
 
															+
														
 
															+import org.apache.commons.logging.Log;
														
 
															+import org.apache.commons.logging.LogFactory;
														
 
															 import org.apache.hadoop.mapred.JobConf;
														
 
															 import org.apache.hadoop.mapred.Partitioner;
														
 
															+import org.apache.hadoop.mapred.lib.KeyFieldHelper.KeyDescription;
														
 
															+ /**   
														
 
															+  *  Defines a way to partition keys based on certain key fields (also see
														
 
															+  *  {@link KeyFieldBasedComparator}).
														
 
															+  *  The key specification supported is of the form -k pos1[,pos2], where,
														
 
															+  *  pos is of the form f[.c][opts], where f is the number
														
 
															+  *  of the key field to use, and c is the number of the first character from
														
 
															+  *  the beginning of the field. Fields and character posns are numbered 
														
 
															+  *  starting with 1; a character position of zero in pos2 indicates the
														
 
															+  *  field's last character. If '.c' is omitted from pos1, it defaults to 1
														
 
															+  *  (the beginning of the field); if omitted from pos2, it defaults to 0 
														
 
															+  *  (the end of the field).
														
 
															+  * 
														
 
															+  */
														
 
															 public class KeyFieldBasedPartitioner<K2, V2> implements Partitioner<K2, V2> {
														
 
															+  private static final Log LOG = LogFactory.getLog(KeyFieldBasedPartitioner.class.getName());
														
 
															   private int numOfPartitionFields;
														
 
															-
														
 
															-  private String keyFieldSeparator;
														
 
															+  
														
 
															+  private KeyFieldHelper keyFieldHelper = new KeyFieldHelper();
														
 
															+  // Reads the key field separator (default tab) and the partitioning key
															+  // specification from the job. The deprecated num.key.fields.for.partition
															+  // setting, when present, takes precedence over
															+  // mapred.text.key.partitioner.options and is passed to the helper as the
															+  // spec (1, numOfPartitionFields).
															   public void configure(JobConf job) {
															-    this.keyFieldSeparator = job.get("map.output.key.field.separator", "\t");
															-    this.numOfPartitionFields = job.getInt("num.key.fields.for.partition", 0);
															+    String keyFieldSeparator = job.get("map.output.key.field.separator", "\t");
															+    keyFieldHelper.setKeyFieldSeparator(keyFieldSeparator);
															+    if (job.get("num.key.fields.for.partition") != null) {
															+      LOG.warn("Using deprecated num.key.fields.for.partition. " +
															+      		"Use mapred.text.key.partitioner.options instead");
															+      this.numOfPartitionFields = job.getInt("num.key.fields.for.partition",0);
															+      // NOTE(review): presumably selects fields 1..numOfPartitionFields;
															+      // confirm setKeyFieldSpec's behaviour when the value is 0.
															+      keyFieldHelper.setKeyFieldSpec(1,numOfPartitionFields);
															+    } else {
															+      String option = job.getKeyFieldPartitionerOption();
															+      keyFieldHelper.parseOption(option);
															+    }
															   }
														
 
															-  /** Use {@link Object#hashCode()} to partition. */
														
 
															+  // Chooses the reduce partition for a key. With no key specs the hash of
															+  // the whole key string is used; otherwise the UTF-8 bytes of every
															+  // selected key field are folded, in spec order, into one running hash.
															+  // The result is the non-negative hash modulo numReduceTasks.
															   public int getPartition(K2 key, V2 value,
															       int numReduceTasks) {
															-    String partitionKeyStr = key.toString();
															-    String[] fields = partitionKeyStr.split(this.keyFieldSeparator);
															-    if (this.numOfPartitionFields > 0
															-        && this.numOfPartitionFields < fields.length) {
															-      StringBuffer sb = new StringBuffer();
															-      for (int i = 0; i < this.numOfPartitionFields; i++) {
															-        sb.append(fields[i]).append(this.keyFieldSeparator);
															-      }
															-      partitionKeyStr = sb.toString();
															-      if (partitionKeyStr.length() > 0) {
															-        partitionKeyStr = partitionKeyStr.substring(0,
															-            partitionKeyStr.length() - 1);
															-      }
															+    byte[] keyBytes;
															+
															+    List <KeyDescription> allKeySpecs = keyFieldHelper.keySpecs();
															+    if (allKeySpecs.size() == 0) {
															+      return (key.toString().hashCode() & Integer.MAX_VALUE) % numReduceTasks;
															+    }
															+
															+    try {
															+      keyBytes = key.toString().getBytes("UTF-8");
															+    } catch (UnsupportedEncodingException e) {
															+      throw new RuntimeException("The current system does not " +
															+          "support UTF-8 encoding!", e);
															+    }
															+    int []lengthIndicesFirst = keyFieldHelper.getWordLengths(keyBytes, 0, 
															+        keyBytes.length);
															+    int currentHash = 0;
															+    for (KeyDescription keySpec : allKeySpecs) {
															+      // NOTE(review): KeyFieldBasedComparator treats a start offset of -1
															+      // as "field absent"; confirm that case is handled before the offsets
															+      // are used as array indices.
															+      int startChar = keyFieldHelper.getStartOffset(keyBytes, 0, keyBytes.length, 
															+          lengthIndicesFirst, keySpec);
															+      int endChar = keyFieldHelper.getEndOffset(keyBytes, 0, keyBytes.length, 
															+          lengthIndicesFirst, keySpec);
															+      currentHash = hashCode(keyBytes, startChar, endChar, 
															+          currentHash);
															     }
															-    return (partitionKeyStr.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
															+    return (currentHash & Integer.MAX_VALUE) % numReduceTasks;
															   }
														
 
															+  
														
 
															+  /**
															+   * Folds the bytes b[start..end] (end inclusive) into a running hash using
															+   * the multiplier 31.
															+   *
															+   * A negative start means the key does not contain the requested field;
															+   * the hash is then returned unchanged instead of indexing the array with
															+   * a negative offset.
															+   *
															+   * @param b the key bytes
															+   * @param start first byte of the field, or a negative value if absent
															+   * @param end last byte (inclusive) of the field
															+   * @param currentHash the hash accumulated so far
															+   * @return the updated hash
															+   */
															+  protected int hashCode(byte[] b, int start, int end, int currentHash) {
															+    if (start < 0) {
															+      // field not present in this key: it contributes nothing
															+      return currentHash;
															+    }
															+    for (int i = start; i <= end; i++) {
															+      currentHash = 31*currentHash + b[i];
															+    }
															+    return currentHash;
															+  }
														
 
															+
														
 
															 }
														
--- a/src/mapred/org/apache/hadoop/mapred/lib/KeyFieldHelper.java
+++ b/src/mapred/org/apache/hadoop/mapred/lib/KeyFieldHelper.java
@@ -0,0 +1,289 @@
 
															+/**
														
 
															+ * Licensed to the Apache Software Foundation (ASF) under one
														
 
															+ * or more contributor license agreements.  See the NOTICE file
														
 
															+ * distributed with this work for additional information
														
 
															+ * regarding copyright ownership.  The ASF licenses this file
														
 
															+ * to you under the Apache License, Version 2.0 (the
														
 
															+ * "License"); you may not use this file except in compliance
														
 
															+ * with the License.  You may obtain a copy of the License at
														
 
															+ *
														
 
															+ *     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+
														
 
															+package org.apache.hadoop.mapred.lib;
														
 
															+
														
 
															+import java.io.UnsupportedEncodingException;
														
 
															+import java.util.List;
														
 
															+import java.util.ArrayList;
														
 
															+import java.util.StringTokenizer;
														
 
															+
														
 
															+import org.apache.hadoop.util.UTF8ByteArrayUtils;
														
 
															+
														
 
															+/**
															+ * This is used in {@link KeyFieldBasedComparator} and
															+ * {@link KeyFieldBasedPartitioner}. Defines all the methods
															+ * for parsing key specifications. The key specification is of the form:
															+ * -k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number
															+ *  of the field to use, and c is the number of the first character from the
															+ *  beginning of the field. Fields and character positions are numbered
															+ *  starting with 1; a character position of zero in pos2 indicates the
															+ *  field's last character. If '.c' is omitted from pos1, it defaults to 1
															+ *  (the beginning of the field); if omitted from pos2, it defaults to 0
															+ *  (the end of the field). opts are ordering options (supported options
															+ *  are 'nr').
															+ */
															+
															+class KeyFieldHelper {
															+  
															+  /**
															+   * One parsed key specification. The defaults describe "from the first
															+   * character of the first field to the end of the key".
															+   */
															+  protected static class KeyDescription {
															+    int beginFieldIdx = 1;  // 1-based index of the first field
															+    int beginChar = 1;      // 1-based character within the first field
															+    int endFieldIdx = 0;    // 0 means no end field was given
															+    int endChar = 0;        // 0 means the last character of the end field
															+    boolean numeric;
															+    boolean reverse;
															+  }
															+  
															+  private List<KeyDescription> allKeySpecs = new ArrayList<KeyDescription>();
															+  private byte[] keyFieldSeparator;
															+  private boolean keySpecSeen = false;
															+  
															+  /**
															+   * Sets the separator that delimits fields inside a key.
															+   *
															+   * @param keyFieldSeparator the separator; stored as its UTF-8 bytes
															+   */
															+  public void setKeyFieldSeparator(String keyFieldSeparator) {
															+    try {
															+      this.keyFieldSeparator =
															+        keyFieldSeparator.getBytes("UTF-8");
															+    } catch (UnsupportedEncodingException e) {
															+      throw new RuntimeException("The current system does not " +
															+          "support UTF-8 encoding!", e);
															+    }    
															+  }
															+  
															+  /** Required for backward compatibility with num.key.fields.for.partition
															+   * in {@link KeyFieldBasedPartitioner}. Registers a key spec covering the
															+   * fields <code>start</code> through <code>end</code>; the call is ignored
															+   * when <code>end</code> is smaller than <code>start</code>. */
															+  public void setKeyFieldSpec(int start, int end) {
															+    if (end >= start) {
															+      KeyDescription k = new KeyDescription();
															+      k.beginFieldIdx = start;
															+      k.endFieldIdx = end;
															+      keySpecSeen = true;
															+      allKeySpecs.add(k);
															+    }
															+  }
															+  
															+  /** Returns the key specs registered so far (the internal list itself). */
															+  public List<KeyDescription> keySpecs() {
															+    return allKeySpecs;
															+  }
															+    
															+  /**
															+   * Measures the fields of the key stored in <code>b[start..end)</code>.
															+   *
															+   * @return an array whose element 0 is the number of fields and whose
															+   *         elements 1..n are the byte lengths of those fields; the array
															+   *         may be longer than n+1. If no key spec has been registered,
															+   *         only the count (1) is returned.
															+   */
															+  public int[] getWordLengths(byte []b, int start, int end) {
															+    //Given a string like "hello how are you", it returns an array
															+    //like [4, 5, 3, 3, 3], where the first element is the number of
															+    //fields
															+    if (!keySpecSeen) {
															+      //if there were no key specs, then the whole key is one word
															+      return new int[] {1};
															+    }
															+    //skip the whole separator after each match, as getStartOffset and
															+    //getEndOffset do; always advance at least one byte so that an empty
															+    //separator cannot stall the scan
															+    int separatorLength = Math.max(1, keyFieldSeparator.length);
															+    int[] lengths = new int[10];
															+    int currLenLengths = lengths.length;
															+    int idx = 1;
															+    int pos;
															+    while ((pos = UTF8ByteArrayUtils.findBytes(b, start, end, 
															+        keyFieldSeparator)) != -1) {
															+      if (++idx == currLenLengths) {
															+        //out of room: double the array, keeping what was recorded
															+        int[] temp = lengths;
															+        lengths = new int[(currLenLengths = currLenLengths*2)];
															+        System.arraycopy(temp, 0, lengths, 0, temp.length);
															+      }
															+      lengths[idx - 1] = pos - start;
															+      start = pos + separatorLength;
															+    }
															+    
															+    if (start != end) {
															+      //whatever follows the last separator is the final field
															+      lengths[idx] = end - start;
															+    }
															+    lengths[0] = idx; //number of words is the first element
															+    return lengths;
															+  }
															+
															+  /**
															+   * Computes the offset in <code>b</code> of the first byte selected by
															+   * the key spec <code>k</code>.
															+   *
															+   * @param lengthIndices the result of {@link #getWordLengths}
															+   * @return the offset of the first byte, or -1 if the key has fewer
															+   *         fields than the spec asks for or the begin position lies
															+   *         beyond the key
															+   */
															+  public int getStartOffset(byte[]b, int start, int end, 
															+      int []lengthIndices, KeyDescription k) {
															+    //if -k2.5,2 is the keyspec, the startChar is lengthIndices[1] + 5
															+    //note that the [0]'th element is the number of fields in the key
															+    if (lengthIndices[0] >= k.beginFieldIdx) {
															+      int position = 0;
															+      for (int i = 1; i < k.beginFieldIdx; i++) {
															+        position += lengthIndices[i] + keyFieldSeparator.length; 
															+      }
															+      if (position + k.beginChar <= (end - start)) {
															+        return start + position + k.beginChar - 1; 
															+      }
															+    }
															+    return -1;
															+  }
															+
															+  /**
															+   * Computes the offset in <code>b</code> of the last byte selected by
															+   * the key spec <code>k</code>. The offset is inclusive on every path:
															+   * when the spec has no end field, or the end position cannot be located
															+   * inside the key, the offset of the key's last byte
															+   * (<code>end - 1</code>) is returned.
															+   *
															+   * @param lengthIndices the result of {@link #getWordLengths}
															+   * @return the offset of the last selected byte
															+   */
															+  public int getEndOffset(byte[]b, int start, int end, 
															+      int []lengthIndices, KeyDescription k) {
															+    //if -k2,2.8 is the keyspec, the endChar is lengthIndices[1] + 8
															+    //note that the [0]'th element is the number of fields in the key
															+    if (k.endFieldIdx == 0) {
															+      //there is no end field specified for this keyspec. So the remaining
															+      //part of the key is considered in its entirety.
															+      return end - 1; 
															+    }
															+    if (lengthIndices[0] >= k.endFieldIdx) {
															+      int position = 0;
															+      int i;
															+      for (i = 1; i < k.endFieldIdx; i++) {
															+        position += lengthIndices[i] + keyFieldSeparator.length;
															+      }
															+      if (k.endChar == 0) { 
															+        //an end character of 0 selects the whole end field
															+        position += lengthIndices[i];
															+      }
															+      if (position + k.endChar <= (end - start)) {
															+        return start + position + k.endChar - 1;
															+      }
															+      return end - 1;
															+    }
															+    return end - 1;
															+  }
															+
															+  /**
															+   * Parses a whitespace-separated option string. Recognized tokens are
															+   * -n, -r, -nr (global ordering options) and -k (a key spec, written
															+   * either as -k&lt;spec&gt; or as -k &lt;spec&gt;). Key specs that carry
															+   * neither ordering flag inherit the global flags. If no key spec is
															+   * registered at all, a single default spec carrying the global flags
															+   * is added.
															+   *
															+   * @param option the option string; null or empty leaves the helper
															+   *        untouched
															+   */
															+  public void parseOption(String option) {
															+    if (option == null || option.equals("")) {
															+      //we will have only default comparison
															+      return;
															+    }
															+    StringTokenizer args = new StringTokenizer(option);
															+    KeyDescription global = new KeyDescription();
															+    while (args.hasMoreTokens()) {
															+      String arg = args.nextToken();
															+      if (arg.equals("-n")) {  
															+        global.numeric = true;
															+      }
															+      if (arg.equals("-r")) {
															+        global.reverse = true;
															+      }
															+      if (arg.equals("-nr")) {
															+        global.numeric = true;
															+        global.reverse = true;
															+      }
															+      if (arg.startsWith("-k")) {
															+        KeyDescription k = parseKey(arg, args);
															+        if (k != null) {
															+          allKeySpecs.add(k);
															+          keySpecSeen = true;
															+        }
															+      }
															+    }
															+    for (KeyDescription key : allKeySpecs) {
															+      //a spec without its own ordering flags takes the global ones
															+      if (!key.reverse && !key.numeric) {
															+        key.reverse = global.reverse;
															+        key.numeric = global.numeric;
															+      }
															+    }
															+    if (allKeySpecs.size() == 0) {
															+      allKeySpecs.add(global);
															+    }
															+  }
															+  
															+  /**
															+   * Parses one -k argument of the form f[.c][nr][,f[.c]][nr].
															+   *
															+   * @param arg the token that started with "-k"
															+   * @param args the remaining option tokens; one more is consumed when
															+   *        <code>arg</code> is exactly "-k"
															+   * @return the parsed description, or null if no spec text follows -k
															+   * @throws IllegalArgumentException if the spec is malformed
															+   */
															+  private KeyDescription parseKey(String arg, StringTokenizer args) {
															+    //we allow for -k<arg> and -k <arg>
															+    String keyArgs = null;
															+    if (arg.length() == 2) {
															+      if (args.hasMoreTokens()) {
															+        keyArgs = args.nextToken();
															+      }
															+    } else {
															+      keyArgs = arg.substring(2);
															+    }
															+    if (keyArgs == null || keyArgs.length() == 0) {
															+      return null;
															+    }
															+    //the delimiters 'n', 'r', '.' and ',' are returned as tokens too
															+    StringTokenizer st = new StringTokenizer(keyArgs,"nr.,",true);
															+       
															+    KeyDescription key = new KeyDescription();
															+    
															+    String token;
															+    //the key is of the form 1[.3][nr][,1.5][nr]
															+    if (st.hasMoreTokens()) {
															+      token = st.nextToken();
															+      //the first token must be a number
															+      key.beginFieldIdx = Integer.parseInt(token);
															+    }
															+    if (st.hasMoreTokens()) {
															+      token = st.nextToken();
															+      if (token.equals(".")) {
															+        token = st.nextToken();
															+        key.beginChar = Integer.parseInt(token);
															+        if (st.hasMoreTokens()) {
															+          token = st.nextToken();
															+        } else {
															+          return key;
															+        }
															+      } 
															+      //ordering flags attached to pos1
															+      do {
															+        if (token.equals("n")) {
															+          key.numeric = true;
															+        }
															+        else if (token.equals("r")) {
															+          key.reverse = true;
															+        }
															+        else break;
															+        if (st.hasMoreTokens()) {
															+          token = st.nextToken();
															+        } else {
															+          return key;
															+        }
															+      } while (true);
															+      if (token.equals(",")) {
															+        token = st.nextToken();
															+        //the first token must be a number
															+        key.endFieldIdx = Integer.parseInt(token);
															+        if (st.hasMoreTokens()) {
															+          token = st.nextToken();
															+          if (token.equals(".")) {
															+            token = st.nextToken();
															+            key.endChar = Integer.parseInt(token);
															+            if (st.hasMoreTokens()) {
															+              token = st.nextToken();
															+            } else {
															+              return key;
															+            }
															+          }
															+          //ordering flags attached to pos2; anything else is an error
															+          do {
															+            if (token.equals("n")) {
															+              key.numeric = true;
															+            }
															+            else if (token.equals("r")) {
															+              key.reverse = true;
															+            }
															+            else { 
															+              throw new IllegalArgumentException("Invalid -k argument. " +
															+               "Must be of the form -k pos1,[pos2], where pos is of the form " +
															+               "f[.c]nr");
															+            }
															+            if (st.hasMoreTokens()) {
															+              token = st.nextToken();
															+            } else {
															+              break;
															+            }
															+          } while (true);
															+        }
															+        return key;
															+      }
															+      throw new IllegalArgumentException("Invalid -k argument. " +
															+          "Must be of the form -k pos1,[pos2], where pos is of the form " +
															+          "f[.c]nr");
															+    }
															+    return key;
															+  }
															+
															+  /** Debugging aid: prints every field of a key description to stdout. */
															+  private void printKey(KeyDescription key) {
															+    System.out.println("key.beginFieldIdx: " + key.beginFieldIdx);
															+    System.out.println("key.beginChar: " + key.beginChar);
															+    System.out.println("key.endFieldIdx: " + key.endFieldIdx);
															+    System.out.println("key.endChar: " + key.endChar);
															+    System.out.println("key.numeric: " + key.numeric);
															+    System.out.println("key.reverse: " + key.reverse);
															+    System.out.println("parseKey over");
															+  }  
															+}
														
--- a/src/test/org/apache/hadoop/mapred/lib/TestKeyFieldBasedComparator.java
+++ b/src/test/org/apache/hadoop/mapred/lib/TestKeyFieldBasedComparator.java
@@ -0,0 +1,131 @@
 
															+/**
														
 
															+ * Licensed to the Apache Software Foundation (ASF) under one
														
 
															+ * or more contributor license agreements.  See the NOTICE file
														
 
															+ * distributed with this work for additional information
														
 
															+ * regarding copyright ownership.  The ASF licenses this file
														
 
															+ * to you under the Apache License, Version 2.0 (the
														
 
															+ * "License"); you may not use this file except in compliance
														
 
															+ * with the License.  You may obtain a copy of the License at
														
 
															+ *
														
 
															+ *     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+
														
 
															+package org.apache.hadoop.mapred.lib;
														
 
															+
														
 
															+import java.io.*;
														
 
															+
														
 
															+import org.apache.hadoop.fs.FileSystem;
														
 
															+import org.apache.hadoop.fs.FileUtil;
														
 
															+import org.apache.hadoop.fs.Path;
														
 
															+import org.apache.hadoop.io.LongWritable;
														
 
															+import org.apache.hadoop.io.Text;
														
 
															+import org.apache.hadoop.mapred.FileInputFormat;
														
 
															+import org.apache.hadoop.mapred.FileOutputFormat;
														
 
															+import org.apache.hadoop.mapred.HadoopTestCase;
														
 
															+import org.apache.hadoop.mapred.JobClient;
														
 
															+import org.apache.hadoop.mapred.JobConf;
														
 
															+import org.apache.hadoop.mapred.OutputLogFilter;
														
 
															+import org.apache.hadoop.mapred.RunningJob;
														
 
															+import org.apache.hadoop.mapred.TextInputFormat;
														
 
															+import org.apache.hadoop.mapred.TextOutputFormat;
														
 
															+
														
 
															+
														
 
															+/**
															+ * Runs a small local job whose map output keys are sorted by
															+ * {@link KeyFieldBasedComparator} and checks the order in which two
															+ * fixed input lines come out for a range of key specifications.
															+ */
															+public class TestKeyFieldBasedComparator extends HadoopTestCase {
															+  JobConf conf;
															+  String line1 = "123 -123 005120 123.9 0.01 0.18 010 10.1 4444 011 011 234";
															+  String line2 = "134 -12 005100 123.10 -1.01 0.19 02 10.0 4444.1";
															+
															+  public TestKeyFieldBasedComparator() throws IOException {
															+    super(HadoopTestCase.LOCAL_MR, HadoopTestCase.LOCAL_FS, 1, 1);
															+    conf = createJobConf();
															+  }
															+
															+  /**
															+   * Runs the job with the given comparator options and checks the order
															+   * of the two lines in the first output file.
															+   *
															+   * @param keySpec the comparator options, e.g. "-k2,2n"
															+   * @param expect 1 if line1 must come first, 2 if line2 must come first
															+   */
															+  public void configure(String keySpec, int expect) throws Exception {
															+    Path testdir = new Path("build/test/test.mapred.spill");
															+    Path inDir = new Path(testdir, "in");
															+    Path outDir = new Path(testdir, "out");
															+    FileSystem fs = getFileSystem();
															+    fs.delete(testdir, true);
															+    conf.setInputFormat(TextInputFormat.class);
															+    FileInputFormat.setInputPaths(conf, inDir);
															+    FileOutputFormat.setOutputPath(conf, outDir);
															+    conf.setOutputKeyClass(Text.class);
															+    conf.setOutputValueClass(LongWritable.class);
															+
															+    conf.setNumMapTasks(1);
															+    conf.setNumReduceTasks(2);
															+
															+    conf.setOutputFormat(TextOutputFormat.class);
															+    conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
															+    conf.setKeyFieldComparatorOptions(keySpec);
															+    conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
															+    conf.set("map.output.key.field.separator", " ");
															+    conf.setMapperClass(InverseMapper.class);
															+    conf.setReducerClass(IdentityReducer.class);
															+    if (!fs.mkdirs(testdir)) {
															+      throw new IOException("Mkdirs failed to create " + testdir.toString());
															+    }
															+    if (!fs.mkdirs(inDir)) {
															+      throw new IOException("Mkdirs failed to create " + inDir.toString());
															+    }
															+    // set up the input data: one file holding both lines
															+    Path inFile = new Path(inDir, "part0");
															+    FileOutputStream fos = new FileOutputStream(inFile.toString());
															+    try {
															+      fos.write((line1 + "\n").getBytes("UTF-8"));
															+      fos.write((line2 + "\n").getBytes("UTF-8"));
															+    } finally {
															+      // release the file even if a write fails
															+      fos.close();
															+    }
															+    JobClient jc = new JobClient(conf);
															+    RunningJob r_job = jc.submitJob(conf);
															+    while (!r_job.isComplete()) {
															+      Thread.sleep(1000);
															+    }
															+    
															+    if (!r_job.isSuccessful()) {
															+      fail("Oops! The job broke due to an unexpected error");
															+    }
															+    Path[] outputFiles = FileUtil.stat2Paths(
															+        getFileSystem().listStatus(outDir,
															+        new OutputLogFilter()));
															+    if (outputFiles.length > 0) {
															+      InputStream is = getFileSystem().open(outputFiles[0]);
															+      BufferedReader reader =
															+        new BufferedReader(new InputStreamReader(is, "UTF-8"));
															+      try {
															+        //make sure we get what we expect as the first line, and also
															+        //that we have two lines (both the lines must end up in the same
															+        //reducer since the partitioner takes the same key spec for all
															+        //lines
															+        String line = reader.readLine();
															+        assertNotNull("first output line is missing", line);
															+        if (expect == 1) {
															+          assertTrue(line.startsWith(line1));
															+        } else if (expect == 2) {
															+          assertTrue(line.startsWith(line2));
															+        }
															+        line = reader.readLine();
															+        assertNotNull("second output line is missing", line);
															+        if (expect == 1) {
															+          assertTrue(line.startsWith(line2));
															+        } else if (expect == 2) {
															+          assertTrue(line.startsWith(line1));
															+        }
															+      } finally {
															+        // close the reader even when an assertion fails
															+        reader.close();
															+      }
															+    }
															+  }
															+
															+  /** Exercises field, character-offset, numeric and reverse key specs. */
															+  public void testBasicUnixComparator() throws Exception {
															+    configure("-k1,1n", 1);
															+    configure("-k2,2n", 1);
															+    configure("-k2.2,2n", 2);
															+    configure("-k3.4,3n", 2);
															+    configure("-k3.2,3.3n -k4,4n", 2);
															+    configure("-k3.2,3.3n -k4,4nr", 1);
															+    configure("-k2.4,2.4n", 2);
															+    configure("-k7,7", 1);
															+    configure("-k7,7n", 2);
															+    configure("-k8,8n", 2);
															+    configure("-k9,9n", 1);
															+    configure("-k11,11",2);
															+    configure("-k10,10",2);
															+  }
															+}