|
@@ -21,10 +21,13 @@ package org.apache.hadoop.streaming;
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
|
|
|
|
import org.apache.hadoop.io.Text;
|
|
import org.apache.hadoop.io.Text;
|
|
|
|
+import org.apache.hadoop.mapred.LineRecordReader;
|
|
import org.apache.hadoop.mapred.LineRecordReader.LineReader;
|
|
import org.apache.hadoop.mapred.LineRecordReader.LineReader;
|
|
|
|
|
|
/**
|
|
/**
|
|
* General utils for byte array containing UTF-8 encoded strings
|
|
* General utils for byte array containing UTF-8 encoded strings
|
|
|
|
+ * @deprecated use {@link org.apache.hadoop.util.UTF8ByteArrayUtils} and
|
|
|
|
+ * {@link StreamKeyValUtil} instead
|
|
*/
|
|
*/
|
|
|
|
|
|
public class UTF8ByteArrayUtils {
|
|
public class UTF8ByteArrayUtils {
|
|
@@ -34,14 +37,11 @@ public class UTF8ByteArrayUtils {
|
|
* @param start starting offset
|
|
* @param start starting offset
|
|
* @param length no. of bytes
|
|
* @param length no. of bytes
|
|
* @return position that first tab occures otherwise -1
|
|
* @return position that first tab occures otherwise -1
|
|
|
|
+ * @deprecated use {@link StreamKeyValUtil#findTab(byte[], int, int)}
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static int findTab(byte [] utf, int start, int length) {
|
|
public static int findTab(byte [] utf, int start, int length) {
|
|
- for(int i=start; i<(start+length); i++) {
|
|
|
|
- if (utf[i]==(byte)'\t') {
|
|
|
|
- return i;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return -1;
|
|
|
|
|
|
+ return StreamKeyValUtil.findTab(utf, start, length);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -51,14 +51,13 @@ public class UTF8ByteArrayUtils {
|
|
* @param end ending position
|
|
* @param end ending position
|
|
* @param b the byte to find
|
|
* @param b the byte to find
|
|
* @return position that first byte occures otherwise -1
|
|
* @return position that first byte occures otherwise -1
|
|
|
|
+ * @deprecated use
|
|
|
|
+ * {@link org.apache.hadoop.util.UTF8ByteArrayUtils#findByte(byte[], int,
|
|
|
|
+ * int, byte)}
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static int findByte(byte [] utf, int start, int end, byte b) {
|
|
public static int findByte(byte [] utf, int start, int end, byte b) {
|
|
- for(int i=start; i<end; i++) {
|
|
|
|
- if (utf[i]==b) {
|
|
|
|
- return i;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return -1;
|
|
|
|
|
|
+ return org.apache.hadoop.util.UTF8ByteArrayUtils.findByte(utf, start, end, b);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -68,22 +67,13 @@ public class UTF8ByteArrayUtils {
|
|
* @param end ending position
|
|
* @param end ending position
|
|
* @param b the bytes to find
|
|
* @param b the bytes to find
|
|
* @return position that first byte occures otherwise -1
|
|
* @return position that first byte occures otherwise -1
|
|
|
|
+ * @deprecated use
|
|
|
|
+ * {@link org.apache.hadoop.util.UTF8ByteArrayUtils#findBytes(byte[], int,
|
|
|
|
+ * int, byte[])}
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static int findBytes(byte [] utf, int start, int end, byte[] b) {
|
|
public static int findBytes(byte [] utf, int start, int end, byte[] b) {
|
|
- int matchEnd = end - b.length;
|
|
|
|
- for(int i=start; i<=matchEnd; i++) {
|
|
|
|
- boolean matched = true;
|
|
|
|
- for(int j=0; j<b.length; j++) {
|
|
|
|
- if (utf[i+j] != b[j]) {
|
|
|
|
- matched = false;
|
|
|
|
- break;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if (matched) {
|
|
|
|
- return i;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- return -1;
|
|
|
|
|
|
+ return org.apache.hadoop.util.UTF8ByteArrayUtils.findBytes(utf, start, end, b);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -94,18 +84,14 @@ public class UTF8ByteArrayUtils {
|
|
* @param b the byte to find
|
|
* @param b the byte to find
|
|
* @param n the desired occurrence of the given byte
|
|
* @param n the desired occurrence of the given byte
|
|
* @return position that nth occurrence of the given byte if exists; otherwise -1
|
|
* @return position that nth occurrence of the given byte if exists; otherwise -1
|
|
|
|
+ * @deprecated use
|
|
|
|
+ * {@link org.apache.hadoop.util.UTF8ByteArrayUtils#findNthByte(byte[], int,
|
|
|
|
+ * int, byte, int)}
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static int findNthByte(byte [] utf, int start, int length, byte b, int n) {
|
|
public static int findNthByte(byte [] utf, int start, int length, byte b, int n) {
|
|
- int pos = -1;
|
|
|
|
- int nextStart = start;
|
|
|
|
- for (int i = 0; i < n; i++) {
|
|
|
|
- pos = findByte(utf, nextStart, length, b);
|
|
|
|
- if (pos < 0) {
|
|
|
|
- return pos;
|
|
|
|
- }
|
|
|
|
- nextStart = pos + 1;
|
|
|
|
- }
|
|
|
|
- return pos;
|
|
|
|
|
|
+ return org.apache.hadoop.util.UTF8ByteArrayUtils.findNthByte(utf, start,
|
|
|
|
+ length, b, n);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -114,18 +100,24 @@ public class UTF8ByteArrayUtils {
|
|
* @param b the byte to find
|
|
* @param b the byte to find
|
|
* @param n the desired occurrence of the given byte
|
|
* @param n the desired occurrence of the given byte
|
|
* @return position that nth occurrence of the given byte if exists; otherwise -1
|
|
* @return position that nth occurrence of the given byte if exists; otherwise -1
|
|
|
|
+ * @deprecated use
|
|
|
|
+ * {@link org.apache.hadoop.util.UTF8ByteArrayUtils#findNthByte(byte[],
|
|
|
|
+ * byte, int)}
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static int findNthByte(byte [] utf, byte b, int n) {
|
|
public static int findNthByte(byte [] utf, byte b, int n) {
|
|
- return findNthByte(utf, 0, utf.length, b, n);
|
|
|
|
|
|
+ return org.apache.hadoop.util.UTF8ByteArrayUtils.findNthByte(utf, b, n);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
* Find the first occured tab in a UTF-8 encoded string
|
|
* Find the first occured tab in a UTF-8 encoded string
|
|
* @param utf a byte array containing a UTF-8 encoded string
|
|
* @param utf a byte array containing a UTF-8 encoded string
|
|
* @return position that first tab occures otherwise -1
|
|
* @return position that first tab occures otherwise -1
|
|
|
|
+ * @deprecated use {@link StreamKeyValUtil#findTab(byte[])}
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static int findTab(byte [] utf) {
|
|
public static int findTab(byte [] utf) {
|
|
- return findNthByte(utf, 0, utf.length, (byte)'\t', 1);
|
|
|
|
|
|
+ return StreamKeyValUtil.findTab(utf);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -138,22 +130,17 @@ public class UTF8ByteArrayUtils {
|
|
* @param val contains value upon the method is returned
|
|
* @param val contains value upon the method is returned
|
|
* @param splitPos the split pos
|
|
* @param splitPos the split pos
|
|
* @param separatorLength the length of the separator between key and value
|
|
* @param separatorLength the length of the separator between key and value
|
|
|
|
+ * @deprecated use
|
|
|
|
+ * {@link StreamKeyValUtil#splitKeyVal(byte[], int, int, Text, Text,
|
|
|
|
+ * int, int)}
|
|
* @throws IOException
|
|
* @throws IOException
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static void splitKeyVal(byte[] utf, int start, int length,
|
|
public static void splitKeyVal(byte[] utf, int start, int length,
|
|
Text key, Text val, int splitPos,
|
|
Text key, Text val, int splitPos,
|
|
int separatorLength) throws IOException {
|
|
int separatorLength) throws IOException {
|
|
- if (splitPos<start || splitPos >= (start+length))
|
|
|
|
- throw new IllegalArgumentException("splitPos must be in the range " +
|
|
|
|
- "[" + start + ", " + (start+length) + "]: " + splitPos);
|
|
|
|
- int keyLen = (splitPos-start);
|
|
|
|
- byte [] keyBytes = new byte[keyLen];
|
|
|
|
- System.arraycopy(utf, start, keyBytes, 0, keyLen);
|
|
|
|
- int valLen = (start+length)-splitPos-separatorLength;
|
|
|
|
- byte [] valBytes = new byte[valLen];
|
|
|
|
- System.arraycopy(utf, splitPos+separatorLength, valBytes, 0, valLen);
|
|
|
|
- key.set(keyBytes);
|
|
|
|
- val.set(valBytes);
|
|
|
|
|
|
+ StreamKeyValUtil.splitKeyVal(utf, start,
|
|
|
|
+ length, key, val, splitPos, separatorLength);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -165,11 +152,14 @@ public class UTF8ByteArrayUtils {
|
|
* @param key contains key upon the method is returned
|
|
* @param key contains key upon the method is returned
|
|
* @param val contains value upon the method is returned
|
|
* @param val contains value upon the method is returned
|
|
* @param splitPos the split pos
|
|
* @param splitPos the split pos
|
|
|
|
+ * @deprecated use
|
|
|
|
+ * {@link StreamKeyValUtil#splitKeyVal(byte[], int, int, Text, Text, int)}
|
|
* @throws IOException
|
|
* @throws IOException
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static void splitKeyVal(byte[] utf, int start, int length,
|
|
public static void splitKeyVal(byte[] utf, int start, int length,
|
|
Text key, Text val, int splitPos) throws IOException {
|
|
Text key, Text val, int splitPos) throws IOException {
|
|
- splitKeyVal(utf, start, length, key, val, splitPos, 1);
|
|
|
|
|
|
+ StreamKeyValUtil.splitKeyVal(utf, start, length, key, val, splitPos);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -181,12 +171,15 @@ public class UTF8ByteArrayUtils {
|
|
* @param val contains value upon the method is returned
|
|
* @param val contains value upon the method is returned
|
|
* @param splitPos the split pos
|
|
* @param splitPos the split pos
|
|
* @param separatorLength the length of the separator between key and value
|
|
* @param separatorLength the length of the separator between key and value
|
|
|
|
+ * @deprecated use
|
|
|
|
+ * {@link StreamKeyValUtil#splitKeyVal(byte[], Text, Text, int, int)}
|
|
* @throws IOException
|
|
* @throws IOException
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static void splitKeyVal(byte[] utf, Text key, Text val, int splitPos,
|
|
public static void splitKeyVal(byte[] utf, Text key, Text val, int splitPos,
|
|
int separatorLength)
|
|
int separatorLength)
|
|
throws IOException {
|
|
throws IOException {
|
|
- splitKeyVal(utf, 0, utf.length, key, val, splitPos, separatorLength);
|
|
|
|
|
|
+ StreamKeyValUtil.splitKeyVal(utf, key, val, splitPos, separatorLength);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -196,23 +189,28 @@ public class UTF8ByteArrayUtils {
|
|
* @param key contains key upon the method is returned
|
|
* @param key contains key upon the method is returned
|
|
* @param val contains value upon the method is returned
|
|
* @param val contains value upon the method is returned
|
|
* @param splitPos the split pos
|
|
* @param splitPos the split pos
|
|
|
|
+ * @deprecated use
|
|
|
|
+ * {@link StreamKeyValUtil#splitKeyVal(byte[], Text, Text, int)}
|
|
* @throws IOException
|
|
* @throws IOException
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static void splitKeyVal(byte[] utf, Text key, Text val, int splitPos)
|
|
public static void splitKeyVal(byte[] utf, Text key, Text val, int splitPos)
|
|
throws IOException {
|
|
throws IOException {
|
|
- splitKeyVal(utf, 0, utf.length, key, val, splitPos, 1);
|
|
|
|
|
|
+ StreamKeyValUtil.splitKeyVal(utf, key, val, splitPos);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
* Read a utf8 encoded line from a data input stream.
|
|
* Read a utf8 encoded line from a data input stream.
|
|
* @param lineReader LineReader to read the line from.
|
|
* @param lineReader LineReader to read the line from.
|
|
* @param out Text to read into
|
|
* @param out Text to read into
|
|
- * @return number of bytes read
|
|
|
|
|
|
+ * @return number of bytes read
|
|
|
|
+ * @deprecated use
|
|
|
|
+ * {@link StreamKeyValUtil#readLine(LineRecordReader.LineReader, Text)}
|
|
* @throws IOException
|
|
* @throws IOException
|
|
*/
|
|
*/
|
|
|
|
+ @Deprecated
|
|
public static int readLine(LineReader lineReader, Text out)
|
|
public static int readLine(LineReader lineReader, Text out)
|
|
throws IOException {
|
|
throws IOException {
|
|
- out.clear();
|
|
|
|
- return lineReader.readLine(out);
|
|
|
|
|
|
+ return StreamKeyValUtil.readLine(lineReader, out);
|
|
}
|
|
}
|
|
}
|
|
}
|