|
@@ -25,6 +25,7 @@ import java.util.Arrays;
|
|
|
|
|
|
import org.apache.commons.logging.Log;
|
|
|
import org.apache.commons.logging.LogFactory;
|
|
|
+import org.apache.hadoop.HadoopIllegalArgumentException;
|
|
|
import org.apache.hadoop.classification.InterfaceAudience;
|
|
|
import org.apache.hadoop.classification.InterfaceStability;
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
@@ -824,6 +825,148 @@ public class MapFile {
|
|
|
return cnt;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * Class to merge multiple MapFiles of same Key and Value types to one MapFile
|
|
|
+ */
|
|
|
+ public static class Merger {
|
|
|
+ private Configuration conf;
|
|
|
+ private WritableComparator comparator = null;
|
|
|
+ private Reader[] inReaders;
|
|
|
+ private Writer outWriter;
|
|
|
+ private Class<Writable> valueClass = null;
|
|
|
+ private Class<WritableComparable> keyClass = null;
|
|
|
+
|
|
|
+ public Merger(Configuration conf) throws IOException {
|
|
|
+ this.conf = conf;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Merge multiple MapFiles to one Mapfile
|
|
|
+ *
|
|
|
+ * @param inMapFiles
|
|
|
+ * @param outMapFile
|
|
|
+ * @throws IOException
|
|
|
+ */
|
|
|
+ public void merge(Path[] inMapFiles, boolean deleteInputs,
|
|
|
+ Path outMapFile) throws IOException {
|
|
|
+ try {
|
|
|
+ open(inMapFiles, outMapFile);
|
|
|
+ mergePass();
|
|
|
+ } finally {
|
|
|
+ close();
|
|
|
+ }
|
|
|
+ if (deleteInputs) {
|
|
|
+ for (int i = 0; i < inMapFiles.length; i++) {
|
|
|
+ Path path = inMapFiles[i];
|
|
|
+ delete(path.getFileSystem(conf), path.toString());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Open all input files for reading and verify the key and value types. And
|
|
|
+ * open Output file for writing
|
|
|
+ */
|
|
|
+ @SuppressWarnings("unchecked")
|
|
|
+ private void open(Path[] inMapFiles, Path outMapFile) throws IOException {
|
|
|
+ inReaders = new Reader[inMapFiles.length];
|
|
|
+ for (int i = 0; i < inMapFiles.length; i++) {
|
|
|
+ Reader reader = new Reader(inMapFiles[i], conf);
|
|
|
+ if (keyClass == null || valueClass == null) {
|
|
|
+ keyClass = (Class<WritableComparable>) reader.getKeyClass();
|
|
|
+ valueClass = (Class<Writable>) reader.getValueClass();
|
|
|
+ } else if (keyClass != reader.getKeyClass()
|
|
|
+ || valueClass != reader.getValueClass()) {
|
|
|
+ throw new HadoopIllegalArgumentException(
|
|
|
+ "Input files cannot be merged as they"
|
|
|
+ + " have different Key and Value classes");
|
|
|
+ }
|
|
|
+ inReaders[i] = reader;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (comparator == null) {
|
|
|
+ Class<? extends WritableComparable> cls;
|
|
|
+ cls = keyClass.asSubclass(WritableComparable.class);
|
|
|
+ this.comparator = WritableComparator.get(cls, conf);
|
|
|
+ } else if (comparator.getKeyClass() != keyClass) {
|
|
|
+ throw new HadoopIllegalArgumentException(
|
|
|
+ "Input files cannot be merged as they"
|
|
|
+ + " have different Key class compared to"
|
|
|
+ + " specified comparator");
|
|
|
+ }
|
|
|
+
|
|
|
+ outWriter = new MapFile.Writer(conf, outMapFile,
|
|
|
+ MapFile.Writer.keyClass(keyClass),
|
|
|
+ MapFile.Writer.valueClass(valueClass));
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Merge all input files to output map file.<br>
|
|
|
+ * 1. Read first key/value from all input files to keys/values array. <br>
|
|
|
+ * 2. Select the least key and corresponding value. <br>
|
|
|
+ * 3. Write the selected key and value to output file. <br>
|
|
|
+ * 4. Replace the already written key/value in keys/values arrays with the
|
|
|
+ * next key/value from the selected input <br>
|
|
|
+ * 5. Repeat step 2-4 till all keys are read. <br>
|
|
|
+ */
|
|
|
+ private void mergePass() throws IOException {
|
|
|
+ // re-usable array
|
|
|
+ WritableComparable[] keys = new WritableComparable[inReaders.length];
|
|
|
+ Writable[] values = new Writable[inReaders.length];
|
|
|
+ // Read first key/value from all inputs
|
|
|
+ for (int i = 0; i < inReaders.length; i++) {
|
|
|
+ keys[i] = ReflectionUtils.newInstance(keyClass, null);
|
|
|
+ values[i] = ReflectionUtils.newInstance(valueClass, null);
|
|
|
+ if (!inReaders[i].next(keys[i], values[i])) {
|
|
|
+ // Handle empty files
|
|
|
+ keys[i] = null;
|
|
|
+ values[i] = null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ do {
|
|
|
+ int currentEntry = -1;
|
|
|
+ WritableComparable currentKey = null;
|
|
|
+ Writable currentValue = null;
|
|
|
+ for (int i = 0; i < keys.length; i++) {
|
|
|
+ if (keys[i] == null) {
|
|
|
+ // Skip Readers reached EOF
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (currentKey == null || comparator.compare(currentKey, keys[i]) > 0) {
|
|
|
+ currentEntry = i;
|
|
|
+ currentKey = keys[i];
|
|
|
+ currentValue = values[i];
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (currentKey == null) {
|
|
|
+ // Merge Complete
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ // Write the selected key/value to merge stream
|
|
|
+ outWriter.append(currentKey, currentValue);
|
|
|
+ // Replace the already written key/value in keys/values arrays with the
|
|
|
+ // next key/value from the selected input
|
|
|
+ if (!inReaders[currentEntry].next(keys[currentEntry],
|
|
|
+ values[currentEntry])) {
|
|
|
+ // EOF for this file
|
|
|
+ keys[currentEntry] = null;
|
|
|
+ values[currentEntry] = null;
|
|
|
+ }
|
|
|
+ } while (true);
|
|
|
+ }
|
|
|
+
|
|
|
+ private void close() throws IOException {
|
|
|
+ for (int i = 0; i < inReaders.length; i++) {
|
|
|
+ IOUtils.closeStream(inReaders[i]);
|
|
|
+ inReaders[i] = null;
|
|
|
+ }
|
|
|
+ if (outWriter != null) {
|
|
|
+ outWriter.close();
|
|
|
+ outWriter = null;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
public static void main(String[] args) throws Exception {
|
|
|
String usage = "Usage: MapFile inFile outFile";
|