فهرست منبع

HADOOP-602. Remove dependency on Lucene. Contributed by Milind.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@500397 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 سال پیش
والد
کامیت
b4f8971f7c

+ 5 - 0
CHANGES.txt

@@ -82,6 +82,11 @@ Trunk (unreleased changes)
 25. HADOOP-856.  Fix HDFS's fsck command to not report that
     non-existent filesystems are healthy.  (Milind Bhandarkar via cutting)
 
+26. HADOOP-602.  Remove the dependency on Lucene's PriorityQueue
+    utility, by copying it into Hadoop.  This facilitates using Hadoop
+    with different versions of Lucene without worrying about CLASSPATH
+    order.  (Milind Bhandarkar via cutting)
+
 
 Release 0.10.1 - 2007-01-10
 

BIN
lib/lucene-core-1.9.1.jar


+ 1 - 1
src/contrib/streaming/src/java/org/apache/hadoop/streaming/MergerInputFormat.java

@@ -24,12 +24,12 @@ import java.io.ByteArrayInputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.util.ArrayList;
-import org.apache.lucene.util.PriorityQueue;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.*;
 import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.PriorityQueue;
 
 /**
  Eventually will be fed TupleInputFormats. 

+ 1 - 1
src/java/org/apache/hadoop/io/SequenceFile.java

@@ -25,7 +25,6 @@ import java.net.URI;
 import java.net.URISyntaxException;
 import java.rmi.server.UID;
 import java.security.MessageDigest;
-import org.apache.lucene.util.PriorityQueue;
 import org.apache.commons.logging.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.io.compress.CompressionCodec;
@@ -40,6 +39,7 @@ import org.apache.hadoop.util.Progress;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.NativeCodeLoader;
 import org.apache.hadoop.util.MergeSort;
+import org.apache.hadoop.util.PriorityQueue;
 
 /** Support for flat files of binary key/value pairs. */
 public class SequenceFile {

+ 149 - 0
src/java/org/apache/hadoop/util/PriorityQueue.java

@@ -0,0 +1,149 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+
+/** A PriorityQueue maintains a partial ordering of its elements such that the
+  least element can always be found in constant time.  Put()'s and pop()'s
+  require log(size) time. */
+public abstract class PriorityQueue {
+  private Object[] heap;      // 1-based binary heap; slot 0 is never read
+  private int size;           // number of elements currently stored
+  private int maxSize;        // capacity given to initialize()
+
+  /** Determines the ordering of objects in this priority queue.  Subclasses
+    must define this one method; top() returns the least element under it. */
+  protected abstract boolean lessThan(Object a, Object b);
+
+  /** Subclass constructors must call this; allocates room for maxSize elements. */
+  protected final void initialize(int maxSize) {
+    size = 0;
+    int heapSize = maxSize + 1;   // one extra slot because index 0 is unused
+    heap = new Object[heapSize];
+    this.maxSize = maxSize;
+  }
+
+  /**
+   * Adds an Object to a PriorityQueue in log(size) time.
+   * If one tries to add more objects than the maxSize given to initialize,
+   * a RuntimeException (ArrayIndexOutOfBoundsException) is thrown.
+   */
+  public final void put(Object element) {
+    size++;                       // note: incremented before the array store below
+    heap[size] = element;         // place at the bottom, then sift up
+    upHeap();
+  }
+
+  /**
+   * Adds element to the PriorityQueue in log(size) time if either
+   * the PriorityQueue is not full, or not lessThan(element, top()); in the
+   * latter case element replaces the current top (least) element.
+   * @param element the object to add
+   * @return true if element is added, false otherwise.
+   */
+  public boolean insert(Object element){
+    if(size < maxSize){
+      put(element);
+      return true;
+    }
+    else if(size > 0 && !lessThan(element, top())){
+      heap[1] = element;          // overwrite the least element
+      adjustTop();
+      return true;
+    }
+    else
+      return false;
+   }
+
+  /** Returns the least element in constant time, or null if the queue is empty. */
+  public final Object top() {
+    if (size > 0)
+      return heap[1];
+    else
+      return null;
+  }
+
+  /** Removes and returns the least element of the PriorityQueue in log(size)
+    time.  Returns null if the queue is empty. */
+  public final Object pop() {
+    if (size > 0) {
+      Object result = heap[1];			  // save first value
+      heap[1] = heap[size];			  // move last to first
+      heap[size] = null;			  // permit GC of objects
+      size--;
+      downHeap();				  // adjust heap
+      return result;
+    } else
+      return null;
+  }
+
+  /** Should be called when the Object at top changes values.  Still log(n)
+   * worst case, but it's at least twice as fast to <pre>
+   *  { pq.top().change(); pq.adjustTop(); }
+   * </pre> instead of <pre>
+   *  { o = pq.pop(); o.change(); pq.put(o); }
+   * </pre>
+   */
+  public final void adjustTop() {
+    downHeap();
+  }
+
+
+  /** Returns the number of elements currently stored in the PriorityQueue. */
+  public final int size() {
+    return size;
+  }
+
+  /** Removes all entries from the PriorityQueue, nulling the slots so they can be GC'd. */
+  public final void clear() {
+    for (int i = 0; i <= size; i++)   // covers slots 0..size inclusive
+      heap[i] = null;
+    size = 0;
+  }
+
+  private final void upHeap() {               // sifts heap[size] up to its place
+    int i = size;
+    Object node = heap[i];			  // save bottom node
+    int j = i >>> 1;                          // parent index of i
+    while (j > 0 && lessThan(node, heap[j])) {
+      heap[i] = heap[j];			  // shift parents down
+      i = j;
+      j = j >>> 1;
+    }
+    heap[i] = node;				  // install saved node
+  }
+
+  private final void downHeap() {             // sifts heap[1] down to its place
+    int i = 1;
+    Object node = heap[i];			  // save top node
+    int j = i << 1;				  // find smaller child
+    int k = j + 1;                            // right child index
+    if (k <= size && lessThan(heap[k], heap[j])) {
+      j = k;
+    }
+    while (j <= size && lessThan(heap[j], node)) {
+      heap[i] = heap[j];			  // shift up child
+      i = j;
+      j = i << 1;
+      k = j + 1;
+      if (k <= size && lessThan(heap[k], heap[j])) {
+	j = k;
+      }
+    }
+    heap[i] = node;				  // install saved node
+  }
+}