Browse Source

HADOOP-3308. Improve QuickSort by excluding values equal to the pivot from the
partition.



git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@651735 13f79535-47bb-0310-9956-ffa450edef68

Christopher Douglas 17 years ago
parent
commit
29a454a986

+ 3 - 0
CHANGES.txt

@@ -46,6 +46,9 @@ Trunk (unreleased changes)
     HADOOP-3295. Allow TextOutputFormat to use configurable spearators.
     (Zheng Shao via cdouglas).
 
+    HADOOP-3308. Improve QuickSort by excluding values eq the pivot from the
+    partition. (cdouglas)
+
   OPTIMIZATIONS
 
     HADOOP-3274. The default constructor of BytesWritable creates empty 

+ 32 - 12
src/java/org/apache/hadoop/util/QuickSort.java

@@ -21,7 +21,7 @@ package org.apache.hadoop.util;
  * An implementation of the core algorithm of QuickSort.
  * See "Median-of-Three Partitioning" in Sedgewick book.
  */
-public class QuickSort implements IndexedSorter {
+public final class QuickSort implements IndexedSorter {
 
   public QuickSort() { }
 
@@ -39,7 +39,8 @@ public class QuickSort implements IndexedSorter {
    * Same as {@link #sort}, but indicate that we're making progress after
    * each partition.
    */
-  public void sort(IndexedSortable s, int p, int r, Progressable rep) {
+  public void sort(final IndexedSortable s, final int p, final int r,
+      final Progressable rep) {
     if (null != rep) {
       rep.progress();
     }
@@ -60,26 +61,45 @@ public class QuickSort implements IndexedSorter {
     fix(s, p, r-1);
 
     // Divide
-    int x = p;
     int i = p;
     int j = r;
+    int ll = p;
+    int rr = r;
+    int cr;
     while(true) {
-      while (++i < r && s.compare(i, x) < 0) { } // move lindex
-      while (--j > x && s.compare(x, j) < 0) { } // move rindex
+      while (++i < j) {
+        if ((cr = s.compare(i, p)) > 0) break;
+        if (0 == cr && ++ll != i) {
+          s.swap(ll, i);
+        }
+      }
+      while (--j > i) {
+        if ((cr = s.compare(p, j)) > 0) break;
+        if (0 == cr && --rr != j) {
+          s.swap(rr, j);
+        }
+      }
       if (i < j) s.swap(i, j);
       else break;
     }
-    // swap pivot into position
-    s.swap(x, i - 1);
+    j = i;
+    // swap pivot- and all eq values- into position
+    while (ll >= p) {
+      s.swap(ll--, --i);
+    }
+    while (rr < r) {
+      s.swap(rr++, j++);
+    }
 
     // Conquer
     // Recurse on smaller interval first to keep stack shallow
-    if (i - p - 1 < r - i) {
-      sort(s, p, i - 1, rep);
-      sort(s, i, r, rep);
+    assert i != j;
+    if (i - p < r - j) {
+      sort(s, p, i, rep);
+      sort(s, j, r, rep);
     } else {
-      sort(s, i, r, rep);
-      sort(s, p, i - 1, rep);
+      sort(s, j, r, rep);
+      sort(s, p, i, rep);
     }
   }
 

+ 39 - 8
src/test/org/apache/hadoop/util/TestIndexedSort.java

@@ -165,18 +165,31 @@ public class TestIndexedSort extends TestCase {
   }
 
   public void testAllEqual() throws Exception {
-    final int SAMPLE = 50;
+    final int SAMPLE = 500;
     int[] values = new int[SAMPLE];
     Arrays.fill(values, 10);
     SampleSortable s = new SampleSortable(values);
     IndexedSorter sorter = new QuickSort();
     sorter.sort(s, 0, SAMPLE);
     int[] check = s.getSorted();
-    assertTrue(Arrays.equals(values, check));
+    assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+        Arrays.toString(check), Arrays.equals(values, check));
+    Random r = new Random();
+    int diff = r.nextInt(SAMPLE);
+    values[diff] = 9;
+    values[(diff + r.nextInt(SAMPLE >>> 1)) % SAMPLE] = 11;
+    s = new SampleSortable(values);
+    sorter.sort(s, 0, SAMPLE);
+    check = s.getSorted();
+    Arrays.sort(values);
+    assertTrue(check[0] == 9);
+    assertTrue(check[SAMPLE - 1] == 11);
+    assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+        Arrays.toString(check), Arrays.equals(values, check));
   }
 
   public void testSorted() throws Exception {
-    final int SAMPLE = 50;
+    final int SAMPLE = 500;
     int[] values = new int[SAMPLE];
     Random r = new Random();
     for (int i = 0; i < SAMPLE; ++i) {
@@ -187,7 +200,22 @@ public class TestIndexedSort extends TestCase {
     IndexedSorter sorter = new QuickSort();
     sorter.sort(s, 0, SAMPLE);
     int[] check = s.getSorted();
-    assertTrue(Arrays.equals(values, check));
+    assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+        Arrays.toString(check), Arrays.equals(values, check));
+  }
+
+  public void testSequential() throws Exception {
+    final int SAMPLE = 500;
+    int[] values = new int[SAMPLE];
+    for (int i = 0; i < SAMPLE; ++i) {
+      values[i] = i;
+    }
+    SampleSortable s = new SampleSortable(values);
+    IndexedSorter sorter = new QuickSort();
+    sorter.sort(s, 0, SAMPLE);
+    int[] check = s.getSorted();
+    assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+        Arrays.toString(check), Arrays.equals(values, check));
   }
 
   public void testSingleRecord() throws Exception {
@@ -198,18 +226,20 @@ public class TestIndexedSort extends TestCase {
     IndexedSorter sorter = new QuickSort();
     sorter.sort(s, 0, SAMPLE);
     int[] check = s.getSorted();
-    assertTrue(Arrays.equals(values, check));
+    assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+        Arrays.toString(check), Arrays.equals(values, check));
   }
 
   public void testQuickSort() throws Exception {
-    final int SAMPLE = 10000;
+    final int SAMPLE = 100000;
     SampleSortable s = new SampleSortable(SAMPLE);
     int[] values = s.getValues();
     Arrays.sort(values);
     IndexedSorter sorter = new QuickSort();
     sorter.sort(s, 0, SAMPLE);
     int[] check = s.getSorted();
-    assertTrue(Arrays.equals(values, check));
+    assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+        Arrays.toString(check), Arrays.equals(values, check));
   }
 
   public void testWritable() throws Exception {
@@ -220,7 +250,8 @@ public class TestIndexedSort extends TestCase {
     IndexedSorter sorter = new QuickSort();
     sorter.sort(s, 0, SAMPLE);
     String[] check = s.getSorted();
-    assertTrue(Arrays.equals(values, check));
+    assertTrue(Arrays.toString(values) + "\ndoesn't match\n" +
+        Arrays.toString(check), Arrays.equals(values, check));
   }
 
 }