Browse Source

HADOOP-2600 Performance: HStore.getRowKeyAtOrBefore should use
MapFile.Reader#getClosest (before)


git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@612614 13f79535-47bb-0310-9956-ffa450edef68

Michael Stack 17 years ago
parent
commit
9ae50452dc

+ 3 - 0
src/contrib/hbase/CHANGES.txt

@@ -42,6 +42,9 @@ Trunk (unreleased changes)
                consumes >20% CPU
    HADOOP-2443 Keep lazy cache of regions in client rather than an
                'authoritative' list (Bryan Duxbury via Stack)
+   HADOOP-2600 Performance: HStore.getRowKeyAtOrBefore should use
+               MapFile.Reader#getClosest (before)
+               (Bryan Duxbury via Stack)
 
   BUG FIXES
    HADOOP-2059 In tests, exceptions in min dfs shutdown should not fail test

+ 27 - 11
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java

@@ -1871,30 +1871,46 @@ public class HStore implements HConstants {
   private Text rowAtOrBeforeFromMapFile(MapFile.Reader map, Text row, 
     long timestamp)
   throws IOException {
+    HStoreKey searchKey = new HStoreKey(row, timestamp);
     Text previousRow = null;
     ImmutableBytesWritable readval = new ImmutableBytesWritable();
     HStoreKey readkey = new HStoreKey();
     
     synchronized(map) {
-      // start at the beginning of the map
-      // TODO: this sucks. do a clever binary search instead.
+      // don't bother with the rest of this if the file is empty
       map.reset();
-    
-      while(map.next(readkey, readval)){
+      if (!map.next(readkey, readval)) {
+        return null;
+      }
+      
+      HStoreKey finalKey = new HStoreKey(); 
+      map.finalKey(finalKey);
+      if (finalKey.getRow().compareTo(row) < 0) {
+        return finalKey.getRow();
+      }
+      
+      // seek to the exact row, or the one that would be immediately before it
+      readkey = (HStoreKey)map.getClosest(searchKey, readval, true);
+      
+      if (readkey == null) {
+        // didn't find anything that would match, so return null
+        return null;
+      }
+      
+      do {
         if (readkey.getRow().compareTo(row) == 0) {
           // exact match on row
           if (readkey.getTimestamp() <= timestamp) {
             // timestamp fits, return this key
             return readkey.getRow();
           }
-          
           // getting here means that we matched the row, but the timestamp
           // is too recent - hopefully one of the next cells will match
           // better, so keep rolling
-        }        
-        // if the row key we just read is beyond the key we're searching for,
-        // then we're done; return the last key we saw before this one
-        else if (readkey.getRow().toString().compareTo(row.toString()) > 0 ) {
+          continue;
+        } else if (readkey.getRow().toString().compareTo(row.toString()) > 0 ) {
+          // if the row key we just read is beyond the key we're searching for,
+          // then we're done; return the last key we saw before this one
           return previousRow;
         } else {
           // so, the row key doesn't match, and we haven't gone past the row
@@ -1905,8 +1921,8 @@ public class HStore implements HConstants {
           }
           // otherwise, ignore this key, because it doesn't fulfill our 
           // requirements.
-        }
-      }
+        }        
+      } while(map.next(readkey, readval));
     }
     // getting here means we exhausted all of the cells in the mapfile.
     // whatever satisfying row we reached previously is the row we should 

+ 0 - 1
src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestGet2.java

@@ -201,7 +201,6 @@ public class TestGet2 extends HBaseTestCase {
       assertEquals(new String(results.get(COLUMNS[0])), "t20 bytes");
 
       // try "050", should get stuff from "040"
-      t50 = new Text("050");
       results = region.getClosestRowBefore(t50, HConstants.LATEST_TIMESTAMP);
       assertEquals(new String(results.get(COLUMNS[0])), "t40 bytes");
     } finally {