Przeglądaj źródła

HADOOP-2161 getRow() is orders of magnitude slower than get(), even on
rows with one column


git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@593665 13f79535-47bb-0310-9956-ffa450edef68

Michael Stack 18 lat temu
rodzic
commit
3ece0c0eea

+ 2 - 0
src/contrib/hbase/CHANGES.txt

@@ -28,6 +28,8 @@ Trunk (unreleased changes)
                problematic.
    HADOOP-2155 Method expecting HBaseConfiguration throws NPE when given Configuration
    HADOOP-2156 BufferUnderflowException for un-named HTableDescriptors
+   HADOOP-2161 getRow() is orders of magnitude slower than get(), even on rows
+               with one column (Clint Morgan and Stack)
 
   IMPROVEMENTS
     HADOOP-2401 Add convenience put method that takes writable

+ 1 - 1
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java

@@ -3063,7 +3063,7 @@ HMasterRegionInterface {
             (new LocalHBaseCluster(conf)).startup();
           } else {
             Constructor<? extends HMaster> c =
-              masterClass.getConstructor(Configuration.class);
+              masterClass.getConstructor(HBaseConfiguration.class);
             HMaster master = c.newInstance(conf);
             master.start();
           }

+ 1 - 1
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMemcache.java

@@ -235,7 +235,7 @@ public class HMemcache {
           && key.matchesWithoutColumn(itKey)) {
         byte [] val = tailMap.get(itKey);
         results.put(itCol, val);
-      } else if (key.getRow().compareTo(itKey.getRow()) > 0) {
+      } else if (key.getRow().compareTo(itKey.getRow()) < 0) {
         break;
       }
     }

+ 1 - 4
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java

@@ -35,7 +35,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.filter.RowFilterInterface;
@@ -1018,13 +1017,11 @@ public class HRegion implements HConstants {
    */
   TreeMap<Text, byte []> getFull(Text row) throws IOException {
     HStoreKey key = new HStoreKey(row, System.currentTimeMillis());
-
     lock.obtainReadLock();
     try {
       TreeMap<Text, byte []> memResult = memcache.getFull(key);
       for (Text colFamily: stores.keySet()) {
-        HStore targetStore = stores.get(colFamily);
-        targetStore.getFull(key, memResult);
+        this.stores.get(colFamily).getFull(key, memResult);
       }
       return memResult;
     } finally {

+ 1 - 1
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java

@@ -1453,7 +1453,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
               "hbase.master is set to 'local' mode");
           } else {
             Constructor<? extends HRegionServer> c =
-              regionServerClass.getConstructor(Configuration.class);
+              regionServerClass.getConstructor(HBaseConfiguration.class);
             HRegionServer hrs = c.newInstance(conf);
             Thread t = new Thread(hrs);
             t.setName("regionserver" + hrs.server.getListenerAddress());

+ 3 - 1
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HScannerInterface.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.hbase;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.SortedMap;
 
@@ -28,7 +29,8 @@ import org.apache.hadoop.io.Text;
 
 /**
  * HScannerInterface iterates through a set of rows.  It's implemented by
- * several classes.
+ * several classes.  Implements {@link Iterable} but be sure to still call
+ * {@link #close()} when done with your {@link Iterator}.
  */
 public interface HScannerInterface extends Closeable,
 Iterable<Map.Entry<HStoreKey, SortedMap<Text, byte []>>> {

+ 1 - 1
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java

@@ -1100,7 +1100,7 @@ class HStore implements HConstants {
               }
               results.put(new Text(readcol), readval.get());
               readval = new ImmutableBytesWritable();
-            } else if(key.getRow().compareTo(readkey.getRow()) > 0) {
+            } else if(key.getRow().compareTo(readkey.getRow()) < 0) {
               break;
             }
             

+ 11 - 5
src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTable.java

@@ -564,13 +564,19 @@ public class HTable implements HConstants {
   
   /** 
    * Start an atomic row insertion/update.  No changes are committed until the 
-   * call to commit() returns.
-   * 
-   * A call to abort() will abandon any updates in progress.
+   * call to commit() returns. A call to abort() will abandon any updates in
+   * progress.
    *
    * 
-   * @param row Name of row to start update against.
-   * @return Row lockid.
+   * @param row Name of row to start update against.  Note, choose row names
+   * with care.  Rows are sorted lexicographically (comparison is done
+   * using {@link Text#compareTo(Object)}).  If your keys are numeric,
+   * lexicographic sorting means that 46 sorts AFTER 450 (if you want to use
+   * numerics for keys, zero-pad).
+   * @return Row lock id.
+   * @see #commit(long)
+   * @see #commit(long, long)
+   * @see #abort(long)
    */
   public synchronized long startUpdate(final Text row) {
     checkClosed();

+ 2 - 0
src/contrib/hbase/src/test/org/apache/hadoop/hbase/HBaseTestCase.java

@@ -37,6 +37,8 @@ public abstract class HBaseTestCase extends TestCase {
   protected final static String COLFAMILY_NAME1 = "colfamily1:";
   protected final static String COLFAMILY_NAME2 = "colfamily2:";
   protected final static String COLFAMILY_NAME3 = "colfamily3:";
+  protected static Text [] COLUMNS = new Text [] {new Text(COLFAMILY_NAME1),
+    new Text(COLFAMILY_NAME2), new Text(COLFAMILY_NAME3)};
   protected Path testDir = null;
   protected FileSystem localFs = null;
   protected static final char FIRST_CHAR = 'a';

+ 10 - 4
src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java

@@ -25,6 +25,7 @@ import java.io.UnsupportedEncodingException;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Date;
+import java.util.Formatter;
 import java.util.List;
 import java.util.Random;
 import java.util.TreeMap;
@@ -293,6 +294,7 @@ public class PerformanceEvaluation implements HConstants {
     protected HBaseAdmin admin;
     protected HTable table;
     protected volatile HBaseConfiguration conf;
+    private Formatter formatter = new Formatter();
     
     Test(final HBaseConfiguration conf, final int startRow,
         final int perClientRunRows, final int totalRows, final Status status) {
@@ -366,10 +368,14 @@ public class PerformanceEvaluation implements HConstants {
     }
     
     Text getRandomRow() {
-      return new Text(Integer.toString(this.rand.nextInt(Integer.MAX_VALUE) %
+      return new Text(format(this.rand.nextInt(Integer.MAX_VALUE) %
         this.totalRows));
     }
     
+    public Text format(final int i) {
+      return new Text(String.format("%010d", Integer.valueOf(i)));
+    }
+    
     /*
      * Test for individual row.
      * @param i Row index.
@@ -439,7 +445,7 @@ public class PerformanceEvaluation implements HConstants {
     void testSetup() throws IOException {
       super.testSetup();
       this.testScanner = table.obtainScanner(new Text[] {COLUMN_NAME},
-          new Text(Integer.toString(this.startRow)));
+        format(this.startRow));
     }
     
     @Override
@@ -471,7 +477,7 @@ public class PerformanceEvaluation implements HConstants {
     
     @Override
     void testRow(final int i) throws IOException {
-      table.get(new Text(Integer.toString(i)), COLUMN_NAME);
+      table.get(format(i), COLUMN_NAME);
     }
 
     @Override
@@ -488,7 +494,7 @@ public class PerformanceEvaluation implements HConstants {
     
     @Override
     void testRow(final int i) throws IOException {
-      long lockid = table.startUpdate(new Text(Integer.toString(i)));
+      long lockid = table.startUpdate(format(i));
       table.put(lockid, COLUMN_NAME, generateValue());
       table.commit(lockid);
     }