Browse Source

HDFS-11383. Intern strings in BlockLocation and ExtendedBlock. Contributed by Misha Dmitriev.

Andrew Wang 8 years ago
parent
commit
7101477e47

+ 11 - 10
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java

@@ -22,6 +22,7 @@ import java.io.Serializable;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.util.StringInterner;
 
 /**
  * Represents the network location of a block, information about the hosts
@@ -114,27 +115,27 @@ public class BlockLocation implements Serializable {
     if (names == null) {
       this.names = EMPTY_STR_ARRAY;
     } else {
-      this.names = names;
+      this.names = StringInterner.internStringsInArray(names);
     }
     if (hosts == null) {
       this.hosts = EMPTY_STR_ARRAY;
     } else {
-      this.hosts = hosts;
+      this.hosts = StringInterner.internStringsInArray(hosts);
     }
     if (cachedHosts == null) {
       this.cachedHosts = EMPTY_STR_ARRAY;
     } else {
-      this.cachedHosts = cachedHosts;
+      this.cachedHosts = StringInterner.internStringsInArray(cachedHosts);
     }
     if (topologyPaths == null) {
       this.topologyPaths = EMPTY_STR_ARRAY;
     } else {
-      this.topologyPaths = topologyPaths;
+      this.topologyPaths = StringInterner.internStringsInArray(topologyPaths);
     }
     if (storageIds == null) {
       this.storageIds = EMPTY_STR_ARRAY;
     } else {
-      this.storageIds = storageIds;
+      this.storageIds = StringInterner.internStringsInArray(storageIds);
     }
     if (storageTypes == null) {
       this.storageTypes = EMPTY_STORAGE_TYPE_ARRAY;
@@ -238,7 +239,7 @@ public class BlockLocation implements Serializable {
     if (hosts == null) {
       this.hosts = EMPTY_STR_ARRAY;
     } else {
-      this.hosts = hosts;
+      this.hosts = StringInterner.internStringsInArray(hosts);
     }
   }
 
@@ -249,7 +250,7 @@ public class BlockLocation implements Serializable {
     if (cachedHosts == null) {
       this.cachedHosts = EMPTY_STR_ARRAY;
     } else {
-      this.cachedHosts = cachedHosts;
+      this.cachedHosts = StringInterner.internStringsInArray(cachedHosts);
     }
   }
 
@@ -260,7 +261,7 @@ public class BlockLocation implements Serializable {
     if (names == null) {
       this.names = EMPTY_STR_ARRAY;
     } else {
-      this.names = names;
+      this.names = StringInterner.internStringsInArray(names);
     }
   }
 
@@ -271,7 +272,7 @@ public class BlockLocation implements Serializable {
     if (topologyPaths == null) {
       this.topologyPaths = EMPTY_STR_ARRAY;
     } else {
-      this.topologyPaths = topologyPaths;
+      this.topologyPaths = StringInterner.internStringsInArray(topologyPaths);
     }
   }
 
@@ -279,7 +280,7 @@ public class BlockLocation implements Serializable {
     if (storageIds == null) {
       this.storageIds = EMPTY_STR_ARRAY;
     } else {
-      this.storageIds = storageIds;
+      this.storageIds = StringInterner.internStringsInArray(storageIds);
     }
   }
 

+ 19 - 18
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringInterner.java

@@ -25,8 +25,9 @@ import com.google.common.collect.Interner;
 import com.google.common.collect.Interners;
 
 /**
- * Provides equivalent behavior to String.intern() to optimize performance, 
- * whereby does not consume memory in the permanent generation.
+ * Provides string interning utility methods. For weak interning,
+ * we use the standard String.intern() call, that performs very well
+ * (no problems with PermGen overflowing, etc.) starting from JDK 7.
  */
 @InterfaceAudience.Public
 @InterfaceStability.Stable
@@ -35,20 +36,9 @@ public class StringInterner {
   /**
    * Retains a strong reference to each string instance it has interned.
    */
-  private final static Interner<String> strongInterner;
-  
-  /**
-   * Retains a weak reference to each string instance it has interned. 
-   */
-  private final static Interner<String> weakInterner;
-  
-  
-  
-  static {
-    strongInterner = Interners.newStrongInterner();
-    weakInterner = Interners.newWeakInterner();
-  }
-  
+  private final static Interner<String> STRONG_INTERNER =
+      Interners.newStrongInterner();
+
   /**
    * Interns and returns a reference to the representative instance 
    * for any of a collection of string instances that are equal to each other.
@@ -62,7 +52,7 @@ public class StringInterner {
     if (sample == null) {
       return null;
     }
-    return strongInterner.intern(sample);
+    return STRONG_INTERNER.intern(sample);
   }
   
   /**
@@ -78,7 +68,18 @@ public class StringInterner {
     if (sample == null) {
       return null;
     }
-    return weakInterner.intern(sample);
+    return sample.intern();
+  }
+
+  /**
+   * Interns all the strings in the given array in place,
+   * returning the same array.
+   */
+  public static String[] internStringsInArray(String[] strings) {
+    for (int i = 0; i < strings.length; i++) {
+      strings[i] = weakIntern(strings[i]);
+    }
+    return strings;
   }
 
 }

+ 10 - 6
hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ExtendedBlock.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.protocol;
 
+import org.apache.commons.lang.builder.HashCodeBuilder;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 
@@ -42,13 +43,13 @@ public class ExtendedBlock {
   }
 
   public ExtendedBlock(String poolId, Block b) {
-    this.poolId = poolId;
+    this.poolId = poolId != null ? poolId.intern() : null;
     this.block = b;
   }
 
   public ExtendedBlock(final String poolId, final long blkid, final long len,
       final long genstamp) {
-    this.poolId = poolId;
+    this.poolId = poolId != null ? poolId.intern() : null;
     block = new Block(blkid, len, genstamp);
   }
 
@@ -86,7 +87,7 @@ public class ExtendedBlock {
   }
 
   public void set(String poolId, Block blk) {
-    this.poolId = poolId;
+    this.poolId = poolId != null ? poolId.intern() : null;
     this.block = blk;
   }
 
@@ -107,13 +108,16 @@ public class ExtendedBlock {
       return false;
     }
     ExtendedBlock b = (ExtendedBlock)o;
-    return b.block.equals(block) && b.poolId.equals(poolId);
+    return b.block.equals(block) &&
+        (b.poolId != null ? b.poolId.equals(poolId) : poolId == null);
   }
 
   @Override // Object
   public int hashCode() {
-    int result = 31 + poolId.hashCode();
-    return (31 * result + block.hashCode());
+    return new HashCodeBuilder(31, 17).
+        append(poolId).
+        append(block).
+        toHashCode();
   }
 
   @Override // Object