Ver código fonte

HADOOP-1774. Remove use of INode.parent in Block CRC upgrade.
(Raghu Angadi via dhruba)



git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk@575395 13f79535-47bb-0310-9956-ffa450edef68

Dhruba Borthakur 17 anos atrás
pai
commit
8c56482490

+ 3 - 0
CHANGES.txt

@@ -76,6 +76,9 @@ Trunk (unreleased changes)
     HADOOP-1687.  Save memory in namenode by optimizing BlockMap
     representation.  (Konstantin Shvachko via cutting)
 
+    HADOOP-1774. Remove use of INode.parent in Block CRC upgrade.
+    (Raghu Angadi via dhruba)
+
   BUG FIXES
 
     HADOOP-1763. Too many lost task trackers on large clusters due to

+ 83 - 0
src/java/org/apache/hadoop/dfs/BlockCrcUpgrade.java

@@ -1636,6 +1636,8 @@ class BlockCrcUpgradeObjectNamenode extends UpgradeObjectNamenode {
   HashMap<DatanodeID, DnInfo> unfinishedDnMap = 
                                       new HashMap<DatanodeID, DnInfo>();  
 
+  HashMap<INodeMapEntry, INodeMapEntry> iNodeParentMap = null;
+  
   Daemon monitorThread;
   double avgDatanodeCompletionPct = 0;
   
@@ -1674,6 +1676,8 @@ class BlockCrcUpgradeObjectNamenode extends UpgradeObjectNamenode {
     
     assert monitorThread == null;
     
+    buildINodeToParentMap();
+    
     lastNodeCompletionTime = System.currentTimeMillis();
     
     monitorThread = new Daemon(new UpgradeMonitor());
@@ -1792,6 +1796,7 @@ class BlockCrcUpgradeObjectNamenode extends UpgradeObjectNamenode {
                    (BlockCrcUpgradeUtils.CrcInfoCommand)cmd;
     
     BlockCrcInfo crcInfo = getFSNamesystem().blockCrcInfo(crcCmd.block,
+                                                          this,
                                                           false);
     return new BlockCrcUpgradeUtils.CrcInfoCommandReply(crcInfo);
   }
@@ -1832,6 +1837,84 @@ class BlockCrcUpgradeObjectNamenode extends UpgradeObjectNamenode {
   public BlockCrcUpgradeObjectNamenode() { // no-argument constructor with an empty body
   }
   
+  /* This is a wrapper class so that we can control equals() and hashCode().
+   * INode's equals() and hashCode() are not suitable for INodeToParent
+   * HashMap.
+   *
+   * Each entry pairs an INode with the entry of its parent, so a path can be
+   * rebuilt by walking the parent links. Entries are hashed and compared
+   * only by the identity of the wrapped INode; the parent link is ignored.
+   */
+  static class INodeMapEntry {
+    INode iNode; // the wrapped inode; its object identity decides equality
+    INodeMapEntry parent; // entry of the parent; null when there is none
+    
+    INodeMapEntry(INode iNode, INodeMapEntry parent) {
+      this.iNode = iNode;
+      this.parent = parent;
+    }
+    
+    public int hashCode() { // identity hash of the wrapped INode
+      return System.identityHashCode(iNode);
+    }
+    public boolean equals(Object entry) { // true only for an INodeMapEntry wrapping the very same INode object
+      return entry instanceof INodeMapEntry &&
+             ((INodeMapEntry)entry).iNode == iNode;
+    }
+    
+    private StringBuilder getName() { // builds the path of this entry; dereferences parent, so parent must not be null
+      StringBuilder str = (parent.parent == null) ? new StringBuilder() : // parent is the top-most entry: start empty, its name is not prepended
+                          parent.getName();
+      str.append(Path.SEPARATOR);
+      return str.append(iNode.getLocalName()); // appends separator + local name after the ancestors' path
+    }
+    String getAbsoluteName() { // "/" for an entry without a parent, otherwise the path built by getName()
+      return (parent == null) ? "/" : getName().toString();
+    }
+    
+    INodeDirectory getParentINode() { // parent's INode cast to INodeDirectory, or null when there is no parent
+      return (parent == null) ? null : (INodeDirectory)parent.iNode;
+    }
+  }
+  
+  private INodeMapEntry addINodeParentEntry(INode inode, INodeMapEntry parent) { // wraps inode with its parent entry and registers it in iNodeParentMap
+    INodeMapEntry entry = new INodeMapEntry(inode, parent);
+    iNodeParentMap.put(entry, entry); // the entry is both key and value, so a lookup by key returns the stored entry
+    return entry; // returned so the caller can use it as the parent of the inode's children
+  }
+
+  private long addToINodeParentMap(INodeMapEntry parent) { // recursively adds the children of parent's directory; returns how many entries were added (parent itself not counted)
+    long count = 0;
+    INodeDirectory dir = ((INodeDirectory)parent.iNode); // parent.iNode is cast unchecked: callers pass directory entries only
+    for(Iterator<INode> it = dir.getChildren().iterator(); it.hasNext();) {
+      INode inode = it.next();
+      if ( inode.isDirectory() ) {
+        count += 1 + addToINodeParentMap( addINodeParentEntry(inode, parent) ); // 1 for this directory plus everything added beneath it
+      } else {
+        // add only files that have associated ".crc" files.
+        if ( dir.getChild("." + inode.getLocalName() + ".crc") != null ) { // looks for a sibling named ".<localName>.crc" in the same directory
+          addINodeParentEntry(inode, parent);
+          count++;
+        }
+      }
+    }
+    return count;
+  }
+  
+  INodeMapEntry getINodeMapEntry(INode iNode) { // returns the stored entry for iNode, or null if the map has none
+    return iNodeParentMap.get(new INodeMapEntry(iNode, null)); // throwaway probe: entries compare by wrapped INode only, so a null parent is fine here
+  }
+  
+  // Builds the INode to parent map for non ".crc" files, starting at the root directory.
+  private void buildINodeToParentMap() {
+    //larger initial value should be ok for small clusters also.
+    iNodeParentMap = new HashMap<INodeMapEntry, INodeMapEntry>(256*1024); // replaces any previous map with a fresh, pre-sized one
+    
+    LOG.info("Building INode to parent map.");
+    
+    //Iterate over the whole INode tree.
+    INodeDirectory dir = getFSNamesystem().dir.rootDir;
+    long numAdded = 1 + addToINodeParentMap(addINodeParentEntry(dir, null)); // root is added with no parent; the 1 accounts for the root entry itself
+    
+    LOG.info("Added " + numAdded + " entries to INode to parent map.");
+  }
+  
   // For now we will wait for all the nodes to complete upgrade.
   synchronized boolean isUpgradeDone() {
     return upgradeStatus == UpgradeStatus.COMPLETED;    

+ 18 - 6
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -443,8 +443,10 @@ class FSNamesystem implements FSConstants {
    * <li> replication for crc file.
    * When replicas is true, it includes replicas of the block.
    */
-  public synchronized BlockCrcInfo blockCrcInfo(Block block, 
-                                                boolean replicas) {
+  public synchronized BlockCrcInfo blockCrcInfo(
+                           Block block,
+                           BlockCrcUpgradeObjectNamenode namenodeUpgradeObj,
+                           boolean replicas) {
     BlockCrcInfo crcInfo = new BlockCrcInfo();
     crcInfo.status = BlockCrcInfo.STATUS_ERROR;
     
@@ -459,7 +461,7 @@ class FSNamesystem implements FSConstants {
       return crcInfo;
     }
 
-    crcInfo.fileName = fileINode.getAbsoluteName();
+    crcInfo.fileName = "localName:" + fileINode.getLocalName();
     
     // Find the offset and length for this block.
     Block[] fileBlocks = fileINode.getBlocks();
@@ -502,15 +504,25 @@ class FSNamesystem implements FSConstants {
     } else {
 
       //Find CRC file
+      BlockCrcUpgradeObjectNamenode.INodeMapEntry entry =
+                                namenodeUpgradeObj.getINodeMapEntry(fileINode);
+      
+      if (entry == null || entry.parent == null) {
+        LOG.warn("Could not find parent INode for " + fileName + "  " + block);
+        return crcInfo;
+      }
+      
+      crcInfo.fileName = entry.getAbsoluteName();
+      
       String crcName = "." + fileName + ".crc";
-      INodeFile crcINode = (INodeFile)fileINode.getParent().getChild(crcName);
-
-      if (crcINode == null ) {
+      INode iNode = entry.getParentINode().getChild(crcName);
+      if (iNode == null || iNode.isDirectory()) {
         // Should we log this?
         crcInfo.status = BlockCrcInfo.STATUS_NO_CRC_DATA;
         return crcInfo;
       }
 
+      INodeFile crcINode = (INodeFile)iNode;
       Block[] blocks = crcINode.getBlocks();
       if ( blocks == null )  {
         LOG.warn("getBlockCrcInfo(): could not find blocks for crc file for " +

+ 1 - 1
src/java/org/apache/hadoop/dfs/NameNode.java

@@ -663,7 +663,7 @@ public class NameNode implements ClientProtocol, DatanodeProtocol, FSConstants {
 
   public BlockCrcInfo blockCrcUpgradeGetBlockLocations(Block block) 
                                                        throws IOException {
-    return namesystem.blockCrcInfo(block, true);
+    return namesystem.blockCrcInfo(block, null, true);
   }
 
   /** 

+ 4 - 0
src/test/org/apache/hadoop/dfs/MiniDFSCluster.java

@@ -26,6 +26,7 @@ import java.util.Collection;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.dfs.FSConstants.DatanodeReportType;
 import org.apache.hadoop.dfs.FSConstants.StartupOption;
+import org.apache.hadoop.fs.Command;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.util.ToolRunner;
@@ -124,6 +125,9 @@ public class MiniDFSCluster {
     // Format and clean out DataNode directories
     if (format) {
       if (data_dir.exists() && !FileUtil.fullyDelete(data_dir)) {
+        String[] cmd = { "find", data_dir.toString() };
+        String reply = Command.execCommand(cmd);
+        System.err.print("Reply from find : " + reply);
         throw new IOException("Cannot remove data directory: " + data_dir);
       }
       NameNode.format(conf);