
Merge trunk into HDFS-1073

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1073@1146881 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon 14 years ago
parent
commit
57a6b34139
31 changed files with 607 additions and 213 deletions
  1. + 27 - 1    hdfs/CHANGES.txt
  2. + 3 - 6     hdfs/src/java/org/apache/hadoop/hdfs/DFSClient.java
  3. + 27 - 26   hdfs/src/java/org/apache/hadoop/hdfs/DFSInputStream.java
  4. + 4 - 8     hdfs/src/java/org/apache/hadoop/hdfs/DFSOutputStream.java
  5. + 2 - 2     hdfs/src/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
  6. + 1 - 2     hdfs/src/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java
  7. + 11 - 1    hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
  8. + 97 - 0    hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
  9. + 7 - 2     hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java
  10. + 6 - 2    hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/Host2NodesMap.java
  11. + 7 - 0    hdfs/src/java/org/apache/hadoop/hdfs/server/common/IncorrectVersionException.java
  12. + 5 - 10   hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
  13. + 2 - 4    hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
  14. + 14 - 7   hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java
  15. + 33 - 15  hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
  16. + 2 - 2    hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java
  17. + 3 - 7    hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
  18. + 2 - 4    hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
  19. + 1 - 3    hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
  20. + 2 - 3    hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/UpgradeObjectDatanode.java
  21. + 46 - 25  hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java
  22. + 14 - 61  hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  23. + 2 - 2    hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  24. + 4 - 6    hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
  25. + 55 - 0   hdfs/src/test/hdfs/org/apache/hadoop/cli/testHDFSConf.xml
  26. + 69 - 0   hdfs/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSShell.java
  27. + 24 - 12  hdfs/src/test/hdfs/org/apache/hadoop/hdfs/TestWriteRead.java
  28. + 2 - 1    hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java
  29. + 48 - 1   hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java
  30. + 60 - 0   hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java
  31. + 27 - 0   hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLogFileOutputStream.java

+ 27 - 1
hdfs/CHANGES.txt

@@ -540,6 +540,20 @@ Trunk (unreleased changes)
     HDFS-2109. Store uMask as member variable to DFSClient.Conf.  (Bharath
     Mundlapudi via szetszwo)
 
+    HDFS-2111. Add tests for ensuring that the DN will start with a few bad
+    data directories. (Harsh J Chouraria via todd)
+
+    HDFS-2134. Move DecommissionManager to the blockmanagement package.
+    (szetszwo)
+
+    HDFS-1977. Stop using StringUtils.stringifyException(). 
+    (Bharath Mundlapudi via jitendra)
+
+    HDFS-2131. Add new tests for the -overwrite/-f option in put and
+    copyFromLocal by HADOOP-7361.  (Uma Maheswara Rao G via szetszwo)
+
+    HDFS-2140. Move Host2NodesMap to the blockmanagement package.  (szetszwo)
+
   OPTIMIZATIONS
 
     HDFS-1458. Improve checkpoint performance by avoiding unnecessary image
@@ -805,9 +819,18 @@ Trunk (unreleased changes)
     HDFS-2053. Bug in INodeDirectory#computeContentSummary warning.
     (Michael Noll via eli)
 
-    HDFS-1990. Fix  resource leaks in BlockReceiver.close().  (Uma Maheswara
+    HDFS-1990. Fix resource leaks in BlockReceiver.close().  (Uma Maheswara
     Rao G via szetszwo)
 
+    HDFS-2034. Length in DFSInputStream.getBlockRange(..) becomes -ve when
+    reading only from a currently being written block. (John George via
+    szetszwo)
+
+    HDFS-2132. Potential resource leak in EditLogFileOutputStream.close. (atm)
+
+    HDFS-2120. on reconnect, DN can connect to NN even with different source
+    versions. (John George via atm)
+
 Release 0.22.0 - Unreleased
 
   INCOMPATIBLE CHANGES
@@ -1066,6 +1089,9 @@ Release 0.22.0 - Unreleased
     UnsupportedActionException("register") instead of "journal".
     (Ching-Shen Chen via shv)
 
+    HDFS-2054  BlockSender.sendChunk() prints ERROR for connection closures
+    encountered during transferToFully() (Kihwal Lee via stack)
+
   OPTIMIZATIONS
 
     HDFS-1140. Speedup INode.getPathComponents. (Dmytro Molkov via shv)

+ 3 - 6
hdfs/src/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -101,7 +101,6 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.Progressable;
-import org.apache.hadoop.util.StringUtils;
 
 /********************************************************
  * DFSClient can connect to a Hadoop Filesystem and 
@@ -489,8 +488,7 @@ public class DFSClient implements FSConstants, java.io.Closeable {
     try {
       return namenode.getPreferredBlockSize(f);
     } catch (IOException ie) {
-      LOG.warn("Problem getting block size: " + 
-          StringUtils.stringifyException(ie));
+      LOG.warn("Problem getting block size", ie);
       throw ie;
     }
   }
@@ -1578,9 +1576,8 @@ public class DFSClient implements FSConstants, java.io.Closeable {
     try {
       reportBadBlocks(lblocks);
     } catch (IOException ie) {
-      LOG.info("Found corruption while reading " + file 
-               + ".  Error repairing corrupt blocks.  Bad blocks remain. " 
-               + StringUtils.stringifyException(ie));
+      LOG.info("Found corruption while reading " + file
+          + ".  Error repairing corrupt blocks.  Bad blocks remain.", ie);
     }
   }
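
Most of the per-file edits in this merge follow the same mechanical HDFS-1977 pattern shown in the DFSClient.java hunks above: drop the StringUtils.stringifyException() import and pass the exception as the second argument to the logger, so commons-logging renders the stack trace itself. A minimal standalone illustration of the idiom (hypothetical class, not part of the commit):

    import java.io.IOException;

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;

    /** Illustrative only: the logging idiom this merge standardizes on. */
    class LoggingIdiomSketch {
      private static final Log LOG = LogFactory.getLog(LoggingIdiomSketch.class);

      static void demo(String file) {
        try {
          throw new IOException("simulated failure reading " + file);
        } catch (IOException ie) {
          // Before: LOG.warn("..." + StringUtils.stringifyException(ie));
          // After:  pass the Throwable directly; the logger prints the stack trace.
          LOG.warn("Problem getting block size", ie);
        }
      }
    }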
 

+ 27 - 26
hdfs/src/java/org/apache/hadoop/hdfs/DFSInputStream.java

@@ -47,7 +47,6 @@ import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.token.Token;
-import org.apache.hadoop.util.StringUtils;
 
 /****************************************************************
  * DFSInputStream provides bytes from a named file.  It handles 
@@ -294,8 +293,8 @@ public class DFSInputStream extends FSInputStream {
 
   /**
    * Get blocks in the specified range.
-   * Fetch them from the namenode if not cached.
-   * 
+   * Fetch them from the namenode if not cached. This function
+   * will not get a read request beyond the EOF.
    * @param offset
    * @param length
    * @return consequent segment of located blocks
@@ -304,28 +303,31 @@ public class DFSInputStream extends FSInputStream {
   private synchronized List<LocatedBlock> getBlockRange(long offset, 
                                                         long length) 
                                                       throws IOException {
+    // getFileLength(): returns total file length
+    // locatedBlocks.getFileLength(): returns length of completed blocks
+    if (offset >= getFileLength()) {
+      throw new IOException("Offset: " + offset +
+        " exceeds file length: " + getFileLength());
+    }
+
     final List<LocatedBlock> blocks;
-    if (locatedBlocks.isLastBlockComplete()) {
-      blocks = getFinalizedBlockRange(offset, length);
+    final long lengthOfCompleteBlk = locatedBlocks.getFileLength();
+    final boolean readOffsetWithinCompleteBlk = offset < lengthOfCompleteBlk;
+    final boolean readLengthPastCompleteBlk = offset + length > lengthOfCompleteBlk;
+
+    if (readOffsetWithinCompleteBlk) {
+      //get the blocks of finalized (completed) block range
+      blocks = getFinalizedBlockRange(offset, 
+        Math.min(length, lengthOfCompleteBlk - offset));
+    } else {
+      blocks = new ArrayList<LocatedBlock>(1);
     }
-    else {
-      final boolean readPastEnd = offset + length > locatedBlocks.getFileLength();
-      /* if requested length is greater than current file length
-       * then, it could possibly be from the current block being
-       * written to. First get the finalized block range and then
-       * if necessary, get the length of last block being written
-       * to.
-       */
-      if (readPastEnd)
-        length = locatedBlocks.getFileLength() - offset;
 
-      blocks = getFinalizedBlockRange(offset, length);
-      /* requested length is greater than what finalized blocks 
-       * have.
-       */
-      if (readPastEnd)
-        blocks.add(locatedBlocks.getLastLocatedBlock());
+    // get the blocks from incomplete block range
+    if (readLengthPastCompleteBlk) {
+       blocks.add(locatedBlocks.getLastLocatedBlock());
     }
+
     return blocks;
   }
 
@@ -496,7 +498,7 @@ public class DFSInputStream extends FSInputStream {
         if (!retryCurrentNode) {
           DFSClient.LOG.warn("Exception while reading from "
               + getCurrentBlock() + " of " + src + " from "
-              + currentNode + ": " + StringUtils.stringifyException(e));
+              + currentNode, e);
         }
         ioe = e;
       }
@@ -554,7 +556,7 @@ public class DFSInputStream extends FSInputStream {
           throw ce;            
         } catch (IOException e) {
           if (retries == 1) {
-            DFSClient.LOG.warn("DFS Read: " + StringUtils.stringifyException(e));
+            DFSClient.LOG.warn("DFS Read", e);
           }
           blockEnd = -1;
           if (currentNode != null) { addToDeadNodes(currentNode); }
@@ -928,9 +930,8 @@ public class DFSInputStream extends FSInputStream {
         } catch (IOException e) {//make following read to retry
           if(DFSClient.LOG.isDebugEnabled()) {
             DFSClient.LOG.debug("Exception while seek to " + targetPos
-                + " from " + getCurrentBlock() + " of " + src
-                + " from " + currentNode + ": "
-                + StringUtils.stringifyException(e));
+                + " from " + getCurrentBlock() + " of " + src + " from "
+                + currentNode, e);
           }
         }
       }
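
The getBlockRange() rewrite in the DFSInputStream.java hunk above (HDFS-2034) splits a read into a finalized-block portion and, when the request extends past the completed blocks, the last block still being written. A minimal sketch of that range arithmetic, using hypothetical names and no HDFS types:

    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    /** Illustrative sketch only (not part of this commit); names are hypothetical. */
    class BlockRangeSketch {
      // Mirrors the branch logic of the rewritten getBlockRange(): a read starting
      // at or past the file length fails; a read starting inside the completed
      // blocks covers the finalized range first; and a read extending past the
      // completed blocks also picks up the last, still-being-written block.
      static List<String> getBlockRange(long offset, long length,
          long completedLength, long fileLength) throws IOException {
        if (offset >= fileLength) {
          throw new IOException("Offset: " + offset
              + " exceeds file length: " + fileLength);
        }
        final List<String> blocks = new ArrayList<String>();
        if (offset < completedLength) {
          long finalizedLength = Math.min(length, completedLength - offset);
          blocks.add("finalized blocks covering [" + offset + ", "
              + (offset + finalizedLength) + ")");
        }
        if (offset + length > completedLength) {
          blocks.add("last located (under-construction) block");
        }
        return blocks;
      }
    }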

+ 4 - 8
hdfs/src/java/org/apache/hadoop/hdfs/DFSOutputStream.java

@@ -75,8 +75,6 @@ import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.PureJavaCrc32;
-import org.apache.hadoop.util.StringUtils;
-
 
 
 /****************************************************************
@@ -548,8 +546,7 @@ class DFSOutputStream extends FSOutputSummer implements Syncable {
             Thread.sleep(artificialSlowdown); 
           }
         } catch (Throwable e) {
-          DFSClient.LOG.warn("DataStreamer Exception: " + 
-              StringUtils.stringifyException(e));
+          DFSClient.LOG.warn("DataStreamer Exception", e);
           if (e instanceof IOException) {
             setLastException((IOException)e);
           }
@@ -698,9 +695,8 @@ class DFSOutputStream extends FSOutputSummer implements Syncable {
               synchronized (dataQueue) {
                 dataQueue.notifyAll();
               }
-              DFSClient.LOG.warn("DFSOutputStream ResponseProcessor exception " + 
-                  " for block " + block +
-                  StringUtils.stringifyException(e));
+              DFSClient.LOG.warn("DFSOutputStream ResponseProcessor exception "
+                  + " for block " + block, e);
               responderClosed = true;
             }
           }
@@ -1101,7 +1097,7 @@ class DFSOutputStream extends FSOutputSummer implements Syncable {
                 throw e;
               } else {
                 --retries;
-                DFSClient.LOG.info(StringUtils.stringifyException(e));
+                DFSClient.LOG.info("Exception while adding a block", e);
                 if (System.currentTimeMillis() - localstart > 5000) {
                   DFSClient.LOG.info("Waiting for replication for "
                       + (System.currentTimeMillis() - localstart) / 1000

+ 2 - 2
hdfs/src/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java

@@ -729,7 +729,7 @@ public class Balancer {
             blocksToReceive -= getBlockList();
             continue;
           } catch (IOException e) {
-            LOG.warn(StringUtils.stringifyException(e));
+            LOG.warn("Exception while getting block list", e);
             return;
           }
         } 
@@ -1553,7 +1553,7 @@ public class Balancer {
     try {
       System.exit(ToolRunner.run(null, new Cli(), args));
     } catch (Throwable e) {
-      LOG.error(StringUtils.stringifyException(e));
+      LOG.error("Exiting balancer due an exception", e);
       System.exit(-1);
     }
   }

+ 1 - 2
hdfs/src/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java

@@ -52,7 +52,6 @@ import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.Daemon;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * The class provides utilities for {@link Balancer} to access a NameNode
@@ -222,7 +221,7 @@ class NameNodeConnector {
         try {
           blockTokenSecretManager.setKeys(namenode.getBlockKeys());
         } catch (Exception e) {
-          LOG.error(StringUtils.stringifyException(e));
+          LOG.error("Failed to set keys", e);
         }
         try {
           Thread.sleep(keyUpdaterInterval);

+ 11 - 1
hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -99,6 +99,8 @@ public class BlockManager {
    */
   public final BlocksMap blocksMap;
 
+  private final DatanodeManager datanodeManager;
+
   //
   // Store blocks-->datanodedescriptor(s) map of corrupt replicas
   //
@@ -164,6 +166,7 @@ public class BlockManager {
       DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_DEFAULT) * 1000L);
     setConfigurationParameters(conf);
     blocksMap = new BlocksMap(capacity, DEFAULT_MAP_LOAD_FACTOR);
+    datanodeManager = new DatanodeManager(fsn);
   }
 
   void setConfigurationParameters(Configuration conf) throws IOException {
@@ -207,13 +210,20 @@ public class BlockManager {
     FSNamesystem.LOG.info("shouldCheckForEnoughRacks = " + shouldCheckForEnoughRacks);
   }
 
-  public void activate() {
+  public void activate(Configuration conf) {
     pendingReplications.start();
+    datanodeManager.activate(conf);
   }
 
   public void close() {
     if (pendingReplications != null) pendingReplications.stop();
     blocksMap.close();
+    datanodeManager.close();
+  }
+
+  /** @return the datanodeManager */
+  public DatanodeManager getDatanodeManager() {
+    return datanodeManager;
   }
 
   public void metaSave(PrintWriter out) {

+ 97 - 0
hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.blockmanagement;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.util.Daemon;
+
+/**
+ * Manage datanodes, include decommission and other activities.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class DatanodeManager {
+  static final Log LOG = LogFactory.getLog(DatanodeManager.class);
+
+  final FSNamesystem namesystem;
+
+  private final Host2NodesMap host2DatanodeMap = new Host2NodesMap();
+  
+  DatanodeManager(final FSNamesystem namesystem) {
+    this.namesystem = namesystem;
+  }
+
+  private Daemon decommissionthread = null;
+
+  void activate(final Configuration conf) {
+    this.decommissionthread = new Daemon(new DecommissionManager(namesystem).new Monitor(
+        conf.getInt(DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, 
+                    DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_INTERVAL_DEFAULT),
+        conf.getInt(DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_NODES_PER_INTERVAL_KEY, 
+                    DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_NODES_PER_INTERVAL_DEFAULT)));
+    decommissionthread.start();
+  }
+
+  void close() {
+    if (decommissionthread != null) decommissionthread.interrupt();
+  }
+
+  /** @return the datanode descriptor for the host. */
+  public DatanodeDescriptor getDatanodeByHost(final String host) {
+    return host2DatanodeMap.getDatanodeByHost(host);
+  }
+
+  /** Add a datanode. */
+  public void addDatanode(final DatanodeDescriptor node) {
+    // To keep host2DatanodeMap consistent with datanodeMap,
+    // remove  from host2DatanodeMap the datanodeDescriptor removed
+    // from datanodeMap before adding node to host2DatanodeMap.
+    synchronized (namesystem.datanodeMap) {
+      host2DatanodeMap.remove(
+          namesystem.datanodeMap.put(node.getStorageID(), node));
+    }
+    host2DatanodeMap.add(node);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(getClass().getSimpleName() + ".unprotectedAddDatanode: "
+          + "node " + node.getName() + " is added to datanodeMap.");
+    }
+  }
+
+  /** Physically remove node from datanodeMap. */
+  public void wipeDatanode(final DatanodeID node) throws IOException {
+    final String key = node.getStorageID();
+    synchronized (namesystem.datanodeMap) {
+      host2DatanodeMap.remove(namesystem.datanodeMap.remove(key));
+    }
+    if (LOG.isDebugEnabled()) {
+      LOG.debug(getClass().getSimpleName() + ".wipeDatanode("
+          + node.getName() + "): storage " + key 
+          + " is removed from datanodeMap.");
+    }
+  }
+}

+ 7 - 2
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/DecommissionManager.java → hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/DecommissionManager.java

@@ -15,18 +15,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.hdfs.server.namenode;
+package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.util.CyclicIteration;
 
 /**
  * Manage node decommissioning.
  */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
 class DecommissionManager {
   static final Log LOG = LogFactory.getLog(DecommissionManager.class);
 
@@ -56,6 +60,7 @@ class DecommissionManager {
      * Check decommission status of numNodesPerCheck nodes
      * for every recheckInterval milliseconds.
      */
+    @Override
     public void run() {
       for(; fsnamesystem.isRunning(); ) {
         fsnamesystem.writeLock();

+ 6 - 2
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/Host2NodesMap.java → hdfs/src/java/org/apache/hadoop/hdfs/server/blockmanagement/Host2NodesMap.java

@@ -15,15 +15,19 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.hdfs.server.namenode;
+package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import java.util.HashMap;
 import java.util.Random;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
-import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 
+/** A map from host names to datanode descriptors. */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
 class Host2NodesMap {
   private HashMap<String, DatanodeDescriptor[]> map
     = new HashMap<String, DatanodeDescriptor[]>();

+ 7 - 0
hdfs/src/java/org/apache/hadoop/hdfs/server/common/IncorrectVersionException.java

@@ -45,4 +45,11 @@ public class IncorrectVersionException extends IOException {
           + versionReported + ". Expecting = " + versionExpected + ".");
   }
 
+  public IncorrectVersionException(String versionReported,
+                                   String ofWhat,
+                                   String versionExpected) {
+    super("Unexpected version " 
+          + (ofWhat==null ? "" : "of " + ofWhat) + ". Reported: "
+          + versionReported + ". Expecting = " + versionExpected + ".");
+  }
 }

+ 5 - 10
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java

@@ -49,7 +49,6 @@ import org.apache.hadoop.hdfs.server.common.GenerationStamp;
 import org.apache.hadoop.hdfs.server.datanode.FSDataset.FSVolume;
 import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * Performs two types of scanning:
@@ -434,9 +433,8 @@ class BlockPoolSliceScanner {
           return;
         }
 
-        LOG.warn((second ? "Second " : "First ") + 
-                 "Verification failed for " + block + ". Exception : " +
-                 StringUtils.stringifyException(e));
+        LOG.warn((second ? "Second " : "First ") + "Verification failed for "
+            + block, e);
         
         if (second) {
           totalScanErrors++;
@@ -512,8 +510,7 @@ class BlockPoolSliceScanner {
         logReader[1] = log.getPreviousFileReader();
       }
     } catch (IOException e) {
-      LOG.warn("Could not read previous verification times : " +
-               StringUtils.stringifyException(e));
+      LOG.warn("Could not read previous verification times", e);
     }
     
     try {
@@ -645,8 +642,7 @@ class BlockPoolSliceScanner {
         }
       }
     } catch (RuntimeException e) {
-      LOG.warn("RuntimeException during BlockPoolScanner.scan() : " +
-               StringUtils.stringifyException(e));
+      LOG.warn("RuntimeException during BlockPoolScanner.scan()", e);
       throw e;
     } finally {
       cleanUp();
@@ -910,8 +906,7 @@ class BlockPoolSliceScanner {
         try {
           readNext();
         } catch (IOException e) {
-          LOG.info("Could not reade next line in LogHandler : " +
-                   StringUtils.stringifyException(e));
+          LOG.info("Could not read next line in LogHandler", e);
         }
         return curLine;
       }

+ 2 - 4
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java

@@ -49,7 +49,6 @@ import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.PureJavaCrc32;
-import org.apache.hadoop.util.StringUtils;
 
 /** A class that receives a block and writes to its own disk, meanwhile
  * may copies it to another site. If a throttler is provided,
@@ -276,9 +275,8 @@ class BlockReceiver implements Closeable, FSConstants {
    */
   private void handleMirrorOutError(IOException ioe) throws IOException {
     String bpid = block.getBlockPoolId();
-    LOG.info(datanode.getDNRegistrationForBP(bpid) + ":Exception writing block " +
-             block + " to mirror " + mirrorAddr + "\n" +
-             StringUtils.stringifyException(ioe));
+    LOG.info(datanode.getDNRegistrationForBP(bpid)
+        + ":Exception writing block " + block + " to mirror " + mirrorAddr, ioe);
     if (Thread.interrupted()) { // shut down if the thread is interrupted
       throw ioe;
     } else { // encounter an error while writing to mirror

+ 14 - 7
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java

@@ -38,7 +38,6 @@ import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.net.SocketOutputStream;
 import org.apache.hadoop.util.DataChecksum;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * Reads a block from the disk and sends it to a recipient.
@@ -328,9 +327,8 @@ class BlockSender implements java.io.Closeable, FSConstants {
       try {
         checksumIn.readFully(buf, checksumOff, checksumLen);
       } catch (IOException e) {
-        LOG.warn(" Could not read or failed to veirfy checksum for data" +
-                 " at offset " + offset + " for block " + block + " got : "
-                 + StringUtils.stringifyException(e));
+        LOG.warn(" Could not read or failed to veirfy checksum for data"
+            + " at offset " + offset + " for block " + block, e);
         IOUtils.closeStream(checksumIn);
         checksumIn = null;
         if (corruptChecksumOk) {
@@ -401,10 +399,19 @@ class BlockSender implements java.io.Closeable, FSConstants {
       }
       
     } catch (IOException e) {
-      /* exception while writing to the client (well, with transferTo(),
-       * it could also be while reading from the local file).
+      /* Exception while writing to the client. Connection closure from
+       * the other end is mostly the case and we do not care much about
+       * it. But other things can go wrong, especially in transferTo(),
+       * which we do not want to ignore.
+       *
+       * The message parsing below should not be considered as a good
+       * coding example. NEVER do it to drive a program logic. NEVER.
+       * It was done here because the NIO throws an IOException for EPIPE.
        */
-      LOG.error("BlockSender.sendChunks() exception: " + StringUtils.stringifyException(e));
+      String ioem = e.getMessage();
+      if (!ioem.startsWith("Broken pipe") && !ioem.startsWith("Connection reset")) {
+        LOG.error("BlockSender.sendChunks() exception: ", e);
+      }
       throw ioeToSocketException(e);
     }
 

+ 33 - 15
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -861,8 +861,8 @@ public class DataNode extends Configured
         /* One common reason is that NameNode could be in safe mode.
          * Should we keep on retrying in that case?
          */
-        LOG.warn("Failed to report bad block " + block + " to namenode : " +
-                 " Exception : " + StringUtils.stringifyException(e));
+        LOG.warn("Failed to report bad block " + block + " to namenode : "
+            + " Exception", e);
       }
       
     }
@@ -1111,12 +1111,11 @@ public class DataNode extends Configured
           if (UnregisteredNodeException.class.getName().equals(reClass) ||
               DisallowedDatanodeException.class.getName().equals(reClass) ||
               IncorrectVersionException.class.getName().equals(reClass)) {
-            LOG.warn("blockpool " + blockPoolId + " is shutting down: " + 
-                StringUtils.stringifyException(re));
+            LOG.warn("blockpool " + blockPoolId + " is shutting down", re);
             shouldServiceRun = false;
             return;
           }
-          LOG.warn(StringUtils.stringifyException(re));
+          LOG.warn("RemoteException in offerService", re);
           try {
             long sleepTime = Math.min(1000, heartBeatInterval);
             Thread.sleep(sleepTime);
@@ -1124,7 +1123,7 @@ public class DataNode extends Configured
             Thread.currentThread().interrupt();
           }
         } catch (IOException e) {
-          LOG.warn(StringUtils.stringifyException(e));
+          LOG.warn("IOException in offerService", e);
         }
       } // while (shouldRun && shouldServiceRun)
     } // offerService
@@ -1144,7 +1143,26 @@ public class DataNode extends Configured
     void register() throws IOException {
       LOG.info("in register: sid=" + bpRegistration.getStorageID() + ";SI="
           + bpRegistration.storageInfo); 
-                
+
+      // build and layout versions should match
+      String nsBuildVer = bpNamenode.versionRequest().getBuildVersion();
+      String stBuildVer = Storage.getBuildVersion();
+
+      if (!nsBuildVer.equals(stBuildVer)) {
+        LOG.warn("Data-node and name-node Build versions must be " +
+          "the same. Namenode build version: " + nsBuildVer + "Datanode " +
+          "build version: " + stBuildVer);
+        throw new IncorrectVersionException(nsBuildVer, "namenode", stBuildVer);
+      }
+
+      if (FSConstants.LAYOUT_VERSION != bpNSInfo.getLayoutVersion()) {
+        LOG.warn("Data-node and name-node layout versions must be " +
+          "the same. Expected: "+ FSConstants.LAYOUT_VERSION +
+          " actual "+ bpNSInfo.getLayoutVersion());
+        throw new IncorrectVersionException
+          (bpNSInfo.getLayoutVersion(), "namenode");
+      }
+
       while(shouldRun && shouldServiceRun) {
         try {
           // Use returned registration from namenode with updated machine name.
@@ -1241,18 +1259,18 @@ public class DataNode extends Configured
             startDistributedUpgradeIfNeeded();
             offerService();
           } catch (Exception ex) {
-            LOG.error("Exception: " + StringUtils.stringifyException(ex));
+            LOG.error("Exception in BPOfferService", ex);
             if (shouldRun && shouldServiceRun) {
               try {
                 Thread.sleep(5000);
               } catch (InterruptedException ie) {
-                LOG.warn("Received exception: ", ie);
+                LOG.warn("Received exception", ie);
               }
             }
           }
         }
       } catch (Throwable ex) {
-        LOG.warn("Unexpected exception ", ex);
+        LOG.warn("Unexpected exception", ex);
       } finally {
         LOG.warn(bpRegistration + " ending block pool service for: " 
             + blockPoolId);
@@ -1737,8 +1755,7 @@ public class DataNode extends Configured
       try {
         nn.errorReport(bpos.bpRegistration, dpError, errMsgr);
       } catch(IOException e) {
-        LOG.warn("Error reporting disk failure to NameNode: " + 
-            StringUtils.stringifyException(e));
+        LOG.warn("Error reporting disk failure to NameNode", e);
       }
     }
     
@@ -2007,8 +2024,9 @@ public class DataNode extends Configured
           }
         }
       } catch (IOException ie) {
-        LOG.warn(bpReg + ":Failed to transfer " + b + " to " + targets[0].getName()
-            + " got " + StringUtils.stringifyException(ie));
+        LOG.warn(
+            bpReg + ":Failed to transfer " + b + " to " + targets[0].getName()
+                + " got ", ie);
         // check if there are any disk problem
         checkDiskError();
         
@@ -2279,7 +2297,7 @@ public class DataNode extends Configured
       if (datanode != null)
         datanode.join();
     } catch (Throwable e) {
-      LOG.error(StringUtils.stringifyException(e));
+      LOG.error("Exception in secureMain", e);
       System.exit(-1);
     } finally {
       // We need to add System.exit here because either shutdown was called or
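
The new checks in the register() hunk above (HDFS-2120) refuse to register against a namenode whose build or layout version differs from the datanode's. Reduced to a standalone sketch with hypothetical parameter names (the real code throws IncorrectVersionException rather than a plain IOException):

    import java.io.IOException;

    /** Illustrative sketch only; parameter names are hypothetical. */
    class VersionCheckSketch {
      // Registration is aborted when either the build version strings differ or
      // the layout versions disagree, matching the register() hunk above.
      static void checkVersions(String nnBuildVersion, String dnBuildVersion,
          int nnLayoutVersion, int dnLayoutVersion) throws IOException {
        if (!nnBuildVersion.equals(dnBuildVersion)) {
          throw new IOException("Build version mismatch: namenode "
              + nnBuildVersion + " vs datanode " + dnBuildVersion);
        }
        if (nnLayoutVersion != dnLayoutVersion) {
          throw new IOException("Layout version mismatch: namenode "
              + nnLayoutVersion + " vs datanode " + dnLayoutVersion);
        }
      }
    }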

+ 2 - 2
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java

@@ -173,8 +173,8 @@ public class DataStorage extends Storage {
         }
       } catch (IOException ioe) {
         sd.unlock();
-        LOG.warn("Ignoring storage directory "+ dataDir
-        		+ " due to an exception: " + StringUtils.stringifyException(ioe));
+        LOG.warn("Ignoring storage directory " + dataDir
+            + " due to an exception", ioe);
         //continue with other good dirs
         continue;
       }

+ 3 - 7
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java

@@ -62,7 +62,6 @@ import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.util.DataChecksum;
-import org.apache.hadoop.util.StringUtils;
 
 import com.google.protobuf.ByteString;
 
@@ -268,10 +267,8 @@ class DataXceiver extends Receiver implements Runnable, FSConstants {
       /* What exactly should we do here?
        * Earlier version shutdown() datanode if there is disk error.
        */
-      LOG.warn(dnR +  ":Got exception while serving " + 
-          block + " to " +
-                remoteAddress + ":\n" + 
-                StringUtils.stringifyException(ioe) );
+      LOG.warn(dnR + ":Got exception while serving " + block + " to "
+          + remoteAddress, ioe);
       throw ioe;
     } finally {
       IOUtils.closeStream(blockSender);
@@ -424,8 +421,7 @@ class DataXceiver extends Receiver implements Runnable, FSConstants {
           } else {
             LOG.info(datanode + ":Exception transfering block " +
                      block + " to mirror " + mirrorNode +
-                     ". continuing without the mirror.\n" +
-                     StringUtils.stringifyException(e));
+                     ". continuing without the mirror.", e);
           }
         }
       }

+ 2 - 4
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java

@@ -34,7 +34,6 @@ import org.apache.hadoop.hdfs.server.balancer.Balancer;
 import org.apache.hadoop.hdfs.util.DataTransferThrottler;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.Daemon;
-import org.apache.hadoop.util.StringUtils;
 
 
 /**
@@ -157,7 +156,7 @@ class DataXceiverServer implements Runnable, FSConstants {
       ss.close();
     } catch (IOException ie) {
       LOG.warn(datanode.getMachineName()
-          + ":DataXceiverServer: Close exception due to: ", ie);
+          + " :DataXceiverServer: close exception", ie);
     }
   }
   
@@ -167,8 +166,7 @@ class DataXceiverServer implements Runnable, FSConstants {
     try {
       this.ss.close();
     } catch (IOException ie) {
-      LOG.warn(datanode.getMachineName() + ":DataXceiverServer.kill(): "
-                              + StringUtils.stringifyException(ie));
+      LOG.warn(datanode.getMachineName() + ":DataXceiverServer.kill(): ", ie);
     }
 
     // close all the sockets that were accepted earlier

+ 1 - 3
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java

@@ -62,7 +62,6 @@ import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlo
 import org.apache.hadoop.metrics2.util.MBeans;
 import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.DiskChecker;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
 import org.apache.hadoop.util.ReflectionUtils;
@@ -2377,8 +2376,7 @@ public class FSDataset implements FSConstants, FSDatasetInterface {
       try {
         datanode.reportBadBlocks(new ExtendedBlock(bpid, corruptBlock));  
       } catch (IOException e) {
-        DataNode.LOG.warn("Failed to repot bad block " + corruptBlock
-            + "Exception:" + StringUtils.stringifyException(e));
+        DataNode.LOG.warn("Failed to repot bad block " + corruptBlock, e);
       }
     }
   }

+ 2 - 3
hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/UpgradeObjectDatanode.java

@@ -24,7 +24,6 @@ import org.apache.hadoop.hdfs.server.common.UpgradeObject;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
 import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand;
-import org.apache.hadoop.util.StringUtils;
 import java.io.IOException;
 import java.net.SocketTimeoutException;
 
@@ -110,7 +109,7 @@ public abstract class UpgradeObjectDatanode extends UpgradeObject implements Run
       try {
         doUpgrade();
       } catch(Exception e) {
-        DataNode.LOG.error(StringUtils.stringifyException(e));
+        DataNode.LOG.error("Exception in doUpgrade", e);
       }
       break;
     }
@@ -129,7 +128,7 @@ public abstract class UpgradeObjectDatanode extends UpgradeObject implements Run
       if(upgradeManager != null)
         upgradeManager.completeUpgrade();
     } catch(IOException e) {
-      DataNode.LOG.error(StringUtils.stringifyException(e));
+      DataNode.LOG.error("Exception in completeUpgrade", e);
     }
   }
 

+ 46 - 25
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java

@@ -33,6 +33,8 @@ import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.Writable;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
  * An implementation of the abstract class {@link EditLogOutputStream}, which
  * stores edits in a local file.
@@ -137,32 +139,41 @@ class EditLogFileOutputStream extends EditLogOutputStream {
       throw new IOException("Trying to use aborted output stream");
     }
 
-    // close should have been called after all pending transactions
-    // have been flushed & synced.
-    // if already closed, just skip
-    if(bufCurrent != null)
-    {
-      int bufSize = bufCurrent.size();
-      if (bufSize != 0) {
-        throw new IOException("FSEditStream has " + bufSize
-            + " bytes still to be flushed and cannot " + "be closed.");
+    try {
+      // close should have been called after all pending transactions
+      // have been flushed & synced.
+      // if already closed, just skip
+      if(bufCurrent != null)
+      {
+        int bufSize = bufCurrent.size();
+        if (bufSize != 0) {
+          throw new IOException("FSEditStream has " + bufSize
+              + " bytes still to be flushed and cannot " + "be closed.");
+        }
+        bufCurrent.close();
+        bufCurrent = null;
       }
-      bufCurrent.close();
-      bufCurrent = null;
-    }
-
-    if(bufReady != null) {
-      bufReady.close();
-      bufReady = null;
-    }
-
-    // remove the last INVALID marker from transaction log.
-    if (fc != null && fc.isOpen()) {
-      fc.truncate(fc.position());
-      fc.close();
-    }
-    if (fp != null) {
-      fp.close();
+  
+      if(bufReady != null) {
+        bufReady.close();
+        bufReady = null;
+      }
+  
+      // remove the last INVALID marker from transaction log.
+      if (fc != null && fc.isOpen()) {
+        fc.truncate(fc.position());
+        fc.close();
+        fc = null;
+      }
+      if (fp != null) {
+        fp.close();
+        fp = null;
+      }
+    } finally {
+      IOUtils.cleanup(FSNamesystem.LOG, bufCurrent, bufReady, fc, fp);
+      bufCurrent = bufReady = null;
+      fc = null;
+      fp = null;
     }
     fp = null;
   }
@@ -263,4 +274,14 @@ class EditLogFileOutputStream extends EditLogOutputStream {
   public boolean isOpen() {
     return fp != null;
   }
+  
+  @VisibleForTesting
+  public void setFileChannelForTesting(FileChannel fc) {
+    this.fc = fc;
+  }
+  
+  @VisibleForTesting
+  public FileChannel getFileChannelForTesting() {
+    return fc;
+  }
 }
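
The close() rework above (HDFS-2132) wraps the ordered shutdown in try/finally so that a failure part-way through still releases every stream via IOUtils.cleanup(). The same pattern in a generic, self-contained form, using only java.io and illustrative field names:

    import java.io.Closeable;
    import java.io.IOException;

    /** Illustrative only: the close-in-finally pattern the commit applies. */
    class CleanupOnCloseSketch implements Closeable {
      private Closeable primary;    // stands in for bufCurrent / bufReady
      private Closeable secondary;  // stands in for fc / fp

      CleanupOnCloseSketch(Closeable primary, Closeable secondary) {
        this.primary = primary;
        this.secondary = secondary;
      }

      @Override
      public void close() throws IOException {
        try {
          // Preferred, ordered shutdown; any of these calls may throw.
          if (primary != null) {
            primary.close();
            primary = null;
          }
          if (secondary != null) {
            secondary.close();
            secondary = null;
          }
        } finally {
          // Best-effort cleanup of whatever the ordered path did not release,
          // mirroring the IOUtils.cleanup(...) call added in the finally block above.
          closeQuietly(primary);
          closeQuietly(secondary);
          primary = null;
          secondary = null;
        }
      }

      private static void closeQuietly(Closeable c) {
        if (c == null) return;
        try {
          c.close();
        } catch (IOException ignored) {
          // Swallowed on purpose: cleanup must not mask the original failure.
        }
      }
    }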

+ 14 - 61
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -268,7 +268,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
    * <p>
    * Mapping: StorageID -> DatanodeDescriptor
    */
-  NavigableMap<String, DatanodeDescriptor> datanodeMap = 
+  public final NavigableMap<String, DatanodeDescriptor> datanodeMap = 
     new TreeMap<String, DatanodeDescriptor>();
 
   Random r = new Random();
@@ -319,14 +319,12 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
       ReplaceDatanodeOnFailure.DEFAULT;
 
   private volatile SafeModeInfo safeMode;  // safe mode information
-  private Host2NodesMap host2DataNodeMap = new Host2NodesMap();
     
   /** datanode network toplogy */
   public NetworkTopology clusterMap = new NetworkTopology();
   private DNSToSwitchMapping dnsToSwitchMapping;
 
   private HostsFileReader hostsReader; 
-  private Daemon dnthread = null;
 
   private long maxFsObjects = 0;          // maximum number of fs objects
 
@@ -405,7 +403,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
    */
   void activate(Configuration conf) throws IOException {
     setBlockTotal();
-    blockManager.activate();
+    blockManager.activate(conf);
     this.hbthread = new Daemon(new HeartbeatMonitor());
     this.lmthread = new Daemon(leaseManager.new Monitor());
     this.replthread = new Daemon(new ReplicationMonitor());
@@ -416,13 +414,6 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
     this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
     nnrmthread.start();
 
-    this.dnthread = new Daemon(new DecommissionManager(this).new Monitor(
-        conf.getInt(DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, 
-                    DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_INTERVAL_DEFAULT),
-        conf.getInt(DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_NODES_PER_INTERVAL_KEY, 
-                    DFSConfigKeys.DFS_NAMENODE_DECOMMISSION_NODES_PER_INTERVAL_DEFAULT)));
-    dnthread.start();
-
     this.dnsToSwitchMapping = ReflectionUtils.newInstance(
         conf.getClass(DFSConfigKeys.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, 
                       ScriptBasedMapping.class,
@@ -638,7 +629,6 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
       if (blockManager != null) blockManager.close();
       if (hbthread != null) hbthread.interrupt();
       if (replthread != null) replthread.interrupt();
-      if (dnthread != null) dnthread.interrupt();
       if (smmthread != null) smmthread.interrupt();
       if (dtSecretManager != null) dtSecretManager.stopThreads();
       if (nnrmthread != null) nnrmthread.interrupt();
@@ -663,7 +653,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
   }
 
   /** Is this name system running? */
-  boolean isRunning() {
+  public boolean isRunning() {
     return fsRunning;
   }
 
@@ -889,8 +879,8 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
     LocatedBlocks blocks = getBlockLocations(src, offset, length, true, true);
     if (blocks != null) {
       //sort the blocks
-      DatanodeDescriptor client = host2DataNodeMap.getDatanodeByHost(
-          clientMachine);
+      final DatanodeDescriptor client = 
+          blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
       for (LocatedBlock b : blocks.getLocatedBlocks()) {
         clusterMap.pseudoSortByDistance(client, b.getLocations());
         
@@ -1501,8 +1491,8 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
         }
       }
 
-      DatanodeDescriptor clientNode = 
-        host2DataNodeMap.getDatanodeByHost(clientMachine);
+      final DatanodeDescriptor clientNode = 
+          blockManager.getDatanodeManager().getDatanodeByHost(clientMachine);
 
       if (append && myFile != null) {
         //
@@ -2853,7 +2843,8 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
                                  + " storage " + nodeReg.getStorageID());
 
     DatanodeDescriptor nodeS = datanodeMap.get(nodeReg.getStorageID());
-    DatanodeDescriptor nodeN = host2DataNodeMap.getDatanodeByName(nodeReg.getName());
+    DatanodeDescriptor nodeN =
+        blockManager.getDatanodeManager().getDatanodeByHost(nodeReg.getName());
       
     if (nodeN != null && nodeN != nodeS) {
       NameNode.LOG.info("BLOCK* NameSystem.registerDatanode: "
@@ -2862,7 +2853,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
       // which is not served by anybody anymore.
       removeDatanode(nodeN);
       // physically remove node from datanodeMap
-      wipeDatanode(nodeN);
+      blockManager.getDatanodeManager().wipeDatanode(nodeN);
       nodeN = null;
     }
 
@@ -2929,7 +2920,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
     DatanodeDescriptor nodeDescr 
       = new DatanodeDescriptor(nodeReg, NetworkTopology.DEFAULT_RACK, hostName);
     resolveNetworkLocation(nodeDescr);
-    unprotectedAddDatanode(nodeDescr);
+    blockManager.getDatanodeManager().addDatanode(nodeDescr);
     clusterMap.add(nodeDescr);
     checkDecommissioning(nodeDescr, dnAddress);
     
@@ -3227,7 +3218,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
             lastBlockKeyUpdate = now;
           }
         } catch (Exception e) {
-          FSNamesystem.LOG.error(StringUtils.stringifyException(e));
+          FSNamesystem.LOG.error("Exception while checking heartbeat", e);
         }
         try {
           Thread.sleep(5000);  // 5 seconds
@@ -3367,44 +3358,6 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
           + nodeDescr.getName() + " is out of service now.");
     }
   }
-    
-  void unprotectedAddDatanode(DatanodeDescriptor nodeDescr) {
-    assert hasWriteLock();
-    // To keep host2DataNodeMap consistent with datanodeMap,
-    // remove  from host2DataNodeMap the datanodeDescriptor removed
-    // from datanodeMap before adding nodeDescr to host2DataNodeMap.
-    synchronized (datanodeMap) {
-      host2DataNodeMap.remove(
-                            datanodeMap.put(nodeDescr.getStorageID(), nodeDescr));
-    }
-    host2DataNodeMap.add(nodeDescr);
-      
-    if(NameNode.stateChangeLog.isDebugEnabled()) {
-      NameNode.stateChangeLog.debug(
-          "BLOCK* NameSystem.unprotectedAddDatanode: "
-          + "node " + nodeDescr.getName() + " is added to datanodeMap.");
-    }
-  }
-
-  /**
-   * Physically remove node from datanodeMap.
-   *
-   * @param nodeID node
-   * @throws IOException
-   */
-  void wipeDatanode(DatanodeID nodeID) throws IOException {
-    assert hasWriteLock();
-    String key = nodeID.getStorageID();
-    synchronized (datanodeMap) {
-      host2DataNodeMap.remove(datanodeMap.remove(key));
-    }
-    if(NameNode.stateChangeLog.isDebugEnabled()) {
-      NameNode.stateChangeLog.debug(
-          "BLOCK* NameSystem.wipeDatanode: "
-          + nodeID.getName() + " storage " + key 
-          + " is removed from datanodeMap.");
-    }
-  }
 
   FSImage getFSImage() {
     return dir.fsImage;
@@ -3990,7 +3943,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
    * Change, if appropriate, the admin state of a datanode to 
    * decommission completed. Return true if decommission is complete.
    */
-  boolean checkDecommissionStateInternal(DatanodeDescriptor node) {
+  public boolean checkDecommissionStateInternal(DatanodeDescriptor node) {
     assert hasWriteLock();
     //
     // Check to see if all blocks in this decommissioned
@@ -4305,7 +4258,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean,
         try {
           needUpgrade = startDistributedUpgradeIfNeeded();
         } catch(IOException e) {
-          FSNamesystem.LOG.error(StringUtils.stringifyException(e));
+          FSNamesystem.LOG.error("IOException in startDistributedUpgradeIfNeeded", e);
         }
         if(needUpgrade) {
           // switch to manual safe mode

+ 2 - 2
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -692,7 +692,7 @@ public class NameNode implements NamenodeProtocols, FSConstants {
     try {
       if (httpServer != null) httpServer.stop();
     } catch (Exception e) {
-      LOG.error(StringUtils.stringifyException(e));
+      LOG.error("Exception while stopping httpserver", e);
     }
     if(namesystem != null) namesystem.close();
     if(emptier != null) emptier.interrupt();
@@ -1668,7 +1668,7 @@ public class NameNode implements NamenodeProtocols, FSConstants {
       if (namenode != null)
         namenode.join();
     } catch (Throwable e) {
-      LOG.error(StringUtils.stringifyException(e));
+      LOG.error("Exception in namenode join", e);
       System.exit(-1);
     }
   }

+ 4 - 6
hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java

@@ -320,7 +320,7 @@ public class SecondaryNameNode implements Runnable {
     try {
       if (checkpointImage != null) checkpointImage.close();
     } catch(IOException e) {
-      LOG.warn(StringUtils.stringifyException(e));
+      LOG.warn("Exception while closing CheckpointStorage", e);
     }
   }
 
@@ -330,7 +330,7 @@ public class SecondaryNameNode implements Runnable {
       try { 
         ugi = UserGroupInformation.getLoginUser();
       } catch (IOException e) {
-        LOG.error(StringUtils.stringifyException(e));
+        LOG.error("Exception while getting login user", e);
         e.printStackTrace();
         Runtime.getRuntime().exit(-1);
       }
@@ -378,12 +378,10 @@ public class SecondaryNameNode implements Runnable {
           lastCheckpointTime = now;
         }
       } catch (IOException e) {
-        LOG.error("Exception in doCheckpoint: ");
-        LOG.error(StringUtils.stringifyException(e));
+        LOG.error("Exception in doCheckpoint", e);
         e.printStackTrace();
       } catch (Throwable e) {
-        LOG.error("Throwable Exception in doCheckpoint: ");
-        LOG.error(StringUtils.stringifyException(e));
+        LOG.error("Throwable Exception in doCheckpoint", e);
         e.printStackTrace();
         Runtime.getRuntime().exit(-1);
       }

+ 55 - 0
hdfs/src/test/hdfs/org/apache/hadoop/cli/testHDFSConf.xml

@@ -2670,6 +2670,24 @@
       </comparators>
     </test>
 
+    <test> <!-- TESTED -->
+       <description>cp: putting file into an already existing destination with -f option(absolute path)</description>
+      <test-commands>
+        <command>-fs NAMENODE -touchz /user/file0</command>
+        <command>-fs NAMENODE -cp -f CLITEST_DATA/data120bytes /user/file0</command>
+        <command>-fs NAMENODE -cat /user/file0</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r /user</command>
+      </cleanup-commands>
+     <comparators>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>12345678901234</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+
     <test> <!-- TESTED -->
       <description>cp: copying directory to directory in hdfs:// path</description>
       <test-commands>
@@ -4076,6 +4094,24 @@
       </comparators>
     </test>
     
+    <test> <!-- TESTED -->
+      <description>put: putting file into an already existing destination with -f option(absolute path)</description>
+      <test-commands>
+        <command>-fs NAMENODE -touchz /user/file0</command>
+        <command>-fs NAMENODE -put -f CLITEST_DATA/data120bytes /user/file0</command>
+        <command>-fs NAMENODE -cat /user/file0</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r /user</command>
+     </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>12345678901234</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+    
     <test> <!-- TESTED -->
       <description>put: putting file into an already existing destination(relative path)</description>
       <test-commands>
@@ -4593,6 +4629,25 @@
     </test>
     
     <test> <!-- TESTED -->
+      <description>copyFromLocal: copying file into an already existing destination with -f option(absolute path)</description>
+      <test-commands>
+        <command>-fs NAMENODE -touchz /user/file0</command>
+        <command>-fs NAMENODE -copyFromLocal -f CLITEST_DATA/data120bytes /user/file0</command>
+        <command>-fs NAMENODE -cat /user/file0</command>
+      </test-commands>
+      <cleanup-commands>
+        <command>-fs NAMENODE -rm -r /user</command>
+      </cleanup-commands>
+      <comparators>
+        <comparator>
+          <type>RegexpComparator</type>
+          <expected-output>12345678901234</expected-output>
+        </comparator>
+      </comparators>
+    </test>
+    
+    <test> <!-- TESTED -->
+    
       <description>copyFromLocal: copying file into an already existing destination(relative path)</description>
       <test-commands>
         <command>-fs NAMENODE -touchz file0</command>

+ 69 - 0
hdfs/src/test/hdfs/org/apache/hadoop/hdfs/TestDFSShell.java

@@ -1354,4 +1354,73 @@ public class TestDFSShell extends TestCase {
     int res = admin.run(new String[] {"-refreshNodes"});
     assertEquals("expected to fail -1", res , -1);
   }
+
+  // force Copy Option is -f
+  public void testCopyCommandsWithForceOption() throws Exception {
+    Configuration conf = new Configuration();
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
+        .format(true).build();
+    FsShell shell = null;
+    FileSystem fs = null;
+    File localFile = new File("testFileForPut");
+    Path hdfsTestDir = new Path("ForceTestDir");
+    try {
+      fs = cluster.getFileSystem();
+      fs.mkdirs(hdfsTestDir);
+      localFile.createNewFile();
+      writeFile(fs, new Path("testFileForPut"));
+      shell = new FsShell();
+
+      // Tests for put
+      String[] argv = new String[] { "-put", "-f", localFile.getName(),
+          "ForceTestDir" };
+      int res = ToolRunner.run(shell, argv);
+      int SUCCESS = 0;
+      int ERROR = 1;
+      assertEquals("put -f is not working", SUCCESS, res);
+
+      argv = new String[] { "-put", localFile.getName(), "ForceTestDir" };
+      res = ToolRunner.run(shell, argv);
+      assertEquals("put command itself is able to overwrite the file", ERROR,
+          res);
+
+      // Tests for copyFromLocal
+      argv = new String[] { "-copyFromLocal", "-f", localFile.getName(),
+          "ForceTestDir" };
+      res = ToolRunner.run(shell, argv);
+      assertEquals("copyFromLocal -f is not working", SUCCESS, res);
+
+      argv = new String[] { "-copyFromLocal", localFile.getName(),
+          "ForceTestDir" };
+      res = ToolRunner.run(shell, argv);
+      assertEquals(
+          "copyFromLocal command itself is able to overwrite the file", ERROR,
+          res);
+
+      // Tests for cp
+      argv = new String[] { "-cp", "-f", localFile.getName(), "ForceTestDir" };
+      res = ToolRunner.run(shell, argv);
+      assertEquals("cp -f is not working", SUCCESS, res);
+
+      argv = new String[] { "-cp", localFile.getName(),
+          "ForceTestDir" };
+      res = ToolRunner.run(shell, argv);
+      assertEquals("cp command itself is able to overwrite the file", ERROR,
+          res);
+    } finally {
+      if (null != shell)
+        shell.close();
+
+      if (localFile.exists())
+        localFile.delete();
+
+      if (null != fs) {
+        fs.delete(hdfsTestDir, true);
+        fs.close();
+      }
+      cluster.shutdown();
+    }
+
+  }
+
 }

+ 24 - 12
hdfs/src/test/hdfs/org/apache/hadoop/hdfs/TestWriteRead.java

@@ -44,6 +44,7 @@ public class TestWriteRead {
 
   private static final int BUFFER_SIZE = 8192 * 100;
   private static final String ROOT_DIR = "/tmp/";
+  private static final long blockSize = 1024*100;
 
   // command-line options. Different defaults for unit test vs real cluster
   String filenameOption = ROOT_DIR + "fileX1";
@@ -69,8 +70,8 @@ public class TestWriteRead {
     LOG.info("initJunitModeTest");
 
     conf = new HdfsConfiguration();
-    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024 * 100); // 100K
-                                                                // blocksize
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); // 100K
+                                                              // blocksize
 
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
     cluster.waitActive();
@@ -99,15 +100,14 @@ public class TestWriteRead {
   }
 
   /** Junit Test reading while writing. */
-  
   @Test
   public void testWriteReadSeq() throws IOException {
     useFCOption = false; 
     positionReadOption = false;
     String fname = filenameOption;
-    
+    long rdBeginPos = 0;
     // need to run long enough to fail: takes 25 to 35 sec on Mac
-    int stat = testWriteAndRead(fname, WR_NTIMES, WR_CHUNK_SIZE);
+    int stat = testWriteAndRead(fname, WR_NTIMES, WR_CHUNK_SIZE, rdBeginPos);
     LOG.info("Summary status from test1: status= " + stat);
     Assert.assertEquals(0, stat);
   }
@@ -117,14 +117,27 @@ public class TestWriteRead {
   public void testWriteReadPos() throws IOException {
     String fname = filenameOption;
     positionReadOption = true;   // position read
-    int stat = testWriteAndRead(fname, WR_NTIMES, WR_CHUNK_SIZE);
+    long rdBeginPos = 0;
+    int stat = testWriteAndRead(fname, WR_NTIMES, WR_CHUNK_SIZE, rdBeginPos);
     Assert.assertEquals(0, stat);
   }
 
+  /** Junit Test position read of the current block being written. */
+  @Test
+  public void testReadPosCurrentBlock() throws IOException {
+    String fname = filenameOption;
+    positionReadOption = true;   // position read
+    int wrChunkSize = (int) blockSize + (int) (blockSize / 2);
+    long rdBeginPos = blockSize + 1;
+    int numTimes = 5;
+    int stat = testWriteAndRead(fname, numTimes, wrChunkSize, rdBeginPos);
+    Assert.assertEquals(0, stat);
+  }
    
   // equivalent of TestWriteRead1
   private int clusterTestWriteRead1() throws IOException {
-    int stat = testWriteAndRead(filenameOption, loopOption, chunkSizeOption);
+    long rdBeginPos = 0;
+    int stat = testWriteAndRead(filenameOption, loopOption, chunkSizeOption, rdBeginPos);
     return stat;
   }
 
@@ -133,10 +146,9 @@ public class TestWriteRead {
    * Return number of bytes read. 
    * Support both sequential read and position read.
    */
-  private long readData(String fname, byte[] buffer, long byteExpected)
+  private long readData(String fname, byte[] buffer, long byteExpected, long beginPosition)
       throws IOException {
     long totalByteRead = 0;
-    long beginPosition = 0;
     Path path = getFullyQualifiedPath(fname);
 
     FSDataInputStream in = null;
@@ -263,7 +275,7 @@ public class TestWriteRead {
    * After each iteration of write, do a read of the file from begin to end. 
    * Return 0 on success, else number of failure.
    */
-  private int testWriteAndRead(String fname, int loopN, int chunkSize)
+  private int testWriteAndRead(String fname, int loopN, int chunkSize, long readBeginPosition)
       throws IOException {
 
     int countOfFailures = 0;
@@ -324,7 +336,7 @@ public class TestWriteRead {
               + ". TotalByteVisible = " + totalByteVisible + " to file "
               + fname);
         }
-        byteVisibleToRead = readData(fname, inBuffer, totalByteVisible);
+        byteVisibleToRead = readData(fname, inBuffer, totalByteVisible, readBeginPosition);
 
         String readmsg = "Written=" + totalByteWritten + " ; Expected Visible="
             + totalByteVisible + " ; Got Visible=" + byteVisibleToRead
@@ -353,7 +365,7 @@ public class TestWriteRead {
 
       out.close();
 
-      byteVisibleToRead = readData(fname, inBuffer, totalByteVisible);
+      byteVisibleToRead = readData(fname, inBuffer, totalByteVisible, readBeginPosition);
 
       String readmsg2 = "Written=" + totalByteWritten + " ; Expected Visible="
           + totalByteVisible + " ; Got Visible=" + byteVisibleToRead
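
The new readBeginPosition/rdBeginPos parameter routes reads through HDFS's positional-read (pread) API rather than the sequential stream position, which is what lets testReadPosCurrentBlock start reading past the first block boundary of a file that is still being written. A minimal sketch of the two read styles follows (not part of the patch; the path and offsets are placeholders):

// Sketch only: positional (pread) vs. sequential read on an HDFS file.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PreadSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    Path file = new Path("/tmp/fileX1");   // placeholder path
    byte[] buf = new byte[8192];
    FSDataInputStream in = fs.open(file);
    try {
      // Positional read: reads at an explicit offset, does not move the stream position.
      int preadBytes = in.read(100 * 1024 + 1, buf, 0, buf.length);
      // Sequential read: reads from the current stream position (still 0 here).
      int seqBytes = in.read(buf, 0, buf.length);
      System.out.println("pread=" + preadBytes + " bytes, sequential=" + seqBytes + " bytes");
    } finally {
      in.close();
    }
  }
}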

+ 2 - 1
hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestHost2NodesMap.java → hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java

@@ -16,12 +16,13 @@
  * limitations under the License.
  */
 
-package org.apache.hadoop.hdfs.server.namenode;
+package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import junit.framework.TestCase;
 
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.Host2NodesMap;
 
 public class TestHost2NodesMap extends TestCase {
   static private Host2NodesMap map = new Host2NodesMap();

+ 48 - 1
hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java

@@ -24,7 +24,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
@@ -93,6 +92,54 @@ public class TestDataNodeVolumeFailureToleration {
     cluster.shutdown();
   }
 
+  /**
+   * Test the DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY configuration
+   * option, i.e. the DN tolerates data directories that it fails to
+   * use during its start-up.
+   */
+  @Test
+  public void testValidVolumesAtStartup() throws Exception {
+    assumeTrue(!System.getProperty("os.name").startsWith("Windows"));
+
+    // Make sure no DNs are running.
+    cluster.shutdownDataNodes();
+
+    // Bring up a datanode with two default data dirs, but with one bad one.
+    conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1);
+
+    // We use subdirectories 0 and 1 in order to have only a single
+    // data dir's parent inject a failure.
+    File tld = new File(MiniDFSCluster.getBaseDirectory(), "badData");
+    File dataDir1 = new File(tld, "data1");
+    File dataDir1Actual = new File(dataDir1, "1");
+    dataDir1Actual.mkdirs();
+    // Force an IOE to occur on one of the dfs.data.dir entries.
+    File dataDir2 = new File(tld, "data2");
+    prepareDirToFail(dataDir2);
+    File dataDir2Actual = new File(dataDir2, "2");
+
+    // Start one DN, with manually managed DN dir
+    conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY,
+        dataDir1Actual.getPath() + "," + dataDir2Actual.getPath());
+    cluster.startDataNodes(conf, 1, false, null, null);
+    cluster.waitActive();
+
+    try {
+      assertTrue("The DN should have started up fine.",
+          cluster.isDataNodeUp());
+      DataNode dn = cluster.getDataNodes().get(0);
+      String si = dn.getFSDataset().getStorageInfo();
+      assertTrue("The DN should have started with this directory",
+          si.contains(dataDir1Actual.getPath()));
+      assertFalse("The DN shouldn't have a bad directory.",
+          si.contains(dataDir2Actual.getPath()));
+    } finally {
+      cluster.shutdownDataNodes();
+      FileUtil.chmod(dataDir2.toString(), "755");
+    }
+
+  }
+
   /**
    * Test the DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY configuration
    * option, i.e. the DN shuts itself down when the number of failures

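The configuration knob both of these tests revolve around is dfs.datanode.failed.volumes.tolerated (DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY): how many configured data directories may be unusable before the DataNode refuses to start or shuts itself down. A minimal sketch of the setup the new test performs before starting the DN (not part of the patch; directory paths are placeholders):

// Sketch only: let the DN survive one unusable data directory at start-up.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class VolumeTolerationSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Tolerate one failed volume; with the default of 0, any bad dir aborts start-up.
    conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1);
    // Two data dirs; if one cannot be used, the DN still comes up on the other.
    conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY,
        "/data/1/dfs/dn,/data/2/dfs/dn");
    System.out.println("tolerated = "
        + conf.getInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 0));
  }
}
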
+ 60 - 0
hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java

@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.AbstractList;
+
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.*;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.server.common.IncorrectVersionException;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
+import org.junit.Test;
+
+
+public class TestDatanodeRegister { 
+  public static final Log LOG = LogFactory.getLog(TestDatanodeRegister.class);
+  @Test
+  public void testDataNodeRegister() throws Exception {
+    DataNode.BPOfferService myMockBPOS = mock(DataNode.BPOfferService.class);
+    doCallRealMethod().when(myMockBPOS).register();
+    myMockBPOS.bpRegistration = mock(DatanodeRegistration.class);
+    when(myMockBPOS.bpRegistration.getStorageID()).thenReturn("myTestStorageID");
+    
+    NamespaceInfo fakeNSInfo = mock(NamespaceInfo.class);
+    when(fakeNSInfo.getBuildVersion()).thenReturn("NSBuildVersion");
+    DatanodeProtocol fakeDNProt = mock(DatanodeProtocol.class);
+    when(fakeDNProt.versionRequest()).thenReturn(fakeNSInfo);
+    doCallRealMethod().when(myMockBPOS).setNameNode(fakeDNProt);
+    myMockBPOS.setNameNode( fakeDNProt );
+    try {   
+      myMockBPOS.register();
+      fail("register() did not throw exception! " +
+           "Expected: IncorrectVersionException");
+    } catch (IncorrectVersionException ie) {
+      LOG.info("register() returned correct Exception: IncorrectVersionException");
+    }
+  }
+}
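
The new test leans on Mockito's partial-mock idiom: the BPOfferService is a mock, doCallRealMethod() re-enables just register(), and the collaborators it touches are stubbed. A generic sketch of that pattern with hypothetical Worker/Service classes (not HDFS types), assuming Mockito is on the classpath:

// Sketch only: run one real method on a mock against stubbed collaborators.
import static org.mockito.Mockito.*;

public class PartialMockSketch {
  static class Service {
    String version() { return "real-version"; }
  }
  static class Worker {
    Service service;
    void check() {
      if (!"expected-version".equals(service.version())) {
        throw new IllegalStateException("version mismatch: " + service.version());
      }
    }
  }

  public static void main(String[] args) {
    Worker worker = mock(Worker.class);
    doCallRealMethod().when(worker).check();   // only check() runs real code
    worker.service = mock(Service.class);      // collaborator stays a stub
    when(worker.service.version()).thenReturn("some-other-version");
    try {
      worker.check();
      System.out.println("unexpected: no exception thrown");
    } catch (IllegalStateException e) {
      System.out.println("got expected exception: " + e.getMessage());
    }
  }
}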

+ 27 - 0
hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLogFileOutputStream.java

@@ -20,9 +20,11 @@ package org.apache.hadoop.hdfs.server.namenode;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.File;
 import java.io.IOException;
+import java.nio.channels.FileChannel;
 
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -35,6 +37,7 @@ import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.junit.Before;
 import org.junit.Test;
+import org.mockito.Mockito;
 
 public class TestEditLogFileOutputStream {
   
@@ -82,6 +85,30 @@ public class TestEditLogFileOutputStream {
     assertTrue("Edit log disk space used should be at least 257 blocks",
         256 * 4096 <= new DU(editLog, conf).getUsed());
   }
+  
+  @Test
+  public void testClose() throws IOException {
+    String errorMessage = "TESTING: fc.truncate() threw IOE";
+    
+    File testDir = new File(System.getProperty("test.build.data", "/tmp"));
+    assertTrue("could not create test directory", testDir.exists() || testDir.mkdirs());
+    File f = new File(testDir, "edits");
+    assertTrue("could not create test file", f.createNewFile());
+    EditLogFileOutputStream elos = new EditLogFileOutputStream(f, 0);
+    
+    FileChannel mockFc = Mockito.spy(elos.getFileChannelForTesting());
+    Mockito.doThrow(new IOException(errorMessage)).when(mockFc).truncate(Mockito.anyLong());
+    elos.setFileChannelForTesting(mockFc);
+    
+    try {
+      elos.close();
+      fail("elos.close() succeeded, but should have thrown");
+    } catch (IOException e) {
+      assertEquals("wrong IOE thrown from elos.close()", e.getMessage(), errorMessage);
+    }
+    
+    assertEquals("fc was not nulled when elos.close() failed", elos.getFileChannelForTesting(), null);
+  }
 
   /**
    * Tests EditLogFileOutputStream doesn't throw NullPointerException on