
Merging changes r1035795:r1035920 from trunk to federation

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hdfs/branches/HDFS-1052@1078173 13f79535-47bb-0310-9956-ffa450edef68
Suresh Srinivas, 14 years ago
parent
commit
109a44785e

+ 12 - 0
CHANGES.txt

@@ -442,6 +442,18 @@ Release 0.22.0 - Unreleased
 
     HDFS-1387. Update HDFS permissions guide for security. (Todd Lipcon via eli)
 
+    HDFS-455. Make NN and DN handle in an intuitive way comma-separated 
+    configuration strings. (Michele Catasta via eli)
+
+    HDFS-1071. savenamespace should write the fsimage to all configured 
+    fs.name.dir in parallel (Dmytro Molkov via jghoman)
+ 
+    HDFS-1055. Improve thread naming for DataXceivers. 
+    (Todd Lipcon and Ramkumar Vadali via eli).
+
+    HDFS-718. Configuration parameter to prevent accidental formatting of 
+    HDFS filesystem. (Andrew Ryan via jghoman)
+
   OPTIMIZATIONS
 
     HDFS-1140. Speedup INode.getPathComponents. (Dmytro Molkov via shv)

+ 9 - 0
src/java/hdfs-default.xml

@@ -554,4 +554,13 @@ creations/deletions), or "all".</description>
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.support.allow.format</name>
+  <value>true</value>
+  <description>Does HDFS namenode allow itself to be formatted?
+               You may consider setting this to false for any production
+               cluster, to avoid any possibility of formatting a running DFS.
+  </description>
+</property>
+
 </configuration>
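
The new flag is an ordinary boolean configuration key, so it can be inspected the same way as any other HDFS setting. A minimal, hypothetical standalone checker (not part of the patch; assumes hdfs-site.xml is on the classpath):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.HdfsConfiguration;

    public class CheckAllowFormat {
      public static void main(String[] args) {
        // HdfsConfiguration loads hdfs-default.xml and hdfs-site.xml.
        Configuration conf = new HdfsConfiguration();
        // Defaults to true; production clusters may override it to false.
        boolean allowFormat =
            conf.getBoolean("dfs.namenode.support.allow.format", true);
        System.out.println("NameNode format allowed: " + allowFormat);
      }
    }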

+ 2 - 0
src/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

@@ -93,6 +93,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String  DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_DEFAULT = "ssl-server.xml";
   public static final String  DFS_NAMENODE_NAME_DIR_RESTORE_KEY = "dfs.namenode.name.dir.restore";
   public static final boolean DFS_NAMENODE_NAME_DIR_RESTORE_DEFAULT = false;
+  public static final String  DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY = "dfs.namenode.support.allow.format";
+  public static final boolean DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT = true;
   public static final String  DFS_LIST_LIMIT = "dfs.ls.limit";
   public static final int     DFS_LIST_LIMIT_DEFAULT = 1000;
   public static final String  DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY = "dfs.datanode.failed.volumes.tolerated";

+ 1 - 1
src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

@@ -2027,7 +2027,7 @@ public class DataNode extends Configured
 
   static Collection<URI> getStorageDirs(Configuration conf) {
     Collection<String> dirNames =
-      conf.getStringCollection(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
+      conf.getTrimmedStringCollection(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
     return Util.stringCollectionAsURIs(dirNames);
   }
 

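The one-line DataNode change above (and the matching FSNamesystem change further down) is the HDFS-455 fix: directory lists given as comma-separated strings with surrounding whitespace or line breaks are now trimmed before being turned into URIs. A rough standalone illustration of the difference, not part of the patch:

    import java.util.Collection;
    import org.apache.hadoop.conf.Configuration;

    public class TrimmedDirsDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Operators often split long lists across lines in hdfs-site.xml.
        conf.set("dfs.datanode.data.dir",
                 "/data/1/dfs/dn,\n        /data/2/dfs/dn, /data/3/dfs/dn");

        // Old behaviour: entries keep the leading whitespace/newlines,
        // which later breaks URI parsing of the storage directories.
        Collection<String> raw =
            conf.getStringCollection("dfs.datanode.data.dir");

        // New behaviour: each entry is trimmed to a clean path.
        Collection<String> trimmed =
            conf.getTrimmedStringCollection("dfs.datanode.data.dir");

        System.out.println("raw     = " + raw);
        System.out.println("trimmed = " + trimmed);
      }
    }
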
+ 25 - 0
src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java

@@ -88,6 +88,20 @@ class DataXceiver extends DataTransferProtocol.Receiver
     }
   }
 
+  /**
+   * Update the current thread's name to contain the current status.
+   * Use this only after this receiver has started on its thread, i.e.,
+   * outside the constructor.
+   */
+  private void updateCurrentThreadName(String status) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("DataXceiver for client ").append(remoteAddress);
+    if (status != null) {
+      sb.append(" [").append(status).append("]");
+    }
+    Thread.currentThread().setName(sb.toString());
+  }
+
   /** Return the datanode object. */
   DataNode getDataNode() {return datanode;}
 
@@ -95,6 +109,8 @@ class DataXceiver extends DataTransferProtocol.Receiver
    * Read/write data from/to the DataXceiveServer.
    */
   public void run() {
+    updateCurrentThreadName("Waiting for operation");
+
     DataInputStream in=null; 
     try {
       in = new DataInputStream(
@@ -119,6 +135,7 @@ class DataXceiver extends DataTransferProtocol.Receiver
         LOG.debug(datanode.getMachineName() + ":Number of active connections is: "
             + datanode.getXceiverCount());
       }
+      updateCurrentThreadName("Cleaning up");
       IOUtils.closeStream(in);
       IOUtils.closeSocket(s);
       dataXceiverServer.childSockets.remove(s);
@@ -166,6 +183,8 @@ class DataXceiver extends DataTransferProtocol.Receiver
             dnR.getStorageID(), block, "%d")
         : dnR + " Served block " + block + " to " +
             s.getInetAddress();
+
+    updateCurrentThreadName("Sending block " + block);
     try {
       try {
         blockSender = new BlockSender(block, startOffset, length,
@@ -212,6 +231,7 @@ class DataXceiver extends DataTransferProtocol.Receiver
       long newGs, long minBytesRcvd, long maxBytesRcvd,
       String client, DatanodeInfo srcDataNode, DatanodeInfo[] targets,
       Token<BlockTokenIdentifier> blockToken) throws IOException {
+    updateCurrentThreadName("Receiving block " + block + " client=" + client);
 
     if (LOG.isDebugEnabled()) {
       LOG.debug("writeBlock receive buf size " + s.getReceiveBufferSize() +
@@ -427,11 +447,13 @@ class DataXceiver extends DataTransferProtocol.Receiver
       }
     }
 
+    updateCurrentThreadName("Reading metadata for block " + block);
     final MetaDataInputStream metadataIn = 
       datanode.data.getMetaDataInputStream(block);
     final DataInputStream checksumIn = new DataInputStream(new BufferedInputStream(
         metadataIn, BUFFER_SIZE));
 
+    updateCurrentThreadName("Getting checksum for block " + block);
     try {
       //read metadata file
       final BlockMetadataHeader header = BlockMetadataHeader.readHeader(checksumIn);
@@ -470,6 +492,7 @@ class DataXceiver extends DataTransferProtocol.Receiver
   @Override
   protected void opCopyBlock(DataInputStream in, ExtendedBlock block,
       Token<BlockTokenIdentifier> blockToken) throws IOException {
+    updateCurrentThreadName("Copying block " + block);
     // Read in the header
     if (datanode.isBlockTokenEnabled) {
       try {
@@ -545,6 +568,8 @@ class DataXceiver extends DataTransferProtocol.Receiver
   protected void opReplaceBlock(DataInputStream in,
       ExtendedBlock block, String sourceID, DatanodeInfo proxySource,
       Token<BlockTokenIdentifier> blockToken) throws IOException {
+    updateCurrentThreadName("Replacing block " + block + " from " + sourceID);
+
     /* read header */
     block.setNumBytes(dataXceiverServer.estimateBlockSize);
     if (datanode.isBlockTokenEnabled) {

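The updateCurrentThreadName additions implement HDFS-1055: each DataXceiver renames its own thread as it moves through an operation, so a jstack or thread dump immediately shows which client and block a stuck xceiver is serving. The general pattern, sketched outside of HDFS (class and status names here are illustrative only):

    public class StatusNamedTask implements Runnable {
      private final String remoteAddress;

      public StatusNamedTask(String remoteAddress) {
        this.remoteAddress = remoteAddress;
      }

      // Only call this from the thread that runs this task, never from the
      // constructor, because the constructing thread is a different one.
      private void updateThreadName(String status) {
        Thread.currentThread().setName(
            "Worker for client " + remoteAddress
            + (status != null ? " [" + status + "]" : ""));
      }

      @Override
      public void run() {
        updateThreadName("Waiting for operation");
        // ... read the request, then:
        updateThreadName("Sending block blk_123");
        // ... do the work, then:
        updateThreadName("Cleaning up");
      }
    }
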
+ 2 - 1
src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java

@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -268,7 +269,7 @@ public class FSEditLog {
    *  except fsimage.processIOError)
    */
   synchronized void processIOError(
-      ArrayList<EditLogOutputStream> errorStreams,
+      List<EditLogOutputStream> errorStreams,
       boolean propagate) {
     
     if (errorStreams == null || errorStreams.size() == 0) {

+ 104 - 44
src/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -40,6 +40,7 @@ import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -179,7 +180,13 @@ public class FSImage extends Storage {
   /**
    * Used for saving the image to disk
    */
-  static private final FsPermission FILE_PERM = new FsPermission((short)0);
+  static private final ThreadLocal<FsPermission> FILE_PERM =
+                          new ThreadLocal<FsPermission>() {
+                            @Override
+                            protected FsPermission initialValue() {
+                              return new FsPermission((short) 0);
+                            }
+                          };
   static private final byte[] PATH_SEPARATOR = DFSUtil.string2Bytes(Path.SEPARATOR);
 
   private static final Random R = new Random();
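
Replacing the single shared FsPermission scratch object with a ThreadLocal is what makes the image writer safe to run from several FSImageSaver threads at once: each thread mutates its own FsPermission instance instead of racing on one static field. The idiom in isolation (a simplified sketch, not the HDFS code):

    import org.apache.hadoop.fs.permission.FsPermission;

    public class PerThreadPermission {
      // One mutable FsPermission per thread, mirroring the FILE_PERM change,
      // so concurrent image-writer threads never share a static instance.
      private static final ThreadLocal<FsPermission> FILE_PERM =
          new ThreadLocal<FsPermission>() {
            @Override
            protected FsPermission initialValue() {
              return new FsPermission((short) 0);
            }
          };

      static FsPermission permFor(short mode) {
        FsPermission p = FILE_PERM.get(); // this thread's private copy
        p.fromShort(mode);                // safe to mutate: no other thread sees it
        return p;
      }
    }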
@@ -909,30 +916,34 @@ public class FSImage extends Storage {
    * @param propagate - flag, if set - then call corresponding EditLog stream's 
    * processIOError function.
    */
-  void processIOError(ArrayList<StorageDirectory> sds, boolean propagate) {
+  void processIOError(List<StorageDirectory> sds, boolean propagate) {
     ArrayList<EditLogOutputStream> al = null;
-    for(StorageDirectory sd:sds) {
-      // if has a stream assosiated with it - remove it too..
-      if (propagate && sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
-        EditLogOutputStream eStream = editLog.getEditsStream(sd);
-        if(al == null) al = new ArrayList<EditLogOutputStream>(1);
-        al.add(eStream);
-      }
-      
-      for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
-        StorageDirectory sd1 = it.next();
-        if (sd.equals(sd1)) {
-          //add storage to the removed list
-          LOG.warn("FSImage:processIOError: removing storage: "
-              + sd.getRoot().getPath());
-          try {
-            sd1.unlock(); //unlock before removing (in case it will be restored)
-          } catch (Exception e) {
-            // nothing
+    synchronized (sds) {
+      for (StorageDirectory sd : sds) {
+        // if has a stream assosiated with it - remove it too..
+        if (propagate && sd.getStorageDirType().isOfType(NameNodeDirType.EDITS)) {
+          EditLogOutputStream eStream = editLog.getEditsStream(sd);
+          if (al == null)
+            al = new ArrayList<EditLogOutputStream>(1);
+          al.add(eStream);
+        }
+
+        for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
+          StorageDirectory sd1 = it.next();
+          if (sd.equals(sd1)) {
+            // add storage to the removed list
+            LOG.warn("FSImage:processIOError: removing storage: "
+                + sd.getRoot().getPath());
+            try {
+              sd1.unlock(); // unlock before removing (in case it will be
+                            // restored)
+            } catch (Exception e) {
+              // nothing
+            }
+            removedStorageDirs.add(sd1);
+            it.remove();
+            break;
           }
-          removedStorageDirs.add(sd1);
-          it.remove();
-          break;
         }
       }
     }
@@ -1468,6 +1479,53 @@ public class FSImage extends Storage {
   public void setImageDigest(MD5Hash digest) {
     this.imageDigest = digest;
   }
+  /**
+   * FSImageSaver is being run in a separate thread when saving
+   * FSImage. There is one thread per each copy of the image.
+   *
+   * FSImageSaver assumes that it was launched from a thread that holds
+   * FSNamesystem lock and waits for the execution of FSImageSaver thread
+   * to finish.
+   * This way we are guaranteed that the namespace is not being updated
+   * while multiple instances of FSImageSaver are traversing it
+   * and writing it out.
+   */
+  private class FSImageSaver implements Runnable {
+    private StorageDirectory sd;
+    private List<StorageDirectory> errorSDs;
+    
+    FSImageSaver(StorageDirectory sd, List<StorageDirectory> errorSDs) {
+      this.sd = sd;
+      this.errorSDs = errorSDs;
+    }
+    
+    public void run() {
+      try {
+        saveCurrent(sd);
+      } catch (IOException ie) {
+        LOG.error("Unable to save image for " + sd.getRoot(), ie);
+        errorSDs.add(sd);              
+      }
+    }
+    
+    public String toString() {
+      return "FSImageSaver for " + sd.getRoot() +
+             " of type " + sd.getStorageDirType();
+    }
+  }
+  
+  private void waitForThreads(List<Thread> threads) {
+    for (Thread thread : threads) {
+      while (thread.isAlive()) {
+        try {
+          thread.join();
+        } catch (InterruptedException iex) {
+          LOG.error("Caught exception while waiting for thread " +
+                    thread.getName() + " to finish. Retrying join");
+        }        
+      }
+    }
+  }
   /**
    * Save the contents of the FS image and create empty edits.
    * 
@@ -1488,7 +1546,8 @@ public class FSImage extends Storage {
     editLog.close();
     if(renewCheckpointTime)
       this.checkpointTime = now();
-    ArrayList<StorageDirectory> errorSDs = new ArrayList<StorageDirectory>();
+    List<StorageDirectory> errorSDs =
+      Collections.synchronizedList(new ArrayList<StorageDirectory>());
 
     // mv current -> lastcheckpoint.tmp
     for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
@@ -1501,17 +1560,18 @@ public class FSImage extends Storage {
       }
     }
 
+    List<Thread> saveThreads = new ArrayList<Thread>();
     // save images into current
     for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.IMAGE);
                                                               it.hasNext();) {
       StorageDirectory sd = it.next();
-      try {
-        saveCurrent(sd);
-      } catch(IOException ie) {
-        LOG.error("Unable to save image for " + sd.getRoot(), ie);
-        errorSDs.add(sd);
-      }
+      FSImageSaver saver = new FSImageSaver(sd, errorSDs);
+      Thread saveThread = new Thread(saver, saver.toString());
+      saveThreads.add(saveThread);
+      saveThread.start();
     }
+    waitForThreads(saveThreads);
+    saveThreads.clear();
 
     // -NOTE-
     // If NN has image-only and edits-only storage directories and fails here 
@@ -1522,18 +1582,17 @@ public class FSImage extends Storage {
     // to the old state contained in their lastcheckpoint.tmp.
     // The edits directories should be discarded during startup because their
     // checkpointTime is older than that of image directories.
-
     // recreate edits in current
     for (Iterator<StorageDirectory> it = dirIterator(NameNodeDirType.EDITS);
                                                               it.hasNext();) {
-      StorageDirectory sd = it.next();
-      try {
-        saveCurrent(sd);
-      } catch(IOException ie) {
-        LOG.error("Unable to save edits for " + sd.getRoot(), ie);
-        errorSDs.add(sd);
-      }
+      final StorageDirectory sd = it.next();
+      FSImageSaver saver = new FSImageSaver(sd, errorSDs);
+      Thread saveThread = new Thread(saver, saver.toString());
+      saveThreads.add(saveThread);
+      saveThread.start();
     }
+    waitForThreads(saveThreads);
+
     // mv lastcheckpoint.tmp -> previous.checkpoint
     for (Iterator<StorageDirectory> it = dirIterator(); it.hasNext();) {
       StorageDirectory sd = it.next();
@@ -1711,6 +1770,7 @@ public class FSImage extends Storage {
     int nameLen = name.position();
     out.writeShort(nameLen);
     out.write(name.array(), name.arrayOffset(), nameLen);
+    FsPermission filePerm = FILE_PERM.get();
     if (node.isDirectory()) {
       out.writeShort(0);  // replication
       out.writeLong(node.getModificationTime());
@@ -1719,10 +1779,10 @@ public class FSImage extends Storage {
       out.writeInt(-1);   // # of blocks
       out.writeLong(node.getNsQuota());
       out.writeLong(node.getDsQuota());
-      FILE_PERM.fromShort(node.getFsPermissionShort());
+      filePerm.fromShort(node.getFsPermissionShort());
       PermissionStatus.write(out, node.getUserName(),
                              node.getGroupName(),
-                             FILE_PERM);
+                             filePerm);
     } else if (node.isLink()) {
       out.writeShort(0);  // replication
       out.writeLong(0);   // modification time
@@ -1730,10 +1790,10 @@ public class FSImage extends Storage {
       out.writeLong(0);   // preferred block size
       out.writeInt(-2);   // # of blocks
       Text.writeString(out, ((INodeSymlink)node).getLinkValue());
-      FILE_PERM.fromShort(node.getFsPermissionShort());
+      filePerm.fromShort(node.getFsPermissionShort());
       PermissionStatus.write(out, node.getUserName(),
                              node.getGroupName(),
-                             FILE_PERM);      
+                             filePerm);      
     } else {
       INodeFile fileINode = (INodeFile)node;
       out.writeShort(fileINode.getReplication());
@@ -1744,10 +1804,10 @@ public class FSImage extends Storage {
       out.writeInt(blocks.length);
       for (Block blk : blocks)
         blk.write(out);
-      FILE_PERM.fromShort(fileINode.getFsPermissionShort());
+      filePerm.fromShort(fileINode.getFsPermissionShort());
       PermissionStatus.write(out, fileINode.getUserName(),
                              fileINode.getGroupName(),
-                             FILE_PERM);
+                             filePerm);
     }
   }
   

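Taken together, FSImageSaver, waitForThreads and the synchronized error list are the HDFS-1071 change: one writer thread per image directory, all started and then joined while the caller keeps the namespace locked, with failed directories recorded in a thread-safe list. A stripped-down sketch of the same fan-out/join pattern (directory handling and the save method are placeholders, not HDFS APIs):

    import java.io.File;
    import java.io.FileWriter;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    public class ParallelSaveDemo {
      // Placeholder for the per-directory work; FSImage does saveCurrent(sd) here.
      static void saveTo(File dir) throws IOException {
        FileWriter w = new FileWriter(new File(dir, "image"));
        try { w.write("namespace snapshot"); } finally { w.close(); }
      }

      public static void main(String[] args) throws Exception {
        List<File> dirs = new ArrayList<File>();
        for (String a : args) dirs.add(new File(a));

        // Thread-safe collection, since every saver thread may append to it.
        final List<File> errorDirs =
            Collections.synchronizedList(new ArrayList<File>());

        List<Thread> savers = new ArrayList<Thread>();
        for (final File dir : dirs) {
          Thread t = new Thread("Saver for " + dir) {
            @Override public void run() {
              try {
                saveTo(dir);
              } catch (IOException e) {
                errorDirs.add(dir);   // record the failed directory, keep going
              }
            }
          };
          savers.add(t);
          t.start();
        }

        // Join every saver, retrying if the waiting thread is interrupted,
        // just like FSImage.waitForThreads above.
        for (Thread t : savers) {
          while (t.isAlive()) {
            try { t.join(); } catch (InterruptedException ignored) { }
          }
        }
        System.out.println("Directories that failed: " + errorDirs);
      }
    }
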
+ 2 - 2
src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -376,7 +376,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean, FSClusterSt
 
   public static Collection<URI> getStorageDirs(Configuration conf,
                                                 String propertyName) {
-    Collection<String> dirNames = conf.getStringCollection(propertyName);
+    Collection<String> dirNames = conf.getTrimmedStringCollection(propertyName);
     StartupOption startOpt = NameNode.getStartupOption(conf);
     if(startOpt == StartupOption.IMPORT) {
       // In case of IMPORT this will get rid of default directories 
@@ -387,7 +387,7 @@ public class FSNamesystem implements FSConstants, FSNamesystemMBean, FSClusterSt
       cE.addResource("core-default.xml");
       cE.addResource("core-site.xml");
       cE.addResource("hdfs-default.xml");
-      Collection<String> dirNames2 = cE.getStringCollection(propertyName);
+      Collection<String> dirNames2 = cE.getTrimmedStringCollection(propertyName);
       dirNames.removeAll(dirNames2);
       if(dirNames.isEmpty())
         LOG.warn("!!! WARNING !!!" +

+ 12 - 0
src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java

@@ -17,6 +17,9 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT;
+
 import java.io.File;
 import java.io.IOException;
 import java.net.InetSocketAddress;
@@ -1403,6 +1406,15 @@ public class NameNode implements NamenodeProtocols, FSConstants {
   private static boolean format(Configuration conf,
                                 boolean isConfirmationNeeded)
       throws IOException {
+    if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, 
+                         DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) {
+      throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY
+                             + " is set to false for this filesystem, so it "
+                             + "cannot be formatted. You will need to set "
+                             + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY +" parameter "
+                             + "to true in order to format this filesystem");
+    }
+    
     Collection<URI> dirsToFormat = FSNamesystem.getNamespaceDirs(conf);
     Collection<URI> editDirsToFormat = 
                  FSNamesystem.getNamespaceEditsDirs(conf);

+ 135 - 0
src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestAllowFormat.java

@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.util.StringUtils;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Startup and format tests
+ * 
+ */
+public class TestAllowFormat {
+  public static final String NAME_NODE_HOST = "localhost:";
+  public static final String NAME_NODE_HTTP_HOST = "0.0.0.0:";
+  private static final Log LOG =
+    LogFactory.getLog(TestAllowFormat.class.getName());
+  private static Configuration config;
+  private static MiniDFSCluster cluster = null;
+  private static File hdfsDir=null;
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    config = new Configuration();
+    String baseDir = System.getProperty("test.build.data", "build/test/data");
+
+    hdfsDir = new File(baseDir, "dfs");
+    if ( hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) {
+      throw new IOException("Could not delete hdfs directory '" + hdfsDir +
+                            "'");
+    }
+    LOG.info("hdfsdir is " + hdfsDir.getAbsolutePath());
+    config.set(DFS_NAMENODE_NAME_DIR_KEY, new File(hdfsDir, "name").getPath());
+    config.set(DFS_DATANODE_DATA_DIR_KEY, new File(hdfsDir, "data").getPath());
+
+    config.set(DFS_NAMENODE_CHECKPOINT_DIR_KEY,new File(hdfsDir, "secondary").getPath());
+
+    FileSystem.setDefaultUri(config, "hdfs://"+NAME_NODE_HOST + "0");
+  }
+
+  /**
+   * clean up
+   */
+  @AfterClass
+  public static void tearDown() throws Exception {
+    if (cluster!=null) {
+      cluster.shutdown();
+      LOG.info("Stopping mini cluster");
+    }
+    
+    if ( hdfsDir.exists() && !FileUtil.fullyDelete(hdfsDir) ) {
+      throw new IOException("Could not delete hdfs directory in tearDown '"
+                            + hdfsDir + "'");
+    }	
+  }
+
+   /**
+   * start MiniDFScluster, try formatting with different settings
+   * @throws IOException
+   * @throws InterruptedException 
+   */
+  @Test
+  public void testAllowFormat() throws IOException {
+    LOG.info("--starting mini cluster");
+    // manage dirs parameter set to false 
+
+    NameNode nn;
+    // 1. Create a new cluster and format DFS
+    config.setBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, true);
+    cluster = new MiniDFSCluster.Builder(config).manageDataDfsDirs(false)
+                                                .manageNameDfsDirs(false)
+                                                .build();
+    cluster.waitActive();
+    assertNotNull(cluster);
+
+    nn = cluster.getNameNode();
+    assertNotNull(nn);
+    LOG.info("Mini cluster created OK");
+    
+    // 2. Try formatting DFS with allowformat false.
+    // NOTE: the cluster must be shut down for format to work.
+    LOG.info("Verifying format will fail with allowformat false");
+    config.setBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, false);
+    try {
+      cluster.shutdown();
+      NameNode.format(config);
+      fail("Format succeeded, when it should have failed");
+    } catch (IOException e) { // expected to fail
+      // Verify we got message we expected
+      assertTrue("Exception was not about formatting Namenode", 
+          e.getMessage().startsWith("The option " + 
+                                    DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY));
+      LOG.info("Expected failure: " + StringUtils.stringifyException(e));
+      LOG.info("Done verifying format will fail with allowformat false");
+    }
+    // 3. Try formatting DFS with allowformat true
+    LOG.info("Verifying format will succeed with allowformat true");
+    config.setBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, true);
+    NameNode.format(config);
+    LOG.info("Done verifying format will succeed with allowformat true");
+  }
+}

+ 133 - 0
src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestParallelImageWrite.java

@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.namenode;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.server.namenode.FSImage;
+import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.protocol.FSConstants.SafeModeAction;
+import org.apache.hadoop.util.PureJavaCrc32;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.ArrayList;
+
+import java.io.File;
+import java.io.FileInputStream;
+
+/**
+ * A JUnit test for checking if restarting DFS preserves integrity.
+ * Specifically with FSImage being written in parallel
+ */
+public class TestParallelImageWrite extends TestCase {
+  /** check if DFS remains in proper condition after a restart */
+  public void testRestartDFS() throws Exception {
+    final Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = null;
+    FSNamesystem fsn = null;
+    DFSTestUtil files = new DFSTestUtil("TestRestartDFS", 200, 3, 8*1024);
+
+    final String dir = "/srcdat";
+    final Path rootpath = new Path("/");
+    final Path dirpath = new Path(dir);
+
+    long rootmtime;
+    FileStatus rootstatus;
+    FileStatus dirstatus;
+
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).format(true).numDataNodes(4).build();
+      FileSystem fs = cluster.getFileSystem();
+      files.createFiles(fs, dir);
+
+      rootmtime = fs.getFileStatus(rootpath).getModificationTime();
+      rootstatus = fs.getFileStatus(dirpath);
+      dirstatus = fs.getFileStatus(dirpath);
+
+      fs.setOwner(rootpath, rootstatus.getOwner() + "_XXX", null);
+      fs.setOwner(dirpath, null, dirstatus.getGroup() + "_XXX");
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+    try {
+      // Here we restart the MiniDFScluster without formatting namenode
+      cluster = new MiniDFSCluster.Builder(conf).format(false).numDataNodes(4).build();
+      fsn = cluster.getNamesystem();
+      FileSystem fs = cluster.getFileSystem();
+      assertTrue("Filesystem corrupted after restart.",
+                 files.checkFiles(fs, dir));
+
+      final FileStatus newrootstatus = fs.getFileStatus(rootpath);
+      assertEquals(rootmtime, newrootstatus.getModificationTime());
+      assertEquals(rootstatus.getOwner() + "_XXX", newrootstatus.getOwner());
+      assertEquals(rootstatus.getGroup(), newrootstatus.getGroup());
+
+      final FileStatus newdirstatus = fs.getFileStatus(dirpath);
+      assertEquals(dirstatus.getOwner(), newdirstatus.getOwner());
+      assertEquals(dirstatus.getGroup() + "_XXX", newdirstatus.getGroup());
+      rootmtime = fs.getFileStatus(rootpath).getModificationTime();
+
+      checkImages(fsn);
+
+      // Modify the system and then perform saveNamespace
+      files.cleanup(fs, dir);
+      files.createFiles(fs, dir);
+      fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+      cluster.getNameNode().saveNamespace();
+      checkImages(fsn);
+      fsn.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+      files.cleanup(fs, dir);
+    } finally {
+      if (cluster != null) { cluster.shutdown(); }
+    }
+  }
+  
+  private void checkImages(FSNamesystem fsn) throws Exception {
+    Iterator<StorageDirectory> iter = fsn.
+            getFSImage().dirIterator(FSImage.NameNodeDirType.IMAGE);
+    List<Long> checksums = new ArrayList<Long>();
+    while (iter.hasNext()) {
+      StorageDirectory sd = iter.next();
+      File fsImage = FSImage.getImageFile(sd, FSImage.NameNodeFile.IMAGE);
+      PureJavaCrc32 crc = new PureJavaCrc32();
+      FileInputStream in = new FileInputStream(fsImage);
+      byte[] buff = new byte[4096];
+      int read = 0;
+      while ((read = in.read(buff)) != -1) {
+       crc.update(buff, 0, read);
+      }
+      long val = crc.getValue();
+      checksums.add(val);
+    }
+    assertTrue("Not enough fsimage copies in MiniDFSCluster " + 
+               "to test parallel write", checksums.size() > 1);
+    for (int i = 1; i < checksums.size(); i++) {
+      assertEquals(checksums.get(i - 1), checksums.get(i));
+    }
+  }
+}
+