
HDFS-6917. Add an hdfs debug command to validate blocks, call recoverlease, etc. (cmccabe)

Colin Patrick Mccabe 10 years ago
parent
commit
7b026c50f1

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -324,6 +324,9 @@ Release 2.7.0 - UNRELEASED
 
     HDFS-7035. Make adding a new data directory to the DataNode an atomic
     operation and improve error handling (Lei Xu via Colin P. McCabe)
+    
+    HDFS-6917. Add an hdfs debug command to validate blocks, call recoverlease,
+    etc. (cmccabe)
 
   OPTIMIZATIONS
 

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs

@@ -53,6 +53,7 @@ function hadoop_usage
   echo "  zkfc                 run the ZK Failover Controller daemon"
   echo ""
   echo "Most commands print help when invoked w/o parameters."
+  # There are also debug commands, but they don't show up in this listing.
 }
 
 # let's locate libexec...
@@ -121,6 +122,9 @@ case ${COMMAND} in
       CLASS='org.apache.hadoop.hdfs.server.datanode.DataNode'
     fi
   ;;
+  debug)
+    CLASS='org.apache.hadoop.hdfs.tools.DebugAdmin'
+  ;;
   dfs)
     CLASS=org.apache.hadoop.fs.FsShell
     hadoop_debug "Appending HADOOP_CLIENT_OPTS onto HADOOP_OPTS"
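
With the new debug) case above, bin/hdfs dispatches "hdfs debug <command>" to
org.apache.hadoop.hdfs.tools.DebugAdmin. A rough sketch of the three subcommands
added by this patch, taken from their usage strings below; the metadata and block
file paths are hypothetical examples of replica files under a DataNode data
directory:

  $ hdfs debug help verify
  $ hdfs debug verify -meta /data/dn/current/.../blk_1073741825_1001.meta \
        -block /data/dn/current/.../blk_1073741825
  $ hdfs debug recoverLease -path /user/foo/open.log -retries 3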

+ 361 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DebugAdmin.java

@@ -0,0 +1,361 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools;
+
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.util.concurrent.Uninterruptibles;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.DataChecksum;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
+
+/**
+ * This class implements debug operations on the HDFS command-line.
+ *
+ * These operations are only for debugging, and may change or disappear
+ * between HDFS versions.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class DebugAdmin extends Configured implements Tool {
+  /**
+   * All the debug commands we can run.
+   */
+  private DebugCommand DEBUG_COMMANDS[] = {
+    new VerifyBlockChecksumCommand(),
+    new RecoverLeaseCommand(),
+    new HelpCommand()
+  };
+
+  /**
+   * The base class for debug commands.
+   */
+  private abstract class DebugCommand {
+    final String name;
+    final String usageText;
+    final String helpText;
+
+    DebugCommand(String name, String usageText, String helpText) {
+      this.name = name;
+      this.usageText = usageText;
+      this.helpText = helpText;
+    }
+
+    abstract int run(List<String> args) throws IOException;
+  }
+
+  private static int HEADER_LEN = 7;
+
+  /**
+   * The command for verifying a block metadata file and possibly block file.
+   */
+  private class VerifyBlockChecksumCommand extends DebugCommand {
+    VerifyBlockChecksumCommand() {
+      super("verify",
+"verify [-meta <metadata-file>] [-block <block-file>]",
+"  Verify HDFS metadata and block files.  If a block file is specified, we\n" +
+"  will verify that the checksums in the metadata file match the block\n" +
+"  file.");
+    }
+
+    int run(List<String> args) throws IOException {
+      if (args.size() == 0) {
+        System.out.println(usageText);
+        System.out.println(helpText + "\n");
+        return 1;
+      }
+      String blockFile = StringUtils.popOptionWithArgument("-block", args);
+      String metaFile = StringUtils.popOptionWithArgument("-meta", args);
+      if (metaFile == null) {
+        System.err.println("You must specify a meta file with -meta");
+        return 1;
+      }
+
+      FileInputStream metaStream = null, dataStream = null;
+      FileChannel metaChannel = null, dataChannel = null;
+      DataInputStream checksumStream = null;
+      try {
+        BlockMetadataHeader header;
+        try {
+          metaStream = new FileInputStream(metaFile);
+          checksumStream = new DataInputStream(metaStream);
+          header = BlockMetadataHeader.readHeader(checksumStream);
+          metaChannel = metaStream.getChannel();
+          metaChannel.position(HEADER_LEN);
+        } catch (RuntimeException e) {
+          System.err.println("Failed to read HDFS metadata file header for " +
+              metaFile + ": " + StringUtils.stringifyException(e));
+          return 1;
+        } catch (IOException e) {
+          System.err.println("Failed to read HDFS metadata file header for " +
+              metaFile + ": " + StringUtils.stringifyException(e));
+          return 1;
+        }
+        DataChecksum checksum = header.getChecksum();
+        System.out.println("Checksum type: " + checksum.toString());
+        if (blockFile == null) {
+          return 0;
+        }
+        ByteBuffer metaBuf, dataBuf;
+        try {
+          dataStream = new FileInputStream(blockFile);
+          dataChannel = dataStream.getChannel();
+          final int CHECKSUMS_PER_BUF = 1024 * 32;
+          metaBuf = ByteBuffer.allocate(checksum.
+              getChecksumSize() * CHECKSUMS_PER_BUF);
+          dataBuf = ByteBuffer.allocate(checksum.
+              getBytesPerChecksum() * CHECKSUMS_PER_BUF);
+        } catch (IOException e) {
+          System.err.println("Failed to open HDFS block file for " +
+              blockFile + ": " + StringUtils.stringifyException(e));
+          return 1;
+        }
+        long offset = 0;
+        while (true) {
+          dataBuf.clear();
+          int dataRead = -1;
+          try {
+            dataRead = dataChannel.read(dataBuf);
+            if (dataRead < 0) {
+              break;
+            }
+          } catch (IOException e) {
+            System.err.println("Got I/O error reading block file " +
+                blockFile + " from disk at offset " + dataChannel.position() +
+                ": " + StringUtils.stringifyException(e));
+            return 1;
+          }
+          try {
+            int csumToRead =
+                (((checksum.getBytesPerChecksum() - 1) + dataRead) /
+                  checksum.getBytesPerChecksum()) *
+                      checksum.getChecksumSize();
+            metaBuf.clear();
+            metaBuf.limit(csumToRead);
+            metaChannel.read(metaBuf);
+            dataBuf.flip();
+            metaBuf.flip();
+          } catch (IOException e) {
+            System.err.println("Got I/O error reading metadata file " +
+                metaFile + " from disk at offset " + metaChannel.position() +
+                ": " +  StringUtils.stringifyException(e));
+            return 1;
+          }
+          try {
+            checksum.verifyChunkedSums(dataBuf, metaBuf,
+                blockFile, offset);
+          } catch (IOException e) {
+            System.out.println("verifyChunkedSums error: " +
+                StringUtils.stringifyException(e));
+            return 1;
+          }
+          offset += dataRead;
+        }
+        System.out.println("Checksum verification succeeded on block file " +
+            blockFile);
+        return 0;
+      } finally {
+        IOUtils.cleanup(null, metaStream, dataStream, checksumStream);
+      }
+    }
+  }
+
+  /**
+   * The command for recovering a file lease.
+   */
+  private class RecoverLeaseCommand extends DebugCommand {
+    RecoverLeaseCommand() {
+      super("recoverLease",
+"recoverLease [-path <path>] [-retries <num-retries>]",
+"  Recover the lease on the specified path.  The path must reside on an\n" +
+"  HDFS filesystem.  The default number of retries is 1.");
+    }
+
+    private static final int TIMEOUT_MS = 5000;
+
+    int run(List<String> args) throws IOException {
+      if (args.size() == 0) {
+        System.out.println(usageText);
+        System.out.println(helpText + "\n");
+        return 1;
+      }
+      String pathStr = StringUtils.popOptionWithArgument("-path", args);
+      String retriesStr = StringUtils.popOptionWithArgument("-retries", args);
+      if (pathStr == null) {
+        System.err.println("You must supply a -path argument to " +
+            "recoverLease.");
+        return 1;
+      }
+      int maxRetries = 1;
+      if (retriesStr != null) {
+        try {
+          maxRetries = Integer.parseInt(retriesStr);
+        } catch (NumberFormatException e) {
+          System.err.println("Failed to parse the argument to -retries: " +
+              StringUtils.stringifyException(e));
+          return 1;
+        }
+      }
+      FileSystem fs;
+      try {
+        fs = FileSystem.newInstance(new URI(pathStr), getConf(), null);
+      } catch (URISyntaxException e) {
+        System.err.println("URISyntaxException for " + pathStr + ":" +
+            StringUtils.stringifyException(e));
+        return 1;
+      } catch (InterruptedException e) {
+        System.err.println("InterruptedException for " + pathStr + ":" +
+            StringUtils.stringifyException(e));
+        return 1;
+      }
+      DistributedFileSystem dfs = null;
+      try {
+        dfs = (DistributedFileSystem) fs;
+      } catch (ClassCastException e) {
+        System.err.println("Invalid filesystem for path " + pathStr + ": " +
+            "needed scheme hdfs, but got: " + fs.getScheme());
+        return 1;
+      }
+      for (int retry = 0; true; ) {
+        boolean recovered = false;
+        IOException ioe = null;
+        try {
+          recovered = dfs.recoverLease(new Path(pathStr));
+        } catch (IOException e) {
+          ioe = e;
+        }
+        if (recovered) {
+          System.out.println("recoverLease SUCCEEDED on " + pathStr); 
+          return 0;
+        }
+        if (ioe != null) {
+          System.err.println("recoverLease got exception: ");
+          ioe.printStackTrace();
+        } else {
+          System.err.println("recoverLease returned false.");
+        }
+        retry++;
+        if (retry >= maxRetries) {
+          break;
+        }
+        System.err.println("Retrying in " + TIMEOUT_MS + " ms...");
+        Uninterruptibles.sleepUninterruptibly(TIMEOUT_MS,
+            TimeUnit.MILLISECONDS);
+        System.err.println("Retry #" + retry);
+      }
+      System.err.println("Giving up on recoverLease for " + pathStr + " after " +
+          maxRetries + (maxRetries == 1 ? " try." : " tries."));
+      return 1;
+    }
+  }
+
+  /**
+   * The command for getting help about other commands.
+   */
+  private class HelpCommand extends DebugCommand {
+    HelpCommand() {
+      super("help",
+"help [command-name]",
+"  Get help about a command.");
+    }
+
+    int run(List<String> args) {
+      DebugCommand command = popCommand(args);
+      if (command == null) {
+        printUsage();
+        return 0;
+      }
+      System.out.println(command.usageText);
+      System.out.println(command.helpText + "\n");
+      return 0;
+    }
+  }
+
+  public DebugAdmin(Configuration conf) {
+    super(conf);
+  }
+
+  private DebugCommand popCommand(List<String> args) {
+    String commandStr = (args.size() == 0) ? "" : args.get(0);
+    if (commandStr.startsWith("-")) {
+      commandStr = commandStr.substring(1);
+    }
+    for (DebugCommand command : DEBUG_COMMANDS) {
+      if (command.name.equals(commandStr)) {
+        args.remove(0);
+        return command;
+      }
+    }
+    return null;
+  }
+
+  public int run(String[] argv) {
+    LinkedList<String> args = new LinkedList<String>();
+    for (int j = 0; j < argv.length; ++j) {
+      args.add(argv[j]);
+    }
+    DebugCommand command = popCommand(args);
+    if (command == null) {
+      printUsage();
+      return 0;
+    }
+    try {
+      return command.run(args);
+    } catch (IOException e) {
+      System.err.println("IOException: " +
+          StringUtils.stringifyException(e));
+      return 1;
+    } catch (RuntimeException e) {
+      System.err.println("RuntimeException: " +
+          StringUtils.stringifyException(e));
+      return 1;
+    }
+  }
+
+  private void printUsage() {
+    System.out.println("Usage: hdfs debug <command> [arguments]\n");
+    for (DebugCommand command : DEBUG_COMMANDS) {
+      if (!command.name.equals("help")) {
+        System.out.println(command.usageText);
+      }
+    }
+  }
+
+  public static void main(String[] argsArray) throws IOException {
+    DebugAdmin debugAdmin = new DebugAdmin(new Configuration());
+    System.exit(debugAdmin.run(argsArray));
+  }
+}
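
A rough example of a successful verify run, assuming the default CRC32C checksum
with 512-byte chunks (the two output lines are the strings printed by
VerifyBlockChecksumCommand above; the file paths are hypothetical):

  $ hdfs debug verify -meta /data/dn/current/.../blk_1073741825_1001.meta \
        -block /data/dn/current/.../blk_1073741825
  Checksum type: DataChecksum(type=CRC32C, chunkSize=512)
  Checksum verification succeeded on block file /data/dn/current/.../blk_1073741825

If -block is omitted, verify stops after printing the checksum type. recoverLease
retries up to -retries times (default 1), sleeping TIMEOUT_MS (5000 ms) between
attempts, and exits 0 only once DistributedFileSystem#recoverLease returns true.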

+ 118 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDebugAdmin.java

@@ -0,0 +1,118 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.server.datanode.DataNode;
+import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
+import org.apache.hadoop.io.IOUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+
+import static org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil.*;
+import static org.junit.Assert.assertEquals;
+
+public class TestDebugAdmin {
+  private MiniDFSCluster cluster;
+  private DistributedFileSystem fs;
+  private DebugAdmin admin;
+  private DataNode datanode;
+
+  @Before
+  public void setUp() throws Exception {
+    Configuration conf = new Configuration();
+    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
+    cluster.waitActive();
+    fs = cluster.getFileSystem();
+    admin = new DebugAdmin(conf);
+    datanode = cluster.getDataNodes().get(0);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (cluster != null) {
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  private String runCmd(String[] cmd) throws Exception {
+    final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+    final PrintStream out = new PrintStream(bytes);
+    final PrintStream oldErr = System.err;
+    final PrintStream oldOut = System.out;
+    System.setErr(out);
+    System.setOut(out);
+    int ret;
+    try {
+      ret = admin.run(cmd);
+    } finally {
+      System.setErr(oldErr);
+      System.setOut(oldOut);
+      IOUtils.closeStream(out);
+    }
+    return "ret: " + ret + ", " + bytes.toString();
+  }
+
+  @Test(timeout = 60000)
+  public void testRecoverLease() throws Exception {
+    assertEquals("ret: 1, You must supply a -path argument to recoverLease.\n",
+        runCmd(new String[]{"recoverLease", "-retries", "1"}));
+    FSDataOutputStream out = fs.create(new Path("/foo"));
+    out.write(123);
+    out.close();
+    assertEquals("ret: 0, recoverLease SUCCEEDED on /foo\n",
+        runCmd(new String[]{"recoverLease", "-path", "/foo"}));
+  }
+
+  @Test(timeout = 60000)
+  public void testVerifyBlockChecksumCommand() throws Exception {
+    DFSTestUtil.createFile(fs, new Path("/bar"), 1234, (short) 1, 0xdeadbeef);
+    FsDatasetSpi<?> fsd = datanode.getFSDataset();
+    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, new Path("/bar"));
+    File blockFile = getBlockFile(fsd,
+        block.getBlockPoolId(), block.getLocalBlock());
+    assertEquals("ret: 1, You must specify a meta file with -meta\n",
+        runCmd(new String[]{"verify", "-block", blockFile.getAbsolutePath()}));
+    File metaFile = getMetaFile(fsd,
+        block.getBlockPoolId(), block.getLocalBlock());
+    assertEquals("ret: 0, Checksum type: " +
+          "DataChecksum(type=CRC32C, chunkSize=512)\n",
+        runCmd(new String[]{"verify",
+            "-meta", metaFile.getAbsolutePath()}));
+    assertEquals("ret: 0, Checksum type: " +
+          "DataChecksum(type=CRC32C, chunkSize=512)\n" +
+          "Checksum verification succeeded on block file " +
+          blockFile.getAbsolutePath() + "\n",
+        runCmd(new String[]{"verify",
+            "-meta", metaFile.getAbsolutePath(),
+            "-block", blockFile.getAbsolutePath()})
+    );
+  }
+}
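
The test redirects both System.out and System.err into one buffer and asserts on
the combined "ret: <exit code>, <output>" string, so it pins down the exact
messages DebugAdmin prints. Assuming a standard Maven build of the hadoop-hdfs
module, it can be run on its own with the Surefire test filter:

  $ cd hadoop-hdfs-project/hadoop-hdfs
  $ mvn test -Dtest=TestDebugAdmin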