
HDFS-567. Add block forensics contrib tool to print history of corrupt and missing blocks from the HDFS logs. Contributed by Bill Zeller and Jithendra Pandey.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hdfs/trunk@816717 13f79535-47bb-0310-9956-ffa450edef68
Suresh Srinivas committed 15 years ago, commit 3457898bd5

+ 6 - 1
CHANGES.txt

@@ -40,7 +40,12 @@ Trunk (unreleased changes)
     HDFS-385. Add support for an experimental API that allows a module external
     to HDFS to specify how HDFS blocks should be placed. (dhruba)
 
-    HADOOP-4952. Update hadoop-core and test jars to propagate new FileContext file system application interface. (Sanjay Radia via suresh).
+    HADOOP-4952. Update hadoop-core and test jars to propagate new FileContext
+    file system application interface. (Sanjay Radia via suresh).
+
+    HDFS-567. Add block forensics contrib tool to print history of corrupt and
+    missing blocks from the HDFS logs.
+    (Bill Zeller, Jithendra Pandey via suresh).
 
   IMPROVEMENTS
 

+ 25 - 0
src/contrib/block_forensics/README

@@ -0,0 +1,25 @@
+This contribution consists of two components designed to make it easier to find information about lost or corrupt blocks.
+
+The first is a MapReduce job that searches for one or more block ids in a set of log files. It is implemented in org.apache.hadoop.blockforensics.BlockSearch. Building this contribution generates a jar file that can be executed using:
+
+  bin/hadoop jar [jar location] [hdfs input path] [hdfs output dir] [comma delimited list of block ids]
+
+  For example, the command:
+    bin/hadoop jar /foo/bar/hadoop-0.1-block_forensics.jar /input/* /output 2343,45245,75823
+  ... searches for any of blocks 2343, 45245, or 75823 in any of the files 
+   contained in the /input/ directory. 
+
+  
+  The output will be any line containing one of the provided block ids. While this tool is designed to be used with block ids, it can also be used for general text searching. 
+  
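+  Each output record is the matching block id (the key), a tab, and the full
+  log line that contained it (the value). For example, a hit for block 2343
+  might look like the following (the log message itself is illustrative):
+
+    2343	2009-09-18 10:32:44,123 INFO ... blk_2343 ...
+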
+The second component is a standalone Java program that repeatedly queries the namenode at a given interval for corrupt replicas. If any are found, it launches the above MapReduce job. The syntax is:
+
+  java BlockForensics http://[namenode]:[port]/corrupt_replicas_xml.jsp [sleep time between namenode queries for corrupt blocks (in milliseconds)] [mapred jar location] [hdfs input path]
+
+  For example, the command:
+    java BlockForensics http://localhost:50070/corrupt_replicas_xml.jsp 30000
+                        /foo/bar/hadoop-0.1-block_forensics.jar /input/*
+  ... queries the namenode at localhost:50070 for corrupt replicas every 30
+      seconds and runs /foo/bar/hadoop-0.1-block_forensics.jar if any are found. 
+
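+  The client only looks for <block_id> elements in the page it fetches, so the
+  minimal sketch below is enough for it to parse (the enclosing element name is
+  illustrative, not the exact namenode output):
+
+    <?xml version="1.0"?>
+    <corrupt_replicas>
+      <block_id>2343</block_id>
+      <block_id>45245</block_id>
+    </corrupt_replicas>
+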
+The MapReduce job jar and the BlockForensics class can be found in the build/contrib/block_forensics and build/contrib/block_forensics/classes directories, respectively.
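+
+To rebuild both, one approach is to run the contrib build directly (this assumes
+the top-level prerequisites noted in build.xml, "ant deploy-contrib
+compile-core-test", have already been run):
+
+  cd src/contrib/block_forensics
+  ant jar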

+ 66 - 0
src/contrib/block_forensics/build.xml

@@ -0,0 +1,66 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!-- 
+Before you can run these subtargets directly, you need 
+to call at top-level: ant deploy-contrib compile-core-test
+-->
+<project name="block_forensics" default="jar">
+  <property name="version" value="0.1"/>
+  <import file="../build-contrib.xml"/>
+
+  <!-- create the list of files to add to the classpath -->
+  <fileset dir="${hadoop.root}/lib" id="class.path">
+    <include name="**/*.jar" />
+    <exclude name="**/excluded/" />
+  </fileset>
+
+  <!-- Override jar target to specify main class -->
+  <target name="jar" depends="compile">
+    <jar
+      jarfile="${build.dir}/hadoop-${version}-${name}.jar"
+      basedir="${build.classes}"      
+    >
+    <manifest>
+      <attribute name="Main-Class" value="org.apache.hadoop.blockforensics.BlockSearch"/>
+    </manifest>
+    </jar>
+    
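+    <!-- Compile the standalone BlockForensics client; its classes stay under
+         ${build.classes} (build/contrib/block_forensics/classes) rather than
+         going into the jar built above -->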
+    <javac srcdir="client" destdir="${build.classes}"/>
+
+  </target>
+
+  <!-- Run only pure-Java unit tests. -->
+  <target name="test">
+   <antcall target="hadoopbuildcontrib.test"> 
+   </antcall>
+  </target>  
+ 
+  <!-- Run all unit tests
+  This is not called as part of the nightly build
+  because it will only run on platforms that have standard 
+  Unix utilities available. 
+  -->
+ <target name="test-unix">
+   <antcall target="hadoopbuildcontrib.test">
+   </antcall>
+ </target>  
+
+
+</project>

+ 186 - 0
src/contrib/block_forensics/client/BlockForensics.java

@@ -0,0 +1,186 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.lang.Runtime;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.StringTokenizer;
+import java.util.TreeSet;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import org.w3c.dom.Document;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+/**
+ * This class repeatedly queries a namenode looking for corrupt replicas. If 
+ * any are found a provided hadoop job is launched and the output printed
+ * to stdout. 
+ *
+ * The syntax is:
+ *
+ * java BlockForensics http://[namenode]:[port]/corrupt_replicas_xml.jsp 
+ *                    [sleep time between namenode queries for corrupt blocks
+ *                      (in milliseconds)] [mapred jar location] [hdfs input path]
+ *
+ * All arguments are required.
+ */
+public class BlockForensics {
+  
+  public static String join(List<?> l, String sep) {
+    StringBuilder sb = new StringBuilder();
+    Iterator it = l.iterator();
+    
+    while(it.hasNext()){
+      sb.append(it.next());
+      if (it.hasNext()) {
+        sb.append(sep);
+      }
+    }
+    
+    return sb.toString();
+  }
+  
+  
+  // runs hadoop command and prints output to stdout
+  public static void runHadoopCmd(String ... args)
+  throws IOException {
+    String hadoop_home = System.getenv("HADOOP_HOME");
+    
+    List<String> l = new LinkedList<String>();
+    l.add("bin/hadoop");
+    l.addAll(Arrays.asList(args));
+    
+    ProcessBuilder pb = new ProcessBuilder(l);
+    
+    if (hadoop_home != null) {
+      pb.directory(new File(hadoop_home));
+    }
+
+    pb.redirectErrorStream(true);
+          
+    Process p = pb.start();
+
+    BufferedReader br = new BufferedReader(
+                          new InputStreamReader(p.getInputStream()));
+    String line;
+
+    while ((line = br.readLine()) != null) {
+      System.out.println(line);
+    }
+
+
+  }
+    
+  public static void main(String[] args)
+    throws SAXException, ParserConfigurationException, 
+           InterruptedException, IOException {
+
+    if (System.getenv("HADOOP_HOME") == null) {
+      System.err.println("The environment variable HADOOP_HOME is undefined");
+      System.exit(1);
+    }
+
+
+    if (args.length < 4) {
+      System.out.println("Usage: java BlockForensics [http://namenode:port/"
+                         + "corrupt_replicas_xml.jsp] [sleep time between "
+                         + "requests (in milliseconds)] [mapred jar location] "
+                         + "[hdfs input path]");
+      return;
+    }
+             
+    int sleepTime = 30000;
+  
+    try {
+      sleepTime = Integer.parseInt(args[1]);
+    } catch (NumberFormatException e) {
+      System.out.println("The sleep time entered is invalid, "
+                         + "using default value: "+sleepTime+"ms"); 
+    }
+      
+    Set<Long> blockIds = new TreeSet<Long>();
+    
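+    // Poll the namenode forever; each pass searches only for block ids that
+    // have not been seen in a previous pass.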
+    while (true) {
+      InputStream xml = new URL(args[0]).openConnection().getInputStream();
+    
+      DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
+      DocumentBuilder builder = fact.newDocumentBuilder();
+      Document doc = builder.parse(xml);
+         
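+      // The page reports one <block_id> element per corrupt replica; collect
+      // any ids that have not already been searched for.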
+      NodeList corruptReplicaNodes = doc.getElementsByTagName("block_id");
+
+      List<Long> searchBlockIds = new LinkedList<Long>();
+      for(int i=0; i<corruptReplicaNodes.getLength(); i++) {
+        Long blockId = new Long(corruptReplicaNodes.item(i)
+                                                    .getFirstChild()
+                                                    .getNodeValue());
+        if (!blockIds.contains(blockId)) {
+          blockIds.add(blockId);
+          searchBlockIds.add(blockId);
+        }
+      }
+      
+      if (searchBlockIds.size() > 0) {
+        String blockIdsStr = BlockForensics.join(searchBlockIds, ",");
+        System.out.println("\nSearching for: " + blockIdsStr);
+        String tmpDir = "/tmp-block-forensics-"
+            + new Random().nextInt(Integer.MAX_VALUE);
+
+        System.out.println("Using temporary dir: "+tmpDir);
+
+        // delete tmp dir
+        BlockForensics.runHadoopCmd("fs", "-rmr", tmpDir);
+      
+        // launch mapred job      
+        BlockForensics.runHadoopCmd("jar",
+                                    args[2], // jar location
+                                    args[3], // input dir
+                                    tmpDir, // output dir
+                                    blockIdsStr// comma delimited list of blocks
+                                    );
+        // cat output
+        BlockForensics.runHadoopCmd("fs", "-cat", tmpDir+"/part*");
+
+        // delete temp dir
+        BlockForensics.runHadoopCmd("fs", "-rmr", tmpDir);
+
+        int sleepSecs = (int)(sleepTime/1000.);
+        System.out.print("Sleeping for "+sleepSecs
+                         + " second"+(sleepSecs == 1?"":"s")+".");
+      }
+
+      System.out.print(".");
+      Thread.sleep(sleepTime);
+
+    }
+  }
+}

+ 44 - 0
src/contrib/block_forensics/ivy.xml

@@ -0,0 +1,44 @@
+<?xml version="1.0" ?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.hadoop" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
+    <description>
+        Apache Hadoop
+    </description>
+  </info>
+  <configurations defaultconfmapping="default">
+    <!--these match the Maven configurations-->
+    <conf name="default" extends="master,runtime"/>
+    <conf name="master" description="contains the artifact but no dependencies"/>
+    <conf name="runtime" description="runtime but not the artifact" />
+
+    <conf name="common" visibility="private" 
+      extends="runtime"
+      description="artifacts needed to compile/test the application"/>
+    <conf name="test" visibility="private" extends="runtime"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+</ivy-module>

+ 21 - 0
src/contrib/block_forensics/ivy/libraries.properties

@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+#This properties file lists the versions of the various artifacts used by block_forensics.
+#It drives ivy and the generation of a maven POM
+
+#Please list the dependencies name with version if they are different from the ones 
+#listed in the global libraries.properties file (in alphabetical order)

+ 136 - 0
src/contrib/block_forensics/src/java/org/apache/hadoop/block_forensics/BlockSearch.java

@@ -0,0 +1,136 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.blockforensics;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.StringTokenizer;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+
+/**
+ * BlockSearch is a mapred job that's designed to search input for appearances 
+ * of strings. 
+ *
+ * The syntax is:
+ * 
+ * bin/hadoop jar [jar location] [hdfs input path] [hdfs output dir]
+ *                [comma delimited list of block ids]
+ *
+ * All arguments are required.
+ *
+ * This tool is designed to search for one or more block ids in log files but
+ * can be used for general text search. Each input line is checked for the
+ * search strings with a simple substring match; at most one search string is
+ * assumed to appear per line, and only the first match found is reported.
+ */
+public class BlockSearch extends Configured implements Tool {
+  public static class Map extends Mapper<LongWritable, Text, Text, Text> {
+    private Text blockIdText = new Text();
+    private Text valText = new Text();
+    private List<String> blockIds = null;
+
+    protected void setup(Context context) 
+      throws IOException, InterruptedException {
+      Configuration conf = context.getConfiguration();
+      StringTokenizer st = new StringTokenizer(conf.get("blockIds"), ",");
+      blockIds = new LinkedList<String>();
+      while (st.hasMoreTokens()) {
+        String blockId = st.nextToken();
+        blockIds.add(blockId);
+      }
+    }
+
+
+    public void map(LongWritable key, Text value, Context context)
+      throws IOException, InterruptedException {
+      if (blockIds == null) {
+        System.err.println("Error: No block ids specified");
+      } else {
+        String valStr = value.toString();
+
+        for(String blockId: blockIds) {
+          if (valStr.indexOf(blockId) != -1) {
+            blockIdText.set(blockId);
+            valText.set(valStr);
+            context.write(blockIdText, valText);
+            break; // assume only one block id appears per line
+          }
+        }
+      }
+
+    }
+
+  }
+
+
+  public static class Reduce extends Reducer<Text, Text, Text, Text> {
+    @Override
+    public void reduce(Text key, Iterable<Text> values, Context context)
+    throws IOException, InterruptedException {
+      // Emit every matching log line under its block id key.
+      for (Text value : values) {
+        context.write(key, value);
+      }
+    }
+  }
+    
+  public int run(String[] args) throws Exception {
+    if (args.length < 3) {
+      System.out.println("BlockSearch <inLogs> <outDir> <comma delimited list of blocks>");
+      ToolRunner.printGenericCommandUsage(System.out);
+      return 2;
+    }
+
+    Configuration conf = getConf();
+    conf.set("blockIds", args[2]);
+
+    Job job = new Job(conf);
+
+    job.setCombinerClass(Reduce.class);
+    job.setJarByClass(BlockSearch.class);
+    job.setJobName("BlockSearch");
+    job.setMapperClass(Map.class);
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(Text.class);
+    job.setReducerClass(Reduce.class);
+
+    FileInputFormat.setInputPaths(job, new Path(args[0]));
+    FileOutputFormat.setOutputPath(job, new Path(args[1]));
+
+    return job.waitForCompletion(true) ? 0 : 1;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(), new BlockSearch(), args);
+    System.exit(res);
+  }
+}