
merge -r 411936:413147 from trunk, preparing for 0.3.2 release

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/branches/branch-0.3@413169 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting · 19 years ago · commit 2e2c31a0e0
25 changed files with 775 additions and 218 deletions
  1. CHANGES.txt (+33 -0)
  2. bin/hadoop (+3 -1)
  3. build.xml (+1 -1)
  4. conf/log4j.properties (+17 -7)
  5. site/index.html (+25 -8)
  6. site/index.pdf (+31 -9)
  7. src/contrib/streaming/src/java/org/apache/hadoop/streaming/Environment.java (+23 -2)
  8. src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapRed.java (+56 -13)
  9. src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java (+0 -1)
  10. src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeReducer.java (+0 -1)
  11. src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java (+44 -27)
  12. src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamInputFormat.java (+15 -9)
  13. src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java (+128 -52)
  14. src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamLineRecordReader.java (+1 -1)
  15. src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamUtil.java (+18 -2)
  16. src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamXmlRecordReader.java (+246 -25)
  17. src/java/org/apache/hadoop/dfs/DFSClient.java (+2 -2)
  18. src/java/org/apache/hadoop/dfs/DataNode.java (+38 -14)
  19. src/java/org/apache/hadoop/dfs/FSDirectory.java (+56 -32)
  20. src/java/org/apache/hadoop/dfs/FSNamesystem.java (+4 -2)
  21. src/java/org/apache/hadoop/fs/FileSystem.java (+2 -1)
  22. src/java/org/apache/hadoop/fs/LocalFileSystem.java (+10 -3)
  23. src/site/src/documentation/content/xdocs/index.xml (+16 -0)
  24. src/test/org/apache/hadoop/dfs/ClusterTestDFSNamespaceLogging.java (+4 -4)
  25. src/test/org/apache/hadoop/test/AllTestDriver.java (+2 -1)

+ 33 - 0
CHANGES.txt

@@ -1,6 +1,39 @@
 Hadoop Change Log


+Release 0.3.2 - 2006-06-09
+
+ 1. HADOOP-275.  Update the streaming contrib module to use log4j for
+    its logging.  (Michel Tourn via cutting)
+
+ 2. HADOOP-279.  Provide defaults for log4j logging parameters, so
+    that things still work reasonably when Hadoop-specific system
+    properties are not provided.  (omalley via cutting)
+
+ 3. HADOOP-280.  Fix a typo in AllTestDriver which caused the wrong
+    test to be run when "DistributedFSCheck" was specified.
+   (Konstantin Shvachko via cutting)
+
+ 4. HADOOP-240.  DFS's mkdirs() implementation no longer logs a warning
+    when the directory already exists. (Hairong Kuang via cutting)
+
+ 5. HADOOP-285.  Fix DFS datanodes to be able to re-join the cluster
+    after the connection to the namenode is lost.  (omalley via cutting)
+
+ 6. HADOOP-277.  Fix a race condition when creating directories.
+   (Sameer Paranjpye via cutting)
+
+ 7. HADOOP-289.  Improved exception handling in DFS datanode.
+    (Konstantin Shvachko via cutting)
+
+ 8. HADOOP-292.  Fix client-side logging to go to standard error
+    rather than standard output, so that it can be distinguished from
+    application output.  (omalley via cutting)
+
+ 9. HADOOP-294.  Fixed bug where conditions for retrying after errors
+    in the DFS client were reversed.  (omalley via cutting)
+
+
 Release 0.3.1 - 2006-06-05

  1. HADOOP-272.  Fix a bug in bin/hadoop setting log

+ 3 - 1
bin/hadoop

@@ -13,6 +13,8 @@
 #
 #   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
 #
+#   HADOOP_ROOT_LOGGER The root appender. Default is INFO,console
+#

 # resolve links - $0 may be a softlink
 THIS="$0"
@@ -162,7 +164,7 @@ HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
 HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
 HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_HOME"
 HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
-HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,stdout}"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"

 # run it
 exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"

+ 1 - 1
build.xml

@@ -9,7 +9,7 @@
 
   <property name="Name" value="Hadoop"/>
   <property name="name" value="hadoop"/>
-  <property name="version" value="0.3.2-dev"/>
+  <property name="version" value="0.3.3-dev"/>
   <property name="final.name" value="${name}-${version}"/>
   <property name="year" value="2006"/>
   <property name="libhdfs.version" value="1"/>

+ 17 - 7
conf/log4j.properties

@@ -1,10 +1,14 @@
-# RootLogger - DailyRollingFileAppender
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+# Define the root logger to the system property "hadoop.root.logger".
 log4j.rootLogger=${hadoop.root.logger}
 
 # Logging Threshold
 log4j.threshhold=ALL
 
-
 #
 # Daily Rolling File Appender
 #
@@ -26,13 +30,14 @@ log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
 
 
 #
-# stdout
-# Add *stdout* to rootlogger above if you want to use this 
+# console
+# Add "console" to rootlogger above if you want to use this 
 #
 
-log4j.appender.stdout=org.apache.log4j.ConsoleAppender
-log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-log4j.appender.stdout.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
 
 #
 # Rolling File Appender
@@ -49,3 +54,8 @@ log4j.appender.stdout.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %
 #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
 #log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
 
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+
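
Note on the new defaults above: log4j 1.x expands ${...} placeholders from JVM system properties first and then from the properties file itself, which is what lets bin/hadoop's -Dhadoop.root.logger flag (HADOOP-279) override the in-file fallback. A minimal, illustrative Java sketch of that interaction follows; the class name and config path are assumptions, not part of this commit:

    // Illustrative sketch only -- not part of this commit.
    // Shows how the ${hadoop.root.logger} placeholder is resolved: log4j 1.x
    // substitutes from JVM system properties (set by bin/hadoop via
    // -Dhadoop.root.logger=...), falling back to the defaults now defined
    // at the top of conf/log4j.properties.
    import org.apache.log4j.Logger;
    import org.apache.log4j.PropertyConfigurator;

    public class RootLoggerDemo {
      public static void main(String[] args) {
        if (System.getProperty("hadoop.root.logger") == null) {
          // Equivalent of running: HADOOP_ROOT_LOGGER=DEBUG,console bin/hadoop ...
          System.setProperty("hadoop.root.logger", "DEBUG,console");
        }
        // Path is an assumption; point it at the real conf directory.
        PropertyConfigurator.configure("conf/log4j.properties");
        Logger.getLogger(RootLoggerDemo.class).info("goes to System.err via the console appender");
      }
    }

Because the console appender now targets System.err, client-side log lines no longer mix with application output on stdout (HADOOP-292).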

+ 25 - 8
site/index.html

@@ -122,6 +122,12 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a href="#News">News</a>
 <a href="#News">News</a>
 <ul class="minitoc">
 <ul class="minitoc">
 <li>
 <li>
+<a href="#9+June%2C+2006%3A+release+0.3.2+available">9 June, 2006: release 0.3.2 available</a>
+</li>
+<li>
+<a href="#8+June%2C+2006%3A+FAQ+added+to+Wiki">8 June, 2006: FAQ added to Wiki</a>
+</li>
+<li>
 <a href="#5+June%2C+2006%3A+release+0.3.1+available">5 June, 2006: release 0.3.1 available</a>
 <a href="#5+June%2C+2006%3A+release+0.3.1+available">5 June, 2006: release 0.3.1 available</a>
 </li>
 </li>
 <li>
 <li>
@@ -154,26 +160,37 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a name="N1000C"></a><a name="News"></a>
 <a name="N1000C"></a><a name="News"></a>
 <h2 class="h3">News</h2>
 <h2 class="h3">News</h2>
 <div class="section">
 <div class="section">
-<a name="N10012"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
+<a name="N10012"></a><a name="9+June%2C+2006%3A+release+0.3.2+available"></a>
+<h3 class="h4">9 June, 2006: release 0.3.2 available</h3>
+<p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/k9g5c">change log</a>. The release can
+      be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p>
+<a name="N10024"></a><a name="8+June%2C+2006%3A+FAQ+added+to+Wiki"></a>
+<h3 class="h4">8 June, 2006: FAQ added to Wiki</h3>
+<p>Hadoop now has a <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>.  Please
+      help make this more complete!
+      </p>
+<a name="N10032"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
 <h3 class="h4">5 June, 2006: release 0.3.1 available</h3>
 <h3 class="h4">5 June, 2006: release 0.3.1 available</h3>
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/l6on4">change log</a>. The release can
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/l6on4">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10024"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
+<a name="N10044"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
 <h3 class="h4">2 June, 2006: release 0.3.0 available</h3>
 <h3 class="h4">2 June, 2006: release 0.3.0 available</h3>
 <p>This includes many fixes, improving performance, scalability
 <p>This includes many fixes, improving performance, scalability
       and reliability and adding new features.  For details see the <a href="http://tinyurl.com/rq3f7">change log</a>. The release can
       and reliability and adding new features.  For details see the <a href="http://tinyurl.com/rq3f7">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10036"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
+<a name="N10056"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
 <h3 class="h4">12 May, 2006: release 0.2.1 available</h3>
 <h3 class="h4">12 May, 2006: release 0.2.1 available</h3>
 <p>This fixes a few bugs in release 0.2.0, listed in the <a href="http://tinyurl.com/rnnvz">change log</a>. The
 <p>This fixes a few bugs in release 0.2.0, listed in the <a href="http://tinyurl.com/rnnvz">change log</a>. The
       release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10048"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
+<a name="N10068"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
 <h3 class="h4">5 May, 2006: release 0.2.0 available</h3>
 <h3 class="h4">5 May, 2006: release 0.2.0 available</h3>
 <p>We are now aiming for monthly releases.  There have been many
 <p>We are now aiming for monthly releases.  There have been many
       bug fixes and improvements in the past month.  MapReduce and DFS
       bug fixes and improvements in the past month.  MapReduce and DFS
@@ -182,24 +199,24 @@ document.write("<text>Last Published:</text> " + document.lastModified);
       details. The release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       details. The release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1005A"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
+<a name="N1007A"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
 <h3 class="h4">2 April, 2006: release 0.1.0 available</h3>
 <h3 class="h4">2 April, 2006: release 0.1.0 available</h3>
 <p>This is the first Hadoop release.  The release is available
 <p>This is the first Hadoop release.  The release is available
       <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
       <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
       here</a>.</p>
       here</a>.</p>
-<a name="N10068"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
+<a name="N10088"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
 <h3 class="h4">6 February, 2006: nightly builds</h3>
 <h3 class="h4">6 February, 2006: nightly builds</h3>
 <p>Hadoop now has nightly builds.  This automatically creates a
 <p>Hadoop now has nightly builds.  This automatically creates a
       <a href="http://cvs.apache.org/dist/lucene/hadoop/nightly/">downloadable version of Hadoop every
       <a href="http://cvs.apache.org/dist/lucene/hadoop/nightly/">downloadable version of Hadoop every
       night</a>.  All unit tests must pass, or a message is sent to
       night</a>.  All unit tests must pass, or a message is sent to
       the developers mailing list and no new version is created.  This
       the developers mailing list and no new version is created.  This
       also updates the <a href="docs/api/">javadoc</a>.</p>
       also updates the <a href="docs/api/">javadoc</a>.</p>
-<a name="N1007A"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
+<a name="N1009A"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
 <h3 class="h4">3 February, 2006: Hadoop code moved out of Nutch</h3>
 <h3 class="h4">3 February, 2006: Hadoop code moved out of Nutch</h3>
 <p>The Hadoop code has now been moved into its own Subversion
 <p>The Hadoop code has now been moved into its own Subversion
       tree, renamed into packages under <span class="codefrag">org.apache.hadoop</span>.
       tree, renamed into packages under <span class="codefrag">org.apache.hadoop</span>.
       All unit tests pass, but little else has yet been tested.</p>
       All unit tests pass, but little else has yet been tested.</p>
-<a name="N10087"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
+<a name="N100A7"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
 <h3 class="h4">30 March, 2006: Hadoop project approved</h3>
 <h3 class="h4">30 March, 2006: Hadoop project approved</h3>
 <p>The Lucene PMC has elected to split the Nutch MapReduce and
 <p>The Lucene PMC has elected to split the Nutch MapReduce and
       distributed filesytem code into a new project named Hadoop.</p>
       distributed filesytem code into a new project named Hadoop.</p>

+ 31 - 9
site/index.pdf
File diff suppressed because it is too large


+ 23 - 2
src/contrib/streaming/src/java/org/apache/hadoop/streaming/Environment.java

@@ -17,8 +17,12 @@
 package org.apache.hadoop.streaming;
 
 import java.io.*;
+import java.net.InetAddress;
 import java.util.*;
 
+/*
+ * If we move to Java 1.5, we can get rid of this class and just use System.getenv
+ */
 public class Environment extends Properties
 {
    public Environment()
@@ -26,13 +30,15 @@ public class Environment extends Properties
    {
       // Extend this code to fit all operating
       // environments that you expect to run in
-
       String command = null;
       String OS = System.getProperty("os.name");
+      String lowerOs = OS.toLowerCase();
       if (OS.equals("Windows NT")) {
          command = "cmd /C set";
       } else if (OS.indexOf("ix") > -1 || OS.indexOf("inux") > -1) {
          command = "env";
+      } else if(lowerOs.startsWith("mac os x")) {
+         command = "env";
       } else {
          // Add others here
       }
@@ -83,4 +89,19 @@ public class Environment extends Properties
     }     
     return arr;
   }
-} 
+   
+   public String getHost()
+   {
+     String host = getProperty("HOST");
+     if(host == null) {
+       // HOST isn't always in the environment
+       try {
+         host = InetAddress.getLocalHost().getHostName();
+       } catch(IOException io) {
+         io.printStackTrace();
+       }
+     }
+     return host;
+   }
+   
+} 
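
The getHost() addition above prefers the HOST environment variable and falls back to a DNS lookup. A standalone sketch of the same fallback idea, using the Java 5+ System.getenv that the class comment anticipates (class and method names are illustrative, not from the patch):

    // Illustrative sketch, not part of the commit.
    import java.net.InetAddress;
    import java.net.UnknownHostException;

    public class HostNameDemo {
      static String hostName() {
        String host = System.getenv("HOST");      // often unset outside login shells
        if (host == null) {
          try {
            host = InetAddress.getLocalHost().getHostName();
          } catch (UnknownHostException e) {
            host = "localhost";                   // last-resort default
          }
        }
        return host;
      }

      public static void main(String[] args) {
        System.out.println("host = " + hostName());
      }
    }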

+ 56 - 13
src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapRed.java

@@ -22,9 +22,12 @@ import java.io.IOException;
 import java.util.Date;
 import java.util.Date;
 import java.util.Map;
 import java.util.Map;
 import java.util.Arrays;
 import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.Properties;
 import java.util.Properties;
 import java.util.regex.*;
 import java.util.regex.*;
 
 
+import org.apache.commons.logging.*;
+
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reducer;
@@ -43,6 +46,8 @@ import org.apache.hadoop.io.LongWritable;
  */
  */
 public abstract class PipeMapRed {
 public abstract class PipeMapRed {
 
 
+  protected static final Log LOG = LogFactory.getLog(PipeMapRed.class.getName());  
+  
   /** The command to be spawned as a subprocess.
   /** The command to be spawned as a subprocess.
    * Mapper/Reducer operations will delegate to it
    * Mapper/Reducer operations will delegate to it
    */
    */
@@ -53,9 +58,9 @@ public abstract class PipeMapRed {
   
   
 
 
   /**
   /**
-   * @returns ow many TABS before the end of the key part 
+   * @returns how many TABS before the end of the key part 
    * usually: 1 or "ALL"
    * usually: 1 or "ALL"
-   * used both for tool output of both Map and Reduce
+   * used for tool output of both Map and Reduce
    * configured via tool's argv: splitKeyVal=ALL or 1..
    * configured via tool's argv: splitKeyVal=ALL or 1..
    * although it is interpreted here, not by tool
    * although it is interpreted here, not by tool
    */
    */
@@ -91,20 +96,57 @@ public abstract class PipeMapRed {
     return cols;
     return cols;
   }
   }
   
   
-  String[] splitArgs(String args)
+  final static int OUTSIDE = 1;
+  final static int SINGLEQ = 2;
+  final static int DOUBLEQ = 3;
+  
+  static String[] splitArgs(String args)
   {
   {
-    String regex = "\\s(?=(?:[^\"]*\"[^\"]*\")*[^\"]*\\z)";
-    String[] split = args.split(regex);
-    // remove outer quotes
-    for(int i=0; i<split.length; i++) {
-        String si = split[i].trim();
-        if(si.charAt(0)=='"' && si.charAt(si.length()-1)=='"') {
-            si = si.substring(1, si.length()-1);
-            split[i] = si;
+    ArrayList argList = new ArrayList();
+    char[] ch = args.toCharArray();
+    int clen = ch.length;
+    int state = OUTSIDE;
+    int argstart = 0;
+    for(int c=0; c<=clen; c++) {
+        boolean last = (c==clen);
+        int lastState = state;
+        boolean endToken = false;
+        if(!last) {
+          if(ch[c]=='\'') {
+            if(state == OUTSIDE) {
+              state = SINGLEQ;
+            } else if(state == SINGLEQ) {
+              state = OUTSIDE;  
+            }
+            endToken = (state != lastState);
+          } else if(ch[c]=='"') {
+            if(state == OUTSIDE) {
+              state = DOUBLEQ;
+            } else if(state == DOUBLEQ) {
+              state = OUTSIDE;  
+            }          
+            endToken = (state != lastState);
+          } else if(ch[c]==' ') {
+            if(state == OUTSIDE) {
+              endToken = true;
+            }            
+          }
+        }
+        if(last || endToken) {
+          if(c == argstart) {
+            // unquoted space
+          } else {
+            String a;
+            a = args.substring(argstart, c); 
+            argList.add(a);
+          }
+          argstart = c+1;
+          lastState = state;
         }
         }
     }
     }
-    return split;
+    return (String[])argList.toArray(new String[0]);
   }
   }
+
   public void configure(JobConf job)
   public void configure(JobConf job)
   {
   {
 
 
@@ -132,7 +174,7 @@ public abstract class PipeMapRed {
 	  // A  relative path should match in the unjarred Job data
 	  // A  relative path should match in the unjarred Job data
       // In this case, force an absolute path to make sure exec finds it.
       // In this case, force an absolute path to make sure exec finds it.
       argvSplit[0] = new File(argvSplit[0]).getAbsolutePath();
       argvSplit[0] = new File(argvSplit[0]).getAbsolutePath();
-      log_.println("PipeMapRed exec " + Arrays.toString(argvSplit));
+      log_.println("PipeMapRed exec " + Arrays.asList(argvSplit));
             
             
       
       
       Environment childEnv = (Environment)StreamUtil.env().clone();
       Environment childEnv = (Environment)StreamUtil.env().clone();
@@ -440,4 +482,5 @@ public abstract class PipeMapRed {
       }
       }
     }    
     }    
   }
   }
+  
 }
 }
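
The splitArgs() rewrite above replaces a regex split with a small state machine (OUTSIDE/SINGLEQ/DOUBLEQ) so quoted arguments survive intact and the quote characters are dropped. A rough standalone sketch of the same tokenizing idea and its expected behaviour (simplified, not the committed code):

    // Illustrative sketch, simplified from the idea in PipeMapRed.splitArgs.
    import java.util.ArrayList;
    import java.util.List;

    public class SplitArgsDemo {
      static String[] splitArgs(String args) {
        List<String> out = new ArrayList<String>();
        StringBuilder cur = new StringBuilder();
        char quote = 0;                       // 0 = outside any quotes
        for (int i = 0; i < args.length(); i++) {
          char c = args.charAt(i);
          if (quote == 0 && (c == '\'' || c == '"')) {
            quote = c;                        // enter quoted span, drop the quote char
          } else if (quote != 0 && c == quote) {
            quote = 0;                        // leave quoted span
          } else if (quote == 0 && c == ' ') {
            if (cur.length() > 0) { out.add(cur.toString()); cur.setLength(0); }
          } else {
            cur.append(c);
          }
        }
        if (cur.length() > 0) out.add(cur.toString());
        return out.toArray(new String[0]);
      }

      public static void main(String[] args) {
        // Expected tokens: /usr/local/bin/perl5 | filter.pl | an arg with spaces
        for (String tok : splitArgs("/usr/local/bin/perl5 filter.pl 'an arg with spaces'")) {
          System.out.println(tok);
        }
      }
    }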

+ 0 - 1
src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeMapper.java

@@ -25,7 +25,6 @@ import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.OutputCollector;
 
-import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.io.WritableComparable;

+ 0 - 1
src/contrib/streaming/src/java/org/apache/hadoop/streaming/PipeReducer.java

@@ -25,7 +25,6 @@ import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.OutputCollector;
 
-import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.io.WritableComparable;

+ 44 - 27
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamBaseRecordReader.java

@@ -20,14 +20,14 @@ import java.io.*;
 
 
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.util.LogFormatter;
+import org.apache.commons.logging.*;
+
 
 
 /** 
 /** 
  * Shared functionality for hadoopStreaming formats.
  * Shared functionality for hadoopStreaming formats.
@@ -40,7 +40,10 @@ import org.apache.hadoop.util.LogFormatter;
 public abstract class StreamBaseRecordReader implements RecordReader
 public abstract class StreamBaseRecordReader implements RecordReader
 {
 {
     
     
-  protected static final Logger LOG = LogFormatter.getLogger(StreamBaseRecordReader.class.getName());
+  protected static final Log LOG = LogFactory.getLog(StreamBaseRecordReader.class.getName());
+  
+  // custom JobConf properties for this class are prefixed with this namespace
+  final String CONF_NS = "stream.recordreader.";
 
 
   public StreamBaseRecordReader(
   public StreamBaseRecordReader(
     FSDataInputStream in, long start, long end, 
     FSDataInputStream in, long start, long end, 
@@ -49,15 +52,45 @@ public abstract class StreamBaseRecordReader implements RecordReader
   {
   {
     in_ = in;
     in_ = in;
     start_ = start;
     start_ = start;
-    splitName_ = splitName;
     end_ = end;
     end_ = end;
+    length_ = end_ - start_;
+    splitName_ = splitName;
     reporter_ = reporter;
     reporter_ = reporter;
     job_ = job;
     job_ = job;
+    
+    statusMaxRecordChars_ = job_.getInt(CONF_NS + "statuschars", 200);
+  }
+
+  /// RecordReader API
+  
+  /** Read a record. Implementation should call numRecStats at the end
+   */  
+  public abstract boolean next(Writable key, Writable value) throws IOException;
+
+  /** Returns the current position in the input. */
+  public synchronized long getPos() throws IOException 
+  { 
+    return in_.getPos(); 
+  }
+
+  /** Close this to future operations.*/
+  public synchronized void close() throws IOException 
+  { 
+    in_.close(); 
   }
   }
+  
+  /// StreamBaseRecordReader API
 
 
-  /** Called once before the first call to next */
   public void init() throws IOException
   public void init() throws IOException
   {
   {
+    LOG.info("StreamBaseRecordReader.init: " +
+    " start_=" + start_ + " end_=" + end_ + " length_=" + length_ +
+    " start_ > in_.getPos() =" 
+        + (start_ > in_.getPos()) + " " + start_ 
+        + " > " + in_.getPos() );
+    if (start_ > in_.getPos()) {
+      in_.seek(start_);
+    }  
     seekNextRecordBoundary();
     seekNextRecordBoundary();
   }
   }
   
   
@@ -66,17 +99,12 @@ public abstract class StreamBaseRecordReader implements RecordReader
    */
    */
   public abstract void seekNextRecordBoundary() throws IOException;
   public abstract void seekNextRecordBoundary() throws IOException;
   
   
-  
-  /** Read a record. Implementation should call numRecStats at the end
-   */  
-  public abstract boolean next(Writable key, Writable value) throws IOException;
-
-  
+    
   void numRecStats(CharSequence record) throws IOException
   void numRecStats(CharSequence record) throws IOException
   {
   {
     numRec_++;          
     numRec_++;          
     if(numRec_ == nextStatusRec_) {
     if(numRec_ == nextStatusRec_) {
-      nextStatusRec_ +=100000;//*= 10;
+      nextStatusRec_ +=100;//*= 10;
       String status = getStatus(record);
       String status = getStatus(record);
       LOG.info(status);
       LOG.info(status);
       reporter_.setStatus(status);
       reporter_.setStatus(status);
@@ -91,10 +119,9 @@ public abstract class StreamBaseRecordReader implements RecordReader
       pos = getPos();
       pos = getPos();
     } catch(IOException io) {
     } catch(IOException io) {
     }
     }
-    final int M = 2000;
     String recStr;
     String recStr;
-    if(record.length() > M) {
-    	recStr = record.subSequence(0, M) + "...";
+    if(record.length() > statusMaxRecordChars_) {
+        recStr = record.subSequence(0, statusMaxRecordChars_) + "...";
     } else {
     } else {
     	recStr = record.toString();
     	recStr = record.toString();
     }
     }
@@ -103,25 +130,15 @@ public abstract class StreamBaseRecordReader implements RecordReader
     return status;
     return status;
   }
   }
 
 
-  /** Returns the current position in the input. */
-  public synchronized long getPos() throws IOException 
-  { 
-    return in_.getPos(); 
-  }
-
-  /** Close this to future operations.*/
-  public synchronized void close() throws IOException 
-  { 
-    in_.close(); 
-  }
-
   FSDataInputStream in_;
   FSDataInputStream in_;
   long start_;
   long start_;
   long end_;
   long end_;
+  long length_;
   String splitName_;
   String splitName_;
   Reporter reporter_;
   Reporter reporter_;
   JobConf job_;
   JobConf job_;
   int numRec_ = 0;
   int numRec_ = 0;
   int nextStatusRec_ = 1;
   int nextStatusRec_ = 1;
+  int statusMaxRecordChars_;
   
   
 }
 }
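
StreamBaseRecordReader now reads its tuning knobs from the job configuration under the "stream.recordreader." prefix (CONF_NS), with hard-coded defaults when a key is absent. A hedged sketch of how a job would supply and read those keys; the property names come from the diff, the harness around them is illustrative:

    // Illustrative sketch, not part of the commit.
    import org.apache.hadoop.mapred.JobConf;

    public class ReaderConfDemo {
      public static void main(String[] args) {
        JobConf job = new JobConf();
        job.set("stream.recordreader.begin", "<doc>");      // StreamXmlRecordReader start marker
        job.set("stream.recordreader.end", "</doc>");       // StreamXmlRecordReader end marker
        job.set("stream.recordreader.statuschars", "100");  // truncate status strings to 100 chars

        // Same pattern the reader uses: namespaced key, default as second argument.
        int statusMax = job.getInt("stream.recordreader.statuschars", 200);
        System.out.println("status truncation = " + statusMax + " chars");
      }
    }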

+ 15 - 9
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamInputFormat.java

@@ -23,6 +23,8 @@ import java.util.Arrays;
 import java.util.regex.Matcher;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.Pattern;
 
 
+import org.apache.commons.logging.*;
+
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.PathFilter;
@@ -30,11 +32,8 @@ import org.apache.hadoop.fs.FSDataInputStream;
 
 
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.UTF8;
 
 
 import org.apache.hadoop.mapred.*;
 import org.apache.hadoop.mapred.*;
-import org.apache.hadoop.util.LogFormatter;
-
 
 
 /** An input format that performs globbing on DFS paths and 
 /** An input format that performs globbing on DFS paths and 
  * selects a RecordReader based on a JobConf property.
  * selects a RecordReader based on a JobConf property.
@@ -46,7 +45,8 @@ public class StreamInputFormat extends InputFormatBase
   // an InputFormat should be public with the synthetic public default constructor
   // an InputFormat should be public with the synthetic public default constructor
   // JobTracker's JobInProgress will instantiate with clazz.newInstance() (and a custom ClassLoader)
   // JobTracker's JobInProgress will instantiate with clazz.newInstance() (and a custom ClassLoader)
   
   
-  protected static final Logger LOG = LogFormatter.getLogger(StreamInputFormat.class.getName());
+  protected static final Log LOG = LogFactory.getLog(StreamInputFormat.class.getName());
+  
   static {
   static {
     //LOG.setLevel(Level.FINE);
     //LOG.setLevel(Level.FINE);
   }
   }
@@ -59,7 +59,7 @@ public class StreamInputFormat extends InputFormatBase
     int dsup = globs.length;
     int dsup = globs.length;
     for(int d=0; d<dsup; d++) {
     for(int d=0; d<dsup; d++) {
       String leafName = globs[d].getName();
       String leafName = globs[d].getName();
-      LOG.fine("StreamInputFormat: globs[" + d + "] leafName = " + leafName);
+      LOG.info("StreamInputFormat: globs[" + d + "] leafName = " + leafName);
       Path[] paths; Path dir;
       Path[] paths; Path dir;
 	  PathFilter filter = new GlobFilter(fs, leafName);
 	  PathFilter filter = new GlobFilter(fs, leafName);
 	  dir = new Path(globs[d].getParent().toString());
 	  dir = new Path(globs[d].getParent().toString());
@@ -79,7 +79,13 @@ public class StreamInputFormat extends InputFormatBase
     }
     }
     String globToRegexp(String glob)
     String globToRegexp(String glob)
 	{
 	{
-	  return glob.replaceAll("\\*", ".*");
+      String re = glob;
+      re = re.replaceAll("\\.", "\\\\.");
+      re = re.replaceAll("\\+", "\\\\+");
+	  re = re.replaceAll("\\*", ".*");
+      re = re.replaceAll("\\?", ".");
+      LOG.info("globToRegexp: |" + glob + "|  ->  |" + re + "|");
+      return re;
 	}
 	}
 
 
     public boolean accept(Path pathname)
     public boolean accept(Path pathname)
@@ -88,7 +94,7 @@ public class StreamInputFormat extends InputFormatBase
       if(acc) {
       if(acc) {
       	acc = pat_.matcher(pathname.getName()).matches();
       	acc = pat_.matcher(pathname.getName()).matches();
       }
       }
-      LOG.finer("matches " + pat_ + ", " + pathname + " = " + acc);
+      LOG.info("matches " + pat_ + ", " + pathname + " = " + acc);
       return acc;
       return acc;
     }
     }
 	
 	
@@ -99,7 +105,7 @@ public class StreamInputFormat extends InputFormatBase
   public RecordReader getRecordReader(FileSystem fs, final FileSplit split,
   public RecordReader getRecordReader(FileSystem fs, final FileSplit split,
                                       JobConf job, Reporter reporter)
                                       JobConf job, Reporter reporter)
     throws IOException {
     throws IOException {
-    LOG.finer("getRecordReader start.....");
+    LOG.info("getRecordReader start.....");
     reporter.setStatus(split.toString());
     reporter.setStatus(split.toString());
 
 
     final long start = split.getStart();
     final long start = split.getStart();
@@ -143,5 +149,5 @@ public class StreamInputFormat extends InputFormatBase
     
     
     return reader;
     return reader;
   }
   }
-  
+
 }
 }
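
The globToRegexp() change above escapes '.' and '+' before translating the glob wildcards, so a literal dot in a path no longer matches arbitrary characters. A small illustrative harness around the same translation (the class and file names below are made up):

    // Illustrative sketch of the glob-to-regexp idea used by GlobFilter.
    import java.util.regex.Pattern;

    public class GlobDemo {
      static String globToRegexp(String glob) {
        String re = glob;
        re = re.replaceAll("\\.", "\\\\.");   // escape literal dot
        re = re.replaceAll("\\+", "\\\\+");   // escape literal plus
        re = re.replaceAll("\\*", ".*");      // glob wildcard -> any run of chars
        re = re.replaceAll("\\?", ".");       // single-char wildcard
        return re;
      }

      public static void main(String[] args) {
        Pattern p = Pattern.compile(globToRegexp("part-*.txt"));
        System.out.println(p.matcher("part-00000.txt").matches()); // true
        System.out.println(p.matcher("part-00000.dat").matches()); // false
      }
    }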

+ 128 - 52
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamJob.java

@@ -23,6 +23,8 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.Iterator;
 
 
+import org.apache.commons.logging.*;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configuration;
 
 
 import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.UTF8;
@@ -32,16 +34,14 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.RunningJob;
 import org.apache.hadoop.mapred.RunningJob;
 
 
-import org.apache.hadoop.util.LogFormatter;
-
 /** All the client-side work happens here. 
 /** All the client-side work happens here. 
  * (Jar packaging, MapRed job submission and monitoring)
  * (Jar packaging, MapRed job submission and monitoring)
  * @author Michel Tourn
  * @author Michel Tourn
  */
  */
 public class StreamJob
 public class StreamJob
 {
 {
-  protected static final Logger LOG = LogFormatter.getLogger(StreamJob.class.getName());
-    
+  protected static final Log LOG = LogFactory.getLog(StreamJob.class.getName());    
+  
   public StreamJob(String[] argv, boolean mayExit)
   public StreamJob(String[] argv, boolean mayExit)
   {
   {
     argv_ = argv;
     argv_ = argv;
@@ -72,9 +72,10 @@ public class StreamJob
   void preProcessArgs()
   void preProcessArgs()
   {
   {
     verbose_ = false;
     verbose_ = false;
+    addTaskEnvironment_ = "";
   }
   }
   
   
-  void postProcessArgs()
+  void postProcessArgs() throws IOException
   {
   {
     if(cluster_ == null) {
     if(cluster_ == null) {
         // hadoop-default.xml is standard, hadoop-local.xml is not.
         // hadoop-default.xml is standard, hadoop-local.xml is not.
@@ -87,22 +88,35 @@ public class StreamJob
     if(output_ == null) {
     if(output_ == null) {
         fail("Required argument: -output ");
         fail("Required argument: -output ");
     }
     }
-    // careful with class names..
-    mapCmd_ = packageOrTrimNoShip(mapCmd_);
-    redCmd_ = packageOrTrimNoShip(redCmd_);
+    msg("addTaskEnvironment=" + addTaskEnvironment_);
+
+    Iterator it = packageFiles_.iterator();
+    while(it.hasNext()) {
+      File f = new File((String)it.next());    
+      if(f.isFile()) {
+        shippedCanonFiles_.add(f.getCanonicalPath());
+      }
+    }
+    msg("shippedCanonFiles_=" + shippedCanonFiles_);
     
     
-    // TBD -D format or sthg on cmdline. 
-    // Plus maybe a standard list originating on client or server    
-    addTaskEnvironment_ = ""; 
+    // careful with class names..
+    mapCmd_ = unqualifyIfLocalPath(mapCmd_);
+    redCmd_ = unqualifyIfLocalPath(redCmd_);    
+  }
+  
+  void validateNameEqValue(String neqv)
+  {
+    String[] nv = neqv.split("=", 2);
+    if(nv.length < 2) {
+        fail("Invalid name=value spec: " + neqv);
+    }
+    msg("Recording name=value: name=" + nv[0] + " value=" + nv[1]);
   }
   }
   
   
-  String packageOrTrimNoShip(String cmd)
+  String unqualifyIfLocalPath(String cmd) throws IOException
   {
   {
     if(cmd == null) {
     if(cmd == null) {
       //    
       //    
-    } else if(cmd.startsWith(NOSHIP)) {
-      // don't package the file, but keep the abolute path
-      cmd = cmd.substring(NOSHIP.length());
     } else {
     } else {
       String prog = cmd;
       String prog = cmd;
       String args = "";
       String args = "";
@@ -111,18 +125,23 @@ public class StreamJob
         prog = cmd.substring(0, s);
         prog = cmd.substring(0, s);
         args = cmd.substring(s+1);
         args = cmd.substring(s+1);
       }
       }
-      packageFiles_.add(new File(prog).getAbsolutePath());
-      // Change path to simple filename. 
-      // That way when PipeMapRed calls Runtime.exec(), 
-      // it will look for the excutable in Task's working dir.
-      // And this is where TaskRunner unjars our job jar.
-      prog = new File(prog).getName();
-      if(args.length() > 0) {
-        cmd = prog + " " + args;
-      } else {
-        cmd = prog;
+      String progCanon = new File(prog).getCanonicalPath();
+      boolean shipped = shippedCanonFiles_.contains(progCanon);
+      msg("shipped: " + shipped + " " + progCanon);
+      if(shipped) {
+        // Change path to simple filename. 
+        // That way when PipeMapRed calls Runtime.exec(), 
+        // it will look for the excutable in Task's working dir.
+        // And this is where TaskRunner unjars our job jar.
+        prog = new File(prog).getName();
+        if(args.length() > 0) {
+          cmd = prog + " " + args;
+        } else {
+          cmd = prog;
+        }
       }
       }
     }
     }
+    msg("cmd=" + cmd);
     return cmd;
     return cmd;
   }
   }
   
   
@@ -130,17 +149,20 @@ public class StreamJob
   {
   {
     return new File(getHadoopClientHome() + "/conf", hadoopAliasConf_).getAbsolutePath();
     return new File(getHadoopClientHome() + "/conf", hadoopAliasConf_).getAbsolutePath();
   }
   }
+   
   
   
   void parseArgv()
   void parseArgv()
   {
   {
     if(argv_.length==0) {
     if(argv_.length==0) {
-      exitUsage();
+      exitUsage(false);
     }
     }
     int i=0; 
     int i=0; 
     while(i < argv_.length) {
     while(i < argv_.length) {
       String s;
       String s;
       if(argv_[i].equals("-verbose")) {
       if(argv_[i].equals("-verbose")) {
         verbose_ = true;      
         verbose_ = true;      
+      } else if(argv_[i].equals("-info")) {
+        detailedUsage_ = true;      
       } else if(argv_[i].equals("-debug")) {
       } else if(argv_[i].equals("-debug")) {
         debug_++;
         debug_++;
       } else if((s = optionArg(argv_, i, "-input", false)) != null) {
       } else if((s = optionArg(argv_, i, "-input", false)) != null) {
@@ -155,7 +177,7 @@ public class StreamJob
       } else if((s = optionArg(argv_, i, "-reducer", redCmd_ != null)) != null) {
       } else if((s = optionArg(argv_, i, "-reducer", redCmd_ != null)) != null) {
         i++;
         i++;
         redCmd_ = s;
         redCmd_ = s;
-      } else if((s = optionArg(argv_, i, "-files", false)) != null) {
+      } else if((s = optionArg(argv_, i, "-file", false)) != null) {
         i++;
         i++;
         packageFiles_.add(s);
         packageFiles_.add(s);
       } else if((s = optionArg(argv_, i, "-cluster", cluster_ != null)) != null) {
       } else if((s = optionArg(argv_, i, "-cluster", cluster_ != null)) != null) {
@@ -164,15 +186,35 @@ public class StreamJob
       } else if((s = optionArg(argv_, i, "-config", false)) != null) {
       } else if((s = optionArg(argv_, i, "-config", false)) != null) {
         i++;
         i++;
         configPath_.add(s);
         configPath_.add(s);
+      } else if((s = optionArg(argv_, i, "-dfs", false)) != null) {
+        i++;
+        userJobConfProps_.add("fs.default.name="+s);
+      } else if((s = optionArg(argv_, i, "-jt", false)) != null) {
+        i++;
+        userJobConfProps_.add("mapred.job.tracker="+s);
+      } else if((s = optionArg(argv_, i, "-jobconf", false)) != null) {
+        i++;
+        validateNameEqValue(s);
+        userJobConfProps_.add(s);
+      } else if((s = optionArg(argv_, i, "-cmdenv", false)) != null) {
+        i++;
+        validateNameEqValue(s);
+        if(addTaskEnvironment_.length() > 0) {
+            addTaskEnvironment_ += " ";
+        }
+        addTaskEnvironment_ += s;
       } else if((s = optionArg(argv_, i, "-inputreader", inReaderSpec_ != null)) != null) {
       } else if((s = optionArg(argv_, i, "-inputreader", inReaderSpec_ != null)) != null) {
         i++;
         i++;
         inReaderSpec_ = s;
         inReaderSpec_ = s;
       } else {
       } else {
         System.err.println("Unexpected argument: " + argv_[i]);
         System.err.println("Unexpected argument: " + argv_[i]);
-        exitUsage();
+        exitUsage(false);
       }
       }
       i++;
       i++;
     }
     }
+    if(detailedUsage_) {
+        exitUsage(true);
+    }
   }
   }
   
   
   String optionArg(String[] args, int index, String arg, boolean argSet)
   String optionArg(String[] args, int index, String arg, boolean argSet)
@@ -196,22 +238,32 @@ public class StreamJob
     }
     }
   }
   }
 
 
-  public void exitUsage()
+  public void exitUsage(boolean detailed)
   {
   {
                       //         1         2         3         4         5         6         7         
                       //         1         2         3         4         5         6         7         
                       //1234567890123456789012345678901234567890123456789012345678901234567890123456789
                       //1234567890123456789012345678901234567890123456789012345678901234567890123456789
-    System.out.println("Usage: bin/hadoop jar build/hadoop-streaming.jar [options]");
+    System.out.println("Usage: $HADOOP_HOME/bin/hadoop jar build/hadoop-streaming.jar [options]");
     System.out.println("Options:");
     System.out.println("Options:");
-    System.out.println("  -input   <path>     DFS input file(s) for the Map step");
-    System.out.println("  -output  <path>     DFS output directory for the Reduce step");
-    System.out.println("  -mapper  <cmd>      The streaming command to run");
-    System.out.println("  -reducer <cmd>      The streaming command to run");
-    System.out.println("  -files   <file>     Additional files to be shipped in the Job jar file");
-    System.out.println("  -cluster <name>     Default uses hadoop-default.xml and hadoop-site.xml");
-    System.out.println("  -config  <file>     Optional. One or more paths to xml config files");
-    System.out.println("  -inputreader <spec> Optional. See below");
+    System.out.println("  -input    <path>     DFS input file(s) for the Map step");
+    System.out.println("  -output   <path>     DFS output directory for the Reduce step");
+    System.out.println("  -mapper   <cmd>      The streaming command to run");
+    System.out.println("  -combiner <cmd>      Not implemented. But you can pipe the mapper output");
+    System.out.println("  -reducer  <cmd>      The streaming command to run");
+    System.out.println("  -file     <file>     File/dir to be shipped in the Job jar file");
+    System.out.println("  -cluster  <name>     Default uses hadoop-default.xml and hadoop-site.xml");
+    System.out.println("  -config   <file>     Optional. One or more paths to xml config files");
+    System.out.println("  -dfs      <h:p>      Optional. Override DFS configuration");
+    System.out.println("  -jt       <h:p>      Optional. Override JobTracker configuration");
+    System.out.println("  -inputreader <spec>  Optional.");
+    System.out.println("  -jobconf  <n>=<v>    Optional.");
+    System.out.println("  -cmdenv   <n>=<v>    Optional. Pass env.var to streaming commands");
     System.out.println("  -verbose");
     System.out.println("  -verbose");
     System.out.println();
     System.out.println();
+    if(!detailed) {    
+    System.out.println("For more details about these options:");
+    System.out.println("Use $HADOOP_HOME/bin/hadoop jar build/hadoop-streaming.jar -info");
+        fail("");
+    }
     System.out.println("In -input: globbing on <path> is supported and can have multiple -input");
     System.out.println("In -input: globbing on <path> is supported and can have multiple -input");
     System.out.println("Default Map input format: a line is a record in UTF-8");
     System.out.println("Default Map input format: a line is a record in UTF-8");
     System.out.println("  the key part ends at first TAB, the rest of the line is the value");
     System.out.println("  the key part ends at first TAB, the rest of the line is the value");
@@ -220,21 +272,34 @@ public class StreamJob
     System.out.println("  Ex: -inputreader 'StreamXmlRecordReader,begin=<doc>,end=</doc>'");
     System.out.println("  Ex: -inputreader 'StreamXmlRecordReader,begin=<doc>,end=</doc>'");
     System.out.println("Map output format, reduce input/output format:");
     System.out.println("Map output format, reduce input/output format:");
     System.out.println("  Format defined by what mapper command outputs. Line-oriented");
     System.out.println("  Format defined by what mapper command outputs. Line-oriented");
-    System.out.println("Mapper and Reducer <cmd> syntax: ");
-    System.out.println("  If the mapper or reducer programs are prefixed with " + NOSHIP + " then ");
-    System.out.println("  the paths are assumed to be valid absolute paths on the task tracker machines");
-    System.out.println("  and are NOT packaged with the Job jar file.");
+    System.out.println();
     System.out.println("Use -cluster <name> to switch between \"local\" Hadoop and one or more remote ");
     System.out.println("Use -cluster <name> to switch between \"local\" Hadoop and one or more remote ");
     System.out.println("  Hadoop clusters. ");
     System.out.println("  Hadoop clusters. ");
     System.out.println("  The default is to use the normal hadoop-default.xml and hadoop-site.xml");
     System.out.println("  The default is to use the normal hadoop-default.xml and hadoop-site.xml");
     System.out.println("  Else configuration will use $HADOOP_HOME/conf/hadoop-<name>.xml");
     System.out.println("  Else configuration will use $HADOOP_HOME/conf/hadoop-<name>.xml");
     System.out.println();
     System.out.println();
-    System.out.println("Example: hadoopStreaming -mapper \"noship:/usr/local/bin/perl5 filter.pl\"");
-    System.out.println("           -files /local/filter.pl -input \"/logs/0604*/*\" [...]");
+    System.out.println("To set the number of reduce tasks (num. of output files):");
+    System.out.println("  -jobconf mapred.reduce.tasks=10");
+    System.out.println("To change the local temp directory:");
+    System.out.println("  -jobconf dfs.data.dir=/tmp");
+    System.out.println("Additional local temp directories with -cluster local:");
+    System.out.println("  -jobconf mapred.local.dir=/tmp/local");
+    System.out.println("  -jobconf mapred.system.dir=/tmp/system");
+    System.out.println("  -jobconf mapred.temp.dir=/tmp/temp");
+    System.out.println("For more details about jobconf parameters see:");
+    System.out.println("  http://wiki.apache.org/lucene-hadoop/JobConfFile");
+    System.out.println("To set an environement variable in a streaming command:");
+    System.out.println("   -cmdenv EXAMPLE_DIR=/home/example/dictionaries/");
+    System.out.println();
+    System.out.println("Shortcut to run from any directory:");
+    System.out.println("   setenv HSTREAMING \"$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/build/hadoop-streaming.jar\"");
+    System.out.println();
+    System.out.println("Example: $HSTREAMING -mapper \"/usr/local/bin/perl5 filter.pl\"");
+    System.out.println("           -file /local/filter.pl -input \"/logs/0604*/*\" [...]");
     System.out.println("  Ships a script, invokes the non-shipped perl interpreter");
     System.out.println("  Ships a script, invokes the non-shipped perl interpreter");
     System.out.println("  Shipped files go to the working directory so filter.pl is found by perl");
     System.out.println("  Shipped files go to the working directory so filter.pl is found by perl");
     System.out.println("  Input files are all the daily logs for days in month 2006-04");
     System.out.println("  Input files are all the daily logs for days in month 2006-04");
-    fail("");    
+    fail("");
   }
   }
   
   
   public void fail(String message)
   public void fail(String message)
@@ -291,7 +356,7 @@ public class StreamJob
         msg("Found runtime classes in: " + runtimeClasses);
         msg("Found runtime classes in: " + runtimeClasses);
     }
     }
     if(isLocalHadoop()) {
     if(isLocalHadoop()) {
-      // don't package class files (they might get unpackaged in . and then 
+      // don't package class files (they might get unpackaged in "." and then 
       //  hide the intended CLASSPATH entry)
       //  hide the intended CLASSPATH entry)
       // we still package everything else (so that scripts and executable are found in 
       // we still package everything else (so that scripts and executable are found in 
       //  Task workdir like distributed Hadoop)
       //  Task workdir like distributed Hadoop)
@@ -393,7 +458,17 @@ public class StreamJob
     if(jar_ != null) {
     if(jar_ != null) {
         jobConf_.setJar(jar_);
         jobConf_.setJar(jar_);
     }
     }
-    //jobConf_.mtdump();System.exit(1);
+
+    // last, allow user to override anything 
+    // (although typically used with properties we didn't touch)
+    it = userJobConfProps_.iterator();
+    while(it.hasNext()) {
+        String prop = (String)it.next();
+        String[] nv = prop.split("=", 2);
+        msg("JobConf: set(" + nv[0] + ", " + nv[1]+")");
+        jobConf_.set(nv[0], nv[1]);
+    }   
+    
   }
   }
   
   
   protected String getJobTrackerHostPort()
   protected String getJobTrackerHostPort()
@@ -432,7 +507,7 @@ public class StreamJob
       running_ = jc_.submitJob(jobConf_);
       running_ = jc_.submitJob(jobConf_);
       jobId_ = running_.getJobID();
       jobId_ = running_.getJobID();
 
 
-      LOG.info("getLocalDirs(): " + Arrays.toString(jobConf_.getLocalDirs()));     
+      LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));     
       LOG.info("Running job: " + jobId_);      
       LOG.info("Running job: " + jobId_);      
       jobInfo();
       jobInfo();
 
 
@@ -467,11 +542,10 @@ public class StreamJob
   }
   }
   
   
 
 
-  public final static String NOSHIP = "noship:";
-  
   protected boolean mayExit_;
   protected boolean mayExit_;
   protected String[] argv_;
   protected String[] argv_;
   protected boolean verbose_;
   protected boolean verbose_;
+  protected boolean detailedUsage_;
   protected int debug_;
   protected int debug_;
 
 
   protected Environment env_;
   protected Environment env_;
@@ -483,8 +557,10 @@ public class StreamJob
   protected JobClient jc_;
   protected JobClient jc_;
 
 
   // command-line arguments
   // command-line arguments
-  protected ArrayList inputGlobs_   = new ArrayList(); // <String>
-  protected ArrayList packageFiles_ = new ArrayList(); // <String>
+  protected ArrayList inputGlobs_       = new ArrayList(); // <String>
+  protected ArrayList packageFiles_     = new ArrayList(); // <String>
+  protected ArrayList shippedCanonFiles_= new ArrayList(); // <String>  
+  protected ArrayList userJobConfProps_ = new ArrayList(); // <String>
   protected String output_;
   protected String output_;
   protected String mapCmd_;
   protected String mapCmd_;
   protected String redCmd_;
   protected String redCmd_;
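
The new -jobconf, -dfs and -jt options all funnel through userJobConfProps_ as name=value strings and are applied to the JobConf last, so they override anything the tool set earlier. A hedged sketch of that split-and-set step (JobConf.set/get are real Hadoop API; the demo class is not from the patch):

    // Illustrative sketch, not part of the commit.
    import org.apache.hadoop.mapred.JobConf;

    public class JobConfOverrideDemo {
      public static void main(String[] args) {
        String[] userProps = {                      // as collected from -jobconf/-dfs/-jt
          "mapred.reduce.tasks=10",
          "fs.default.name=localhost:8020"
        };
        JobConf jobConf = new JobConf();
        for (String prop : userProps) {
          String[] nv = prop.split("=", 2);         // split on the first '=' only
          if (nv.length < 2) {
            throw new IllegalArgumentException("Invalid name=value spec: " + prop);
          }
          jobConf.set(nv[0], nv[1]);                // user overrides win, applied last
        }
        System.out.println("reduce tasks = " + jobConf.get("mapred.reduce.tasks"));
      }
    }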

+ 1 - 1
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamLineRecordReader.java

@@ -69,7 +69,7 @@ public class StreamLineRecordReader extends StreamBaseRecordReader
       return false;
 
     //((LongWritable)key).set(pos);           // key is position
-    //((UTF8)value).set(readLine(in));        // value is line
+    //((UTF8)value).set(readLine(in));   // value is line
     String line = readLine(in_);
 
     // key is line up to TAB, value is rest

+ 18 - 2
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamUtil.java

@@ -198,7 +198,7 @@ public class StreamUtil
   static {
     try {
       env = new Environment();
-      HOST = env.get("HOST").toString();
+      HOST = env.getHost();
     } catch(IOException io) {
       io.printStackTrace();
     }
@@ -275,6 +275,22 @@ public class StreamUtil
     }
   }
   
+  static final String regexpSpecials = "[]()?*+|.!^-\\~@";
+  
+  public static String regexpEscape(String plain)
+  {
+    StringBuffer buf = new StringBuffer();
+    char[] ch = plain.toCharArray();
+    int csup = ch.length;
+    for(int c=0; c<csup; c++) {
+      if(regexpSpecials.indexOf(ch[c]) != -1) {
+        buf.append("\\");    
+      }
+      buf.append(ch[c]);
+    }
+    return buf.toString();
+  }
+  
   static String slurp(File f) throws IOException
   {
     FileInputStream in = new FileInputStream(f);
@@ -298,5 +314,5 @@ public class StreamUtil
     }
     return env_;
   }
-
+  
 }

+ 246 - 25
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamXmlRecordReader.java

@@ -17,10 +17,12 @@
 package org.apache.hadoop.streaming;
 package org.apache.hadoop.streaming;
 
 
 import java.io.*;
 import java.io.*;
+import java.util.regex.*;
 
 
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.Reporter;
@@ -32,6 +34,14 @@ import org.apache.hadoop.mapred.JobConf;
  *  Values are XML subtrees delimited by configurable tags.
  *  Values are XML subtrees delimited by configurable tags.
  *  Keys could be the value of a certain attribute in the XML subtree, 
  *  Keys could be the value of a certain attribute in the XML subtree, 
  *  but this is left to the stream processor application.
  *  but this is left to the stream processor application.
+ *
+ *  The name-value properties that StreamXmlRecordReader understands are:
+ *    String begin (chars marking beginning of record)
+ *    String end   (chars marking end of record)
+ *    int maxrec   (maximum record size)
+ *    int lookahead(maximum lookahead to sync CDATA)
+ *    boolean slowmatch
+ *
  *  @author Michel Tourn
  *  @author Michel Tourn
  */
  */
 public class StreamXmlRecordReader extends StreamBaseRecordReader 
 public class StreamXmlRecordReader extends StreamBaseRecordReader 
@@ -42,67 +52,278 @@ public class StreamXmlRecordReader extends StreamBaseRecordReader
     throws IOException
     throws IOException
   {
   {
     super(in, start, end, splitName, reporter, job);
     super(in, start, end, splitName, reporter, job);
-    beginMark_ = checkJobGet("stream.recordreader.begin");
-    endMark_   = checkJobGet("stream.recordreader.end");
-  }
+    
+    beginMark_ = checkJobGet(CONF_NS + "begin");
+    endMark_   = checkJobGet(CONF_NS + "end");
 
 
-  String checkJobGet(String prop) throws IOException
-  {
-  	String val = job_.get(prop);
-  	if(val == null) {
-  		throw new IOException("JobConf: missing required property: " + prop);
-  	}
-  	return val;
+    maxRecSize_= job_.getInt(CONF_NS + "maxrec", 50*1000);
+    lookAhead_ = job_.getInt(CONF_NS + "lookahead", 2*maxRecSize_);
+    synched_ = false;
+    
+    slowMatch_ = job_.getBoolean(CONF_NS + "slowmatch", false);
+    if(slowMatch_) {
+      beginPat_  = makePatternCDataOrMark(beginMark_);
+      endPat_    = makePatternCDataOrMark(endMark_);
+    }
   }
   
-  public void seekNextRecordBoundary() throws IOException
-  {
-  System.out.println("@@@start seekNext " + in_.getPos());
-    readUntilMatch(beginMark_, null);      
-  System.out.println("@@@end   seekNext " + in_.getPos());
-  }
-    
+  int numNext = 0;
   public synchronized boolean next(Writable key, Writable value)
    throws IOException
   {
     long pos = in_.getPos();
-    if (pos >= end_)
+    numNext++;
+    if (pos >= end_) {
       return false;
+    }
     
     StringBuffer buf = new StringBuffer();
-    readUntilMatch(endMark_, buf);
+    if(!readUntilMatchBegin()) {
+        return false;
+    }
+    if(!readUntilMatchEnd(buf)) {
+        return false;
+    }
     numRecStats(buf);
+    
+    // There is only one elem..key/value splitting is not done here.
+    ((UTF8)key).set(buf.toString());
+    ((UTF8)value).set("");
+    
+    /*if(numNext < 5) {
+        System.out.println("@@@ " + numNext + ". true next k=|" + key.toString().replaceAll("[\\r\\n]", " ")
+        + "|, len=" + buf.length() + " v=|" + value.toString().replaceAll("[\\r\\n]", " ") + "|");
+    }*/
+
     return true;
   }
+  
+  public void seekNextRecordBoundary() throws IOException
+  {
+    readUntilMatchBegin();
+  }
+  
+  boolean readUntilMatchBegin() throws IOException
+  {
+    if(slowMatch_) {
+        return slowReadUntilMatch(beginPat_, false, null);
+    } else {
+        return fastReadUntilMatch(beginMark_, false, null);
+    }
+  }
+  
+  boolean readUntilMatchEnd(StringBuffer buf) throws IOException
+  {
+    if(slowMatch_) {
+      return slowReadUntilMatch(endPat_, true, buf);
+    } else {
+      return fastReadUntilMatch(endMark_, true, buf);
+    }
+  }
+  
+  
+  boolean slowReadUntilMatch(Pattern markPattern, boolean includePat, StringBuffer outBufOrNull) 
+    throws IOException   
+  {
+    try {
+      long inStart = in_.getPos();
+      byte[] buf = new byte[Math.max(lookAhead_, maxRecSize_)];
+      int read = 0;
+      boolean success = true;
+      in_.mark(lookAhead_ + 2);
+      read = in_.read(buf);
+      String sbuf = new String(buf);        
+      Matcher match = markPattern.matcher(sbuf);
 
-  void readUntilMatch(String pat, StringBuffer outBuf) throws IOException 
+      firstMatchStart_ = NA;
+      firstMatchEnd_ = NA;
+      int bufPos = 0;
+      int state = synched_ ? CDATA_OUT : CDATA_UNK;
+      int s=0;
+      int matchLen = 0;
+      while(match.find(bufPos)) {
+        int input;
+        matchLen = match.group(0).length();
+        if(match.group(1) != null) {
+          input = CDATA_BEGIN;
+        } else if(match.group(2) != null) {
+          input = CDATA_END;
+          firstMatchStart_ = NA; // |<DOC CDATA[ </DOC> ]]> should keep it
+        } else {
+          input = RECORD_MAYBE;
+        }
+        if(input == RECORD_MAYBE) {
+            if(firstMatchStart_ == NA) {
+              firstMatchStart_ = match.start();
+              firstMatchEnd_   = match.end();
+            }
+        }
+        state = nextState(state, input, match.start());
+        /*System.out.println("@@@" +
+         s + ". Match " + match.start() + " " + match.groupCount() +
+         " state=" + state + " input=" + input + 
+         " firstMatchStart_=" + firstMatchStart_ + " startinstream=" + (inStart+firstMatchStart_) + 
+         " match=" + match.group(0) + " in=" + in_.getPos());*/
+        if(state == RECORD_ACCEPT) {
+          break;
+        }
+        bufPos = match.end();
+        s++;
+      }
+      if(state != CDATA_UNK) {
+        synched_ = true;
+      }
+      boolean matched = (firstMatchStart_ != NA) && (state == RECORD_ACCEPT || state == CDATA_UNK); 
+      if(matched) {
+        int endPos = includePat ? firstMatchEnd_ : firstMatchStart_;
+        //System.out.println("firstMatchStart_=" + firstMatchStart_ + " firstMatchEnd_=" + firstMatchEnd_);
+        String snip = sbuf.substring(firstMatchStart_, firstMatchEnd_);
+        //System.out.println(" match snip=|" + snip + "| markPattern=" + markPattern);
+        if(outBufOrNull != null) {
+          buf = new byte[endPos];
+          in_.reset();      
+          read = in_.read(buf);
+          if(read != endPos) {
+              //System.out.println("@@@ BAD re-read less: " + read + " < " + endPos);
+          }          
+          outBufOrNull.append(new String(buf));
+        } else {
+          //System.out.println("Skip to " + (inStart + endPos));
+          in_.seek(inStart + endPos);
+        }
+      }
+      return matched;
+    } catch(Exception e) {
+      e.printStackTrace();
+    } finally {
+      // in_ ?
+    }
+    return false;
+  }  
+  
+  // states
+  final static int CDATA_IN  = 10;
+  final static int CDATA_OUT = 11;
+  final static int CDATA_UNK = 12;
+  final static int RECORD_ACCEPT = 13;
+  // inputs
+  final static int CDATA_BEGIN = 20;
+  final static int CDATA_END   = 21;
+  final static int RECORD_MAYBE= 22;
+  
+  /* also updates firstMatchStart_;*/
+  int nextState(int state, int input, int bufPos)
   {
+    switch(state) {
+      case CDATA_UNK:
+      case CDATA_OUT:
+        switch(input) {
+          case CDATA_BEGIN:
+            return CDATA_IN;
+          case CDATA_END:
+            if(state==CDATA_OUT) {
+              //System.out.println("buggy XML " + bufPos);
+            }
+            return CDATA_OUT;
+          case RECORD_MAYBE:
+            return (state==CDATA_UNK) ? CDATA_UNK : RECORD_ACCEPT;
+        }
+      break;
+      case CDATA_IN:
+       return (input==CDATA_END) ? CDATA_OUT : CDATA_IN;
+    }
+    throw new IllegalStateException(state + " " + input + " " + bufPos + " " + splitName_);
+  }
+  
     
     
-    char[] cpat = pat.toCharArray();
+  Pattern makePatternCDataOrMark(String escapedMark)
+  {
+    StringBuffer pat = new StringBuffer();
+    addGroup(pat, StreamUtil.regexpEscape("CDATA["));   // CDATA_BEGIN
+    addGroup(pat, StreamUtil.regexpEscape("]]>"));      // CDATA_END
+    addGroup(pat, escapedMark);                         // RECORD_MAYBE
+    return Pattern.compile(pat.toString());
+  }
+  void addGroup(StringBuffer pat, String escapedGroup)
+  {
+    if(pat.length() > 0) {
+        pat.append("|");
+    }
+    pat.append("(");
+    pat.append(escapedGroup);
+    pat.append(")");
+  }
+  
+  
+  
+  boolean fastReadUntilMatch(String textPat, boolean includePat, StringBuffer outBufOrNull) throws IOException 
+  {
+    //System.out.println("@@@BEGIN readUntilMatch inPos=" + in_.getPos());  
+    char[] cpat = textPat.toCharArray();
     int m = 0;
+    boolean match = false;
+    long markPos = -1;
     int msup = cpat.length;
+    if(!includePat) {
+      int LL = 120000 * 10;
+      markPos = in_.getPos();
+      in_.mark(LL); // lookAhead_
+    }
     while (true) {
       int b = in_.read();
       if (b == -1)
         break;
 
       char c = (char)b; // this assumes eight-bit matching. OK with UTF-8
+      if(outBufOrNull != null) {
+        outBufOrNull.append(c);
+      }
       if (c == cpat[m]) {
         m++;
-        if(m==msup-1) {
+        if(m==msup) {
+          match = true;
           break;
         }
       } else {
         m = 0;
       }
-      if(outBuf != null) {
-        outBuf.append(c);
+    }
+    if(!includePat && match) {
+      if(outBufOrNull != null) {
+        outBufOrNull.setLength(outBufOrNull.length() - textPat.length());
       }
+      long pos = in_.getPos() - textPat.length();
+      in_.reset();
+      in_.seek(pos);
     }
-System.out.println("@@@START readUntilMatch(" + pat + ", " + outBuf + "\n@@@END readUntilMatch");
+    //System.out.println("@@@DONE  readUntilMatch inPos=" + in_.getPos() + " includePat=" + includePat + " pat=" + textPat + ", buf=|" + outBufOrNull + "|");
+    return match;
+  }
+  
+  String checkJobGet(String prop) throws IOException
+  {
+    String val = job_.get(prop);
+    if(val == null) {
+        throw new IOException("JobConf: missing required property: " + prop);
+    }
+    return val;
   }
   
   
   String beginMark_;
   String endMark_;
+  
+  Pattern beginPat_;
+  Pattern endPat_;
+
+  boolean slowMatch_;  
+  int lookAhead_; // bytes to read to try to synch CDATA/non-CDATA. Should be more than max record size
+  int maxRecSize_;
+
+  final static int NA = -1;  
+  int firstMatchStart_ = 0; // candidate record boundary. Might just be CDATA.
+  int firstMatchEnd_ = 0;
+  
+  boolean isRecordMatch_;
+  boolean synched_;
 }

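When slowmatch is enabled, the reader builds one alternation per token kind (via makePatternCDataOrMark) and feeds each hit into the nextState machine, so an end tag inside a CDATA section is not mistaken for a record boundary. A rough, self-contained sketch of that pattern for an assumed end mark of "</doc>"; the sample input is invented:

    // Alternation mirroring makePatternCDataOrMark("</doc>"): group 1 = CDATA_BEGIN,
    // group 2 = CDATA_END, group 3 = RECORD_MAYBE.
    java.util.regex.Pattern p =
        java.util.regex.Pattern.compile("(CDATA\\[)|(\\]\\]>)|(</doc>)");
    java.util.regex.Matcher m = p.matcher("<doc>x<![CDATA[ </doc> ]]>y</doc>");
    while (m.find()) {
      String kind = m.group(1) != null ? "CDATA_BEGIN"
                  : m.group(2) != null ? "CDATA_END" : "RECORD_MAYBE";
      System.out.println(kind + " at " + m.start());
    }
    // Prints CDATA_BEGIN, RECORD_MAYBE, CDATA_END, RECORD_MAYBE; only the final
    // </doc>, which falls outside the CDATA section, should end the record.
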
+ 2 - 2
src/java/org/apache/hadoop/dfs/DFSClient.java

@@ -808,7 +808,7 @@ class DFSClient implements FSConstants {
                     localName, overwrite, replication, blockSize);
               } catch (RemoteException e) {
                 if (--retries == 0 || 
-                    AlreadyBeingCreatedException.class.getName().
+                    !AlreadyBeingCreatedException.class.getName().
                         equals(e.getClassName())) {
                   throw e;
                 } else {
@@ -838,7 +838,7 @@ class DFSClient implements FSConstants {
                                          clientName.toString());
               } catch (RemoteException e) {
                 if (--retries == 0 || 
-                    NotReplicatedYetException.class.getName().
+                    !NotReplicatedYetException.class.getName().
                         equals(e.getClassName())) {
                   throw e;
                 } else {

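Both hunks fix an inverted condition (HADOOP-294): the client is meant to retry only while retries remain and the remote failure is the expected transient exception, and to rethrow everything else. A hedged restatement of that rule, using the first hunk's exception; the helper method and its parameters are invented for illustration:

    // Sketch only: give up when retries are exhausted OR the failure is anything
    // other than the expected AlreadyBeingCreatedException.
    static boolean shouldGiveUp(int retriesLeft, RemoteException e) {
      return retriesLeft == 0 ||
          !AlreadyBeingCreatedException.class.getName().equals(e.getClassName());
    }
    // callers throw e when shouldGiveUp(...) is true; otherwise they sleep and retry the RPC
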
+ 38 - 14
src/java/org/apache/hadoop/dfs/DataNode.java

@@ -24,7 +24,6 @@ import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 
 import java.io.*;
 import java.net.*;
-import java.nio.channels.FileLock;
 import java.util.*;
 
 /**********************************************************
@@ -173,7 +172,20 @@ public class DataNode implements FSConstants, Runnable {
      * @throws IOException
      */
     private void register() throws IOException {
-      dnRegistration = namenode.register( dnRegistration );
+      while (shouldRun) {
+        try {
+          dnRegistration = namenode.register( dnRegistration );
+          break;
+        } catch( ConnectException se ) {  // namenode has not been started
+          LOG.info("Namenode not available yet, Zzzzz...");
+        } catch( SocketTimeoutException te ) {  // namenode is busy
+          LOG.info("Problem connecting to Namenode: " + 
+                   StringUtils.stringifyException(te));
+        }
+        try {
+          Thread.sleep(10 * 1000);
+        } catch (InterruptedException ie) {}
+      }
       if( storage.getStorageID().equals("") ) {
         storage.setStorageID( dnRegistration.getStorageID());
         storage.write();
@@ -194,7 +206,7 @@ public class DataNode implements FSConstants, Runnable {
     }
 
     void handleDiskError( String errMsgr ) {
-        LOG.warn( "Shuting down DataNode because "+errMsgr );
+        LOG.warn( "DataNode is shutting down.\n" + errMsgr );
         try {
             namenode.errorReport(
                     dnRegistration, DatanodeProtocol.DISK_ERROR, errMsgr);
@@ -208,9 +220,7 @@ public class DataNode implements FSConstants, Runnable {
      * forever calling remote NameNode functions.
      */
     public void offerService() throws Exception {
-      // start dataXceiveServer  
-      dataXceiveServer.start();
-      
+     
       long lastHeartbeat = 0, lastBlockReport = 0;
       LOG.info("using BLOCKREPORT_INTERVAL of " + blockReportInterval + "msec");
 
@@ -325,13 +335,16 @@ public class DataNode implements FSConstants, Runnable {
           } // synchronized
         } // while (shouldRun)
       } catch(DiskErrorException e) {
-        handleDiskError(e.getMessage());
-      }
-      
-      // wait for dataXceiveServer to terminate
-      try {
-          this.dataXceiveServer.join();
-      } catch (InterruptedException ie) {
+        handleDiskError(e.getLocalizedMessage());
+      } catch( RemoteException re ) {
+        String reClass = re.getClassName();
+        if( UnregisteredDatanodeException.class.getName().equals( reClass )) {
+          LOG.warn( "DataNode is shutting down: " + 
+                    StringUtils.stringifyException(re));
+          shutdown();
+          return;
+        }
+        throw re;
       }
     } // offerService
 
@@ -818,6 +831,10 @@ public class DataNode implements FSConstants, Runnable {
      */
     public void run() {
         LOG.info("Starting DataNode in: "+data.data);
+        
+        // start dataXceiveServer
+        dataXceiveServer.start();
+        
         while (shouldRun) {
             try {
                 offerService();
@@ -832,7 +849,14 @@ public class DataNode implements FSConstants, Runnable {
               }
             }
         }
-      LOG.info("Finishing DataNode in: "+data.data);
+        
+        // wait for dataXceiveServer to terminate
+        try {
+            this.dataXceiveServer.join();
+        } catch (InterruptedException ie) {
+        }
+        
+        LOG.info("Finishing DataNode in: "+data.data);
     }
 
     /** Start datanode daemons.

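Taken together, these hunks let a datanode survive namenode outages (HADOOP-285): register() now loops until the namenode answers, the data transfer server is started once in run(), and offerService() either shuts down cleanly on UnregisteredDatanodeException or lets other remote errors escape so the outer loop can try again. A much-simplified sketch of that supervision loop; it is not the actual control flow, and the log message and sleep interval are invented:

    // Illustrative only: the outer loop keeps re-entering the service routine so a
    // lost namenode connection leads to re-registration instead of a dead datanode.
    while (shouldRun) {
      try {
        offerService();       // heartbeats and block reports until an error escapes
      } catch (Exception e) {
        LOG.info("Lost contact with namenode, will retry: " + e);
        try { Thread.sleep(10 * 1000); } catch (InterruptedException ie) {}
      }
    }
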
+ 56 - 32
src/java/org/apache/hadoop/dfs/FSDirectory.java

@@ -143,18 +143,22 @@ class FSDirectory implements FSConstants {
          * @param path file path
          * @param newNode INode to be added
          * @return null if the node already exists; inserted INode, otherwise
+         * @throws FileNotFoundException 
          * @author shv
          */
-        INode addNode(String path, INode newNode) {
+        INode addNode(String path, INode newNode) throws FileNotFoundException {
           File target = new File( path );
           // find parent
           Path parent = new Path(path).getParent();
-          if (parent == null)
-            return null;
+          if (parent == null) { // add root
+              return null;
+          }
           INode parentNode = getNode(parent.toString());
-          if (parentNode == null)
-            return null;
-          // check whether the parent already has a node with that name
+          if (parentNode == null) {
+              throw new FileNotFoundException(
+                      "Parent path does not exist: "+path);
+          }
+           // check whether the parent already has a node with that name
           String name = newNode.name = target.getName();
           if( parentNode.getChild( name ) != null )
             return null;
@@ -688,11 +692,19 @@ class FSDirectory implements FSConstants {
      */
     boolean unprotectedAddFile(UTF8 path, INode newNode) {
       synchronized (rootDir) {
-        int nrBlocks = (newNode.blocks == null) ? 0 : newNode.blocks.length;
-        // Add file->block mapping
-        for (int i = 0; i < nrBlocks; i++)
-            activeBlocks.put(newNode.blocks[i], newNode);
-        return (rootDir.addNode(path.toString(), newNode) != null);
+         try {
+            if( rootDir.addNode(path.toString(), newNode ) != null ) {
+                int nrBlocks = (newNode.blocks == null) ? 0 : newNode.blocks.length;
+                // Add file->block mapping
+                for (int i = 0; i < nrBlocks; i++)
+                    activeBlocks.put(newNode.blocks[i], newNode);
+                return true;
+            } else {
+                return false;
+            }
+        } catch (FileNotFoundException e ) {
+            return false;
+        }
       }
     }
 
@@ -720,23 +732,36 @@ class FSDirectory implements FSConstants {
             INode renamedNode = rootDir.getNode(srcStr);
             if (renamedNode == null) {
                 NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
-                        +"failed to rename "+src+" to "+dst+ " because "+ src+" does not exist" );
+                        +"failed to rename "+src+" to "+dst+ " because source does not exist" );
                 return false;
             }
-            renamedNode.removeNode();
             if (isDir(dst)) {
               dstStr += "/" + new File(srcStr).getName();
             }
+            if( rootDir.getNode(dstStr.toString()) != null ) {
+                NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
+                        +"failed to rename "+src+" to "+dstStr+ " because destination exists" );
+                return false;
+            }
+            renamedNode.removeNode();
+            
             // the renamed node can be reused now
             // the renamed node can be reused now
+            try {
+                if( rootDir.addNode(dstStr, renamedNode ) != null ) {
+                    NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedRenameTo: "
+                        +src+" is renamed to "+dst );
+                    return true;
+                }
+            } catch (FileNotFoundException e ) {
                 NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
                         +"failed to rename "+src+" to "+dst );
-              rootDir.addNode(srcStr, renamedNode); // put it back
-              return false;
+                try {
+                    rootDir.addNode(srcStr, renamedNode); // put it back
+                }catch(FileNotFoundException e2) {                
+                }
             }
-            NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedRenameTo: "
-                     +src+" is renamed to "+dst );
-            return true;
+
+            return false;
         }
     }
 
@@ -977,29 +1002,28 @@ class FSDirectory implements FSConstants {
 
         // Now go backwards through list of dirs, creating along
         // the way
-        boolean lastSuccess = false;
         int numElts = v.size();
         for (int i = numElts - 1; i >= 0; i--) {
             String cur = (String) v.elementAt(i);
-            INode inserted = unprotectedMkdir(cur);
-            if (inserted != null) {
-                NameNode.stateChangeLog.debug("DIR* FSDirectory.mkdirs: "
+            try {
+               INode inserted = unprotectedMkdir(cur);
+               if (inserted != null) {
+                   NameNode.stateChangeLog.debug("DIR* FSDirectory.mkdirs: "
                         +"created directory "+cur );
-                logEdit(OP_MKDIR, new UTF8(inserted.computeName()), null);
-                lastSuccess = true;
-            } else {
-                lastSuccess = false;
+                   logEdit(OP_MKDIR, new UTF8(inserted.computeName()), null);
+               } // otherwise cur exists, continue
+            } catch (FileNotFoundException e ) {
+                NameNode.stateChangeLog.debug("DIR* FSDirectory.mkdirs: "
+                        +"failed to create directory "+src);
+                return false;
             }
         }
-/*        if( !lastSuccess )
-            NameNode.stateChangeLog.warn("DIR* FSDirectory.mkdirs: "
-                    +"failed to create directory "+src );*/
-        return lastSuccess;
+        return true;
     }
 
     /**
      */
-    INode unprotectedMkdir(String src) {
+    INode unprotectedMkdir(String src) throws FileNotFoundException {
         synchronized (rootDir) {
             return rootDir.addNode(src, new INode(new File(src).getName()));
         }

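The reworked addNode contract is what the callers above are reacting to: a null return now means only that the target already exists, while a missing parent raises FileNotFoundException, so mkdirs can treat an existing directory as success (HADOOP-240) and rename can put a node back without guessing. A hedged caller-side sketch; the path and variable names are invented:

    // Sketch of how a caller distinguishes the two outcomes of addNode.
    try {
        INode inserted = rootDir.addNode("/user/demo/file1", newNode);
        if (inserted == null) {
            // target already exists: success for mkdirs, failure for an add-file
        }
    } catch (FileNotFoundException e) {
        // a parent directory is missing, so the whole operation fails
    }
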
+ 4 - 2
src/java/org/apache/hadoop/dfs/FSNamesystem.java

@@ -1266,7 +1266,7 @@ class FSNamesystem implements FSConstants {
 
             if (! dir.isValidBlock(b) && ! pendingCreateBlocks.contains(b)) {
                 obsolete.add(b);
-                NameNode.stateChangeLog.info("BLOCK* NameSystem.processReport: "
+                NameNode.stateChangeLog.debug("BLOCK* NameSystem.processReport: "
                         +"ask "+nodeID.getName()+" to delete "+b.getBlockName() );
             }
         }
@@ -1329,6 +1329,8 @@ class FSNamesystem implements FSConstants {
      */
     private void proccessOverReplicatedBlock( Block block, short replication ) {
       TreeSet containingNodes = (TreeSet) blocksMap.get(block);
+      if( containingNodes == null )
+        return;
       Vector nonExcess = new Vector();
       for (Iterator it = containingNodes.iterator(); it.hasNext(); ) {
           DatanodeInfo cur = (DatanodeInfo) it.next();
@@ -1509,7 +1511,7 @@ class FSNamesystem implements FSConstants {
                 blockList.append(' ');
                 blockList.append(((Block)invalidateSet.elementAt(i)).getBlockName());
             }
-            NameNode.stateChangeLog.info("BLOCK* NameSystem.blockToInvalidate: "
+            NameNode.stateChangeLog.debug("BLOCK* NameSystem.blockToInvalidate: "
                    +"ask "+nodeID.getName()+" to delete " + blockList );
         }
         return (Block[]) invalidateSet.toArray(new Block[invalidateSet.size()]);

+ 2 - 1
src/java/org/apache/hadoop/fs/FileSystem.java

@@ -459,7 +459,8 @@ public abstract class FileSystem extends Configured {
 
     /**
      * Make the given file and all non-existent parents into
-     * directories.
+     * directories. Has the semantics of Unix 'mkdir -p'.
+     * Existence of the directory hierarchy is not an error.
      */
     public abstract boolean mkdirs(Path f) throws IOException;
 

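The sharpened javadoc pins down the 'mkdir -p' contract that the DFS and local implementations now follow: creating an already-existing hierarchy is a success, not an error. An illustrative call sequence under that contract; the configuration object and path are invented:

    // Both calls are expected to return true under the documented semantics.
    FileSystem fs = FileSystem.get(conf);               // conf: a Configuration assumed to exist
    Path dir = new Path("/user/demo/logs/2006-06-09");
    fs.mkdirs(dir);    // creates the directory and any missing parents
    fs.mkdirs(dir);    // hierarchy already exists: still reports success
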
+ 10 - 3
src/java/org/apache/hadoop/fs/LocalFileSystem.java

@@ -223,11 +223,18 @@ public class LocalFileSystem extends FileSystem {
         }
         return results;
     }
-
+    
+    /**
+     * Creates the specified directory hierarchy. Does not
+     * treat existence as an error.
+     */
     public boolean mkdirs(Path f) throws IOException {
-      return pathToFile(f).mkdirs();
+      Path parent = f.getParent();
+      File p2f = pathToFile(f);
+      return (parent == null || mkdirs(parent)) &&
+             (p2f.mkdir() || p2f.isDirectory());
     }
-
+    
     /**
      * Set the working directory to the given directory.
      */

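The local implementation now recurses parent-first and treats "someone else created it" as success, which is what closes the directory-creation race (HADOOP-277). The key idiom, shown on a plain java.io.File with an invented path:

    // If a concurrent creator wins the race, mkdir() returns false but the directory
    // does exist, so the isDirectory() check still lets mkdirs() report success.
    java.io.File p2f = new java.io.File("/tmp/hadoop-demo/a/b");
    boolean ok = p2f.mkdir() || p2f.isDirectory();
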
+ 16 - 0
src/site/src/documentation/content/xdocs/index.xml

@@ -14,6 +14,22 @@
     <section>
       <title>News</title>
 
+      <section>
+      <title>9 June, 2006: release 0.3.2 available</title>
+      <p>This is a bugfix release.  For details see the <a
+      href="http://tinyurl.com/k9g5c">change log</a>. The release can
+      be obtained from <a
+      href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p> </section>
+
+      <section>
+      <title>8 June, 2006: FAQ added to Wiki</title>
+      <p>Hadoop now has a <a
+      href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>.  Please
+      help make this more complete!
+      </p> </section>
+
       <section>
       <title>5 June, 2006: release 0.3.1 available</title>
       <p>This is a bugfix release.  For details see the <a

+ 4 - 4
src/test/org/apache/hadoop/dfs/ClusterTestDFSNamespaceLogging.java

@@ -73,7 +73,7 @@ public class ClusterTestDFSNamespaceLogging extends TestCase implements FSConsta
   private static final int BLOCK_LOG_HEADER_LEN = 32;
   /** DFS block size
    */
-  private static final int BLOCK_SIZE = 32*1000*1000;
+  private static final int BLOCK_SIZE = 32*1024*1024;
   
   /** Buffer size
    */
@@ -158,7 +158,7 @@ public class ClusterTestDFSNamespaceLogging extends TestCase implements FSConsta
     
       // create a file with 2 data blocks
       try {
-        createFile("/data/yy",BLOCK_SIZE+1);
+        createFile("/data/yy", BLOCK_SIZE+1);
         assertCreate( "/data/yy", BLOCK_SIZE+1, false );
       } catch( IOException ioe ) {
     	assertCreate( "/data/yy", BLOCK_SIZE+1, true );
@@ -326,9 +326,9 @@ public class ClusterTestDFSNamespaceLogging extends TestCase implements FSConsta
   //
   private void configureDFS() throws IOException {
 	// set given config param to override other config settings
-	conf.setInt("test.dfs.block_size", BLOCK_SIZE);
+	conf.setInt("dfs.block.size", BLOCK_SIZE);
 	// verify that config changed
-	assertTrue(BLOCK_SIZE == conf.getInt("test.dfs.block_size", 2)); // 2 is an intentional obviously-wrong block size
+	assertTrue(BLOCK_SIZE == conf.getInt("dfs.block.size", 2)); // 2 is an intentional obviously-wrong block size
 	// downsize for testing (just to save resources)
 	conf.setInt("dfs.namenode.handler.count", 3);
 	conf.setLong("dfs.blockreport.intervalMsec", 50*1000L);

+ 2 - 1
src/test/org/apache/hadoop/test/AllTestDriver.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.io.TestSetFile;
 import org.apache.hadoop.io.TestSequenceFile;
 import org.apache.hadoop.ipc.TestIPC;
 import org.apache.hadoop.ipc.TestRPC;
+import org.apache.hadoop.fs.DistributedFSCheck;
 import org.apache.hadoop.fs.TestDFSIO;
 import org.apache.hadoop.fs.DFSCIOTest;
 
@@ -52,7 +53,7 @@ public class AllTestDriver {
 	    pgd.addClass("testtextinputformat", TestTextInputFormat.class, "A test for text input format.");
       pgd.addClass("TestDFSIO", TestDFSIO.class, "Distributed i/o benchmark.");
       pgd.addClass("DFSCIOTest", DFSCIOTest.class, "Distributed i/o benchmark of libhdfs.");
-      pgd.addClass("DistributedFSCheck", TestDFSIO.class, "Distributed checkup of the file system consistency.");
+      pgd.addClass("DistributedFSCheck", DistributedFSCheck.class, "Distributed checkup of the file system consistency.");
 	    pgd.driver(argv);
 	}
 	catch(Throwable e){

Some files were not shown because too many files changed in this diff