瀏覽代碼

Merge -r 481432:483294 from trunk to 0.9 branch, preparing for 0.9.1 release.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/branches/branch-0.9@483297 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 18 年之前
父節點
當前提交
88b2aa0419

+ 8 - 0
CHANGES.txt

@@ -1,5 +1,13 @@
 Hadoop Change Log
 Hadoop Change Log
 
 
+Release 0.9.1 - 2006-12-06
+
+ 1. HADOOP-780. Use ReflectionUtils to instantiate key and value
+    objects. (ab)
+
+ 2. HADOOP-779. Fix contrib/streaming to work correctly with gzipped
+    input files.  (Hairong Kuang via cutting)
+
 
 
 Release 0.9.0 - 2006-12-01
 Release 0.9.0 - 2006-12-01
 
 

+ 1 - 1
build.xml

@@ -9,7 +9,7 @@
  
  
   <property name="Name" value="Hadoop"/>
   <property name="Name" value="Hadoop"/>
   <property name="name" value="hadoop"/>
   <property name="name" value="hadoop"/>
-  <property name="version" value="0.9.1-dev"/>
+  <property name="version" value="0.9.2-dev"/>
   <property name="final.name" value="${name}-${version}"/>
   <property name="final.name" value="${name}-${version}"/>
   <property name="year" value="2006"/>
   <property name="year" value="2006"/>
   <property name="libhdfs.version" value="1"/>
   <property name="libhdfs.version" value="1"/>

+ 29 - 20
site/index.html

@@ -125,6 +125,9 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a href="#News">News</a>
 <a href="#News">News</a>
 <ul class="minitoc">
 <ul class="minitoc">
 <li>
 <li>
+<a href="#6+December%2C+2006%3A+release+0.9.1+available">6 December, 2006: release 0.9.1 available</a>
+</li>
+<li>
 <a href="#1+December%2C+2006%3A+release+0.9.0+available">1 December, 2006: release 0.9.0 available</a>
 <a href="#1+December%2C+2006%3A+release+0.9.0+available">1 December, 2006: release 0.9.0 available</a>
 </li>
 </li>
 <li>
 <li>
@@ -193,97 +196,103 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a name="N1000C"></a><a name="News"></a>
 <a name="N1000C"></a><a name="News"></a>
 <h2 class="h3">News</h2>
 <h2 class="h3">News</h2>
 <div class="section">
 <div class="section">
-<a name="N10012"></a><a name="1+December%2C+2006%3A+release+0.9.0+available"></a>
+<a name="N10012"></a><a name="6+December%2C+2006%3A+release+0.9.1+available"></a>
+<h3 class="h4">6 December, 2006: release 0.9.1 available</h3>
+<p>This fixes critical bugs in 0.9.0.  For details see the <a href="http://tinyurl.com/y55d7p">release notes</a>. The release can
+      be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p>
+<a name="N10024"></a><a name="1+December%2C+2006%3A+release+0.9.0+available"></a>
 <h3 class="h4">1 December, 2006: release 0.9.0 available</h3>
 <h3 class="h4">1 December, 2006: release 0.9.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/sdjhb">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/sdjhb">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10024"></a><a name="3+November%2C+2006%3A+release+0.8.0+available"></a>
+<a name="N10036"></a><a name="3+November%2C+2006%3A+release+0.8.0+available"></a>
 <h3 class="h4">3 November, 2006: release 0.8.0 available</h3>
 <h3 class="h4">3 November, 2006: release 0.8.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/ykqv6s">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/ykqv6s">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10036"></a><a name="18+October%2C+2006%3A+release+0.7.2+available"></a>
+<a name="N10048"></a><a name="18+October%2C+2006%3A+release+0.7.2+available"></a>
 <h3 class="h4">18 October, 2006: release 0.7.2 available</h3>
 <h3 class="h4">18 October, 2006: release 0.7.2 available</h3>
 <p>This fixes critical bugs in 0.7.1.  For details see the <a href="http://tinyurl.com/ygy6y7">release notes</a>. The release can
 <p>This fixes critical bugs in 0.7.1.  For details see the <a href="http://tinyurl.com/ygy6y7">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10048"></a><a name="11+October%2C+2006%3A+release+0.7.1+available"></a>
+<a name="N1005A"></a><a name="11+October%2C+2006%3A+release+0.7.1+available"></a>
 <h3 class="h4">11 October, 2006: release 0.7.1 available</h3>
 <h3 class="h4">11 October, 2006: release 0.7.1 available</h3>
 <p>This fixes critical bugs in 0.7.0.  For details see the <a href="http://tinyurl.com/p7qod">release notes</a>. The release can
 <p>This fixes critical bugs in 0.7.0.  For details see the <a href="http://tinyurl.com/p7qod">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1005A"></a><a name="6+October%2C+2006%3A+release+0.7.0+available"></a>
+<a name="N1006C"></a><a name="6+October%2C+2006%3A+release+0.7.0+available"></a>
 <h3 class="h4">6 October, 2006: release 0.7.0 available</h3>
 <h3 class="h4">6 October, 2006: release 0.7.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/kvd9m">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/kvd9m">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1006C"></a><a name="18+September%2C+2006%3A+release+0.6.2+available"></a>
+<a name="N1007E"></a><a name="18+September%2C+2006%3A+release+0.6.2+available"></a>
 <h3 class="h4">18 September, 2006: release 0.6.2 available</h3>
 <h3 class="h4">18 September, 2006: release 0.6.2 available</h3>
 <p>This fixes critical bugs in 0.6.1.  For details see the <a href="http://tinyurl.com/gyb56">release notes</a>. The release can
 <p>This fixes critical bugs in 0.6.1.  For details see the <a href="http://tinyurl.com/gyb56">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1007E"></a><a name="13+September%2C+2006%3A+release+0.6.1+available"></a>
+<a name="N10090"></a><a name="13+September%2C+2006%3A+release+0.6.1+available"></a>
 <h3 class="h4">13 September, 2006: release 0.6.1 available</h3>
 <h3 class="h4">13 September, 2006: release 0.6.1 available</h3>
 <p>This fixes critical bugs in 0.6.0.  For details see the <a href="http://tinyurl.com/lykp4">release notes</a>. The release can
 <p>This fixes critical bugs in 0.6.0.  For details see the <a href="http://tinyurl.com/lykp4">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10090"></a><a name="8+September%2C+2006%3A+release+0.6.0+available"></a>
+<a name="N100A2"></a><a name="8+September%2C+2006%3A+release+0.6.0+available"></a>
 <h3 class="h4">8 September, 2006: release 0.6.0 available</h3>
 <h3 class="h4">8 September, 2006: release 0.6.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/r3zoj">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/r3zoj">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100A2"></a><a name="4+August%2C+2006%3A+release+0.5.0+available"></a>
+<a name="N100B4"></a><a name="4+August%2C+2006%3A+release+0.5.0+available"></a>
 <h3 class="h4">4 August, 2006: release 0.5.0 available</h3>
 <h3 class="h4">4 August, 2006: release 0.5.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/pnml2">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/pnml2">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100B4"></a><a name="28+June%2C+2006%3A+release+0.4.0+available"></a>
+<a name="N100C6"></a><a name="28+June%2C+2006%3A+release+0.4.0+available"></a>
 <h3 class="h4">28 June, 2006: release 0.4.0 available</h3>
 <h3 class="h4">28 June, 2006: release 0.4.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/o35b6">change log</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/o35b6">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100C6"></a><a name="9+June%2C+2006%3A+release+0.3.2+available"></a>
+<a name="N100D8"></a><a name="9+June%2C+2006%3A+release+0.3.2+available"></a>
 <h3 class="h4">9 June, 2006: release 0.3.2 available</h3>
 <h3 class="h4">9 June, 2006: release 0.3.2 available</h3>
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/k9g5c">change log</a>. The release can
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/k9g5c">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100D8"></a><a name="8+June%2C+2006%3A+FAQ+added+to+Wiki"></a>
+<a name="N100EA"></a><a name="8+June%2C+2006%3A+FAQ+added+to+Wiki"></a>
 <h3 class="h4">8 June, 2006: FAQ added to Wiki</h3>
 <h3 class="h4">8 June, 2006: FAQ added to Wiki</h3>
 <p>Hadoop now has a <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>.  Please
 <p>Hadoop now has a <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>.  Please
       help make this more complete!
       help make this more complete!
       </p>
       </p>
-<a name="N100E6"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
+<a name="N100F8"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
 <h3 class="h4">5 June, 2006: release 0.3.1 available</h3>
 <h3 class="h4">5 June, 2006: release 0.3.1 available</h3>
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/l6on4">change log</a>. The release can
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/l6on4">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100F8"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
+<a name="N1010A"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
 <h3 class="h4">2 June, 2006: release 0.3.0 available</h3>
 <h3 class="h4">2 June, 2006: release 0.3.0 available</h3>
 <p>This includes many fixes, improving performance, scalability
 <p>This includes many fixes, improving performance, scalability
       and reliability and adding new features.  For details see the <a href="http://tinyurl.com/rq3f7">change log</a>. The release can
       and reliability and adding new features.  For details see the <a href="http://tinyurl.com/rq3f7">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1010A"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
+<a name="N1011C"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
 <h3 class="h4">12 May, 2006: release 0.2.1 available</h3>
 <h3 class="h4">12 May, 2006: release 0.2.1 available</h3>
 <p>This fixes a few bugs in release 0.2.0, listed in the <a href="http://tinyurl.com/rnnvz">change log</a>. The
 <p>This fixes a few bugs in release 0.2.0, listed in the <a href="http://tinyurl.com/rnnvz">change log</a>. The
       release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1011C"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
+<a name="N1012E"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
 <h3 class="h4">5 May, 2006: release 0.2.0 available</h3>
 <h3 class="h4">5 May, 2006: release 0.2.0 available</h3>
 <p>We are now aiming for monthly releases.  There have been many
 <p>We are now aiming for monthly releases.  There have been many
       bug fixes and improvements in the past month.  MapReduce and DFS
       bug fixes and improvements in the past month.  MapReduce and DFS
@@ -292,24 +301,24 @@ document.write("<text>Last Published:</text> " + document.lastModified);
       details. The release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       details. The release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1012E"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
+<a name="N10140"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
 <h3 class="h4">2 April, 2006: release 0.1.0 available</h3>
 <h3 class="h4">2 April, 2006: release 0.1.0 available</h3>
 <p>This is the first Hadoop release.  The release is available
 <p>This is the first Hadoop release.  The release is available
       <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
       <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
       here</a>.</p>
       here</a>.</p>
-<a name="N1013C"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
+<a name="N1014E"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
 <h3 class="h4">6 February, 2006: nightly builds</h3>
 <h3 class="h4">6 February, 2006: nightly builds</h3>
 <p>Hadoop now has nightly builds.  This automatically creates a
 <p>Hadoop now has nightly builds.  This automatically creates a
       <a href="http://cvs.apache.org/dist/lucene/hadoop/nightly/">downloadable version of Hadoop every
       <a href="http://cvs.apache.org/dist/lucene/hadoop/nightly/">downloadable version of Hadoop every
       night</a>.  All unit tests must pass, or a message is sent to
       night</a>.  All unit tests must pass, or a message is sent to
       the developers mailing list and no new version is created.  This
       the developers mailing list and no new version is created.  This
       also updates the <a href="docs/api/">javadoc</a>.</p>
       also updates the <a href="docs/api/">javadoc</a>.</p>
-<a name="N1014E"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
+<a name="N10160"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
 <h3 class="h4">3 February, 2006: Hadoop code moved out of Nutch</h3>
 <h3 class="h4">3 February, 2006: Hadoop code moved out of Nutch</h3>
 <p>The Hadoop code has now been moved into its own Subversion
 <p>The Hadoop code has now been moved into its own Subversion
       tree, renamed into packages under <span class="codefrag">org.apache.hadoop</span>.
       tree, renamed into packages under <span class="codefrag">org.apache.hadoop</span>.
       All unit tests pass, but little else has yet been tested.</p>
       All unit tests pass, but little else has yet been tested.</p>
-<a name="N1015B"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
+<a name="N1016D"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
 <h3 class="h4">30 March, 2006: Hadoop project approved</h3>
 <h3 class="h4">30 March, 2006: Hadoop project approved</h3>
 <p>The Lucene PMC has elected to split the Nutch MapReduce and
 <p>The Lucene PMC has elected to split the Nutch MapReduce and
      distributed filesystem code into a new project named Hadoop.</p>
      distributed filesystem code into a new project named Hadoop.</p>

File diff suppressed because it is too large
+ 30 - 19
site/index.pdf


+ 1 - 1
src/contrib/build-contrib.xml

@@ -103,7 +103,7 @@
      srcdir="${src.test}"
      srcdir="${src.test}"
      includes="**/*.java"
      includes="**/*.java"
      destdir="${build.test}"
      destdir="${build.test}"
-     debug="${debug}">
+     debug="${javac.debug}">
       <classpath refid="test.classpath"/>
       <classpath refid="test.classpath"/>
     </javac>
     </javac>
   </target>
   </target>

+ 15 - 31
src/contrib/streaming/src/java/org/apache/hadoop/streaming/StreamLineRecordReader.java

@@ -45,7 +45,7 @@ public class StreamLineRecordReader extends StreamBaseRecordReader {
     super(in, split, reporter, job, fs);
     super(in, split, reporter, job, fs);
     gzipped_ = StreamInputFormat.isGzippedInput(job);
     gzipped_ = StreamInputFormat.isGzippedInput(job);
     if (gzipped_) {
     if (gzipped_) {
-      din_ = new DataInputStream(new GZIPInputStream(in_));
+      din_ = new BufferedInputStream( (new GZIPInputStream(in_) ) );
     } else {
     } else {
       din_ = in_;
       din_ = in_;
     }
     }
@@ -88,40 +88,24 @@ public class StreamLineRecordReader extends StreamBaseRecordReader {
     Text tValue = (Text) value;
     Text tValue = (Text) value;
     byte[] line;
     byte[] line;
 
 
-    while (true) {
-      if (gzipped_) {
-        // figure EOS from readLine
-      } else {
-        long pos = in_.getPos();
-        if (pos >= end_) return false;
-      }
-
-      line = UTF8ByteArrayUtils.readLine((InputStream) in_);
-      if (line == null) return false;
-      try {
-        Text.validateUTF8(line);
-      } catch (MalformedInputException m) {
-        System.err.println("line=" + line + "|" + new Text(line));
-        System.out.flush();
-      }
-      try {
-        int tab = UTF8ByteArrayUtils.findTab(line);
-        if (tab == -1) {
-          tKey.set(line);
-          tValue.set("");
-        } else {
-          UTF8ByteArrayUtils.splitKeyVal(line, tKey, tValue, tab);
-        }
-        break;
-      } catch (MalformedInputException e) {
-        LOG.warn(StringUtils.stringifyException(e));
-      }
+    if ( !gzipped_  ) {
+      long pos = in_.getPos();
+      if (pos >= end_) return false;
+    }
+    
+    line = UTF8ByteArrayUtils.readLine((InputStream) din_);
+    if (line == null) return false;
+    int tab = UTF8ByteArrayUtils.findTab(line);
+    if (tab == -1) {
+      tKey.set(line);
+      tValue.set("");
+    } else {
+      UTF8ByteArrayUtils.splitKeyVal(line, tKey, tValue, tab);
     }
     }
     numRecStats(line, 0, line.length);
     numRecStats(line, 0, line.length);
     return true;
     return true;
   }
   }
 
 
   boolean gzipped_;
   boolean gzipped_;
-  GZIPInputStream zin_;
-  DataInputStream din_; // GZIP or plain  
+  InputStream din_; // GZIP or plain  
 }
 }

+ 59 - 0
src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestGzipInput.java

@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.streaming;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.zip.GZIPOutputStream;
+
+/**
+ * This class tests gzip input streaming in MapReduce local mode.
+ */
+public class TestGzipInput extends TestStreaming
+{
+
+  public TestGzipInput() throws IOException {
+  }
+  
+  protected void createInput() throws IOException
+  {
+    GZIPOutputStream out = new GZIPOutputStream(
+        new FileOutputStream(INPUT_FILE.getAbsoluteFile()));
+    out.write(input.getBytes("UTF-8"));
+    out.close();
+  }
+
+  protected String[] genArgs() {
+    return new String[] {
+        "-input", INPUT_FILE.getAbsolutePath(),
+        "-output", OUTPUT_DIR.getAbsolutePath(),
+        "-mapper", map,
+        "-combiner", combine,
+        "-reducer", reduce,
+        "-jobconf", "stream.recordreader.compression=gzip"
+    };
+    
+  }
+
+  public static void main(String[]args) throws Exception
+  {
+    new TestGzipInput().testCommandLine();
+  }
+
+}

+ 31 - 25
src/contrib/streaming/src/test/org/apache/hadoop/streaming/TestStreaming.java

@@ -33,17 +33,17 @@ public class TestStreaming extends TestCase
 
 
   // "map" command: grep -E (red|green|blue)
   // "map" command: grep -E (red|green|blue)
   // reduce command: uniq
   // reduce command: uniq
-  String INPUT_FILE = "input.txt";
-  String OUTPUT_DIR = "out";
-  String input = "roses.are.red\nviolets.are.blue\nbunnies.are.pink\n";
+  protected File INPUT_FILE = new File("input.txt");
+  protected File OUTPUT_DIR = new File("out");
+  protected String input = "roses.are.red\nviolets.are.blue\nbunnies.are.pink\n";
   // map behaves like "/usr/bin/tr . \\n"; (split words into lines)
   // map behaves like "/usr/bin/tr . \\n"; (split words into lines)
-  String map = StreamUtil.makeJavaCommand(TrApp.class, new String[]{".", "\\n"});
+  protected String map = StreamUtil.makeJavaCommand(TrApp.class, new String[]{".", "\\n"});
   // combine, reduce behave like /usr/bin/uniq. But also prepend lines with C, R.
   // combine, reduce behave like /usr/bin/uniq. But also prepend lines with C, R.
-  String combine  = StreamUtil.makeJavaCommand(UniqApp.class, new String[]{"C"});
-  String reduce = StreamUtil.makeJavaCommand(UniqApp.class, new String[]{"R"});
-  String outputExpect = "RCare\t\nRCblue\t\nRCbunnies\t\nRCpink\t\nRCred\t\nRCroses\t\nRCviolets\t\n";
+  protected String combine  = StreamUtil.makeJavaCommand(UniqApp.class, new String[]{"C"});
+  protected String reduce = StreamUtil.makeJavaCommand(UniqApp.class, new String[]{"R"});
+  protected String outputExpect = "RCare\t\nRCblue\t\nRCbunnies\t\nRCpink\t\nRCred\t\nRCroses\t\nRCviolets\t\n";
 
 
-  StreamJob job;
+  private StreamJob job;
 
 
   public TestStreaming() throws IOException
   public TestStreaming() throws IOException
   {
   {
@@ -52,14 +52,27 @@ public class TestStreaming extends TestCase
     utilTest.redirectIfAntJunit();
     utilTest.redirectIfAntJunit();
   }
   }
 
 
-  void createInput() throws IOException
+  protected void createInput() throws IOException
   {
   {
-    String path = new File(".", INPUT_FILE).getAbsolutePath();// needed from junit forked vm
-    DataOutputStream out = new DataOutputStream(new FileOutputStream(path));
+    DataOutputStream out = new DataOutputStream(
+        new FileOutputStream(INPUT_FILE.getAbsoluteFile()));
     out.write(input.getBytes("UTF-8"));
     out.write(input.getBytes("UTF-8"));
     out.close();
     out.close();
   }
   }
 
 
+  protected String[] genArgs() {
+    return new String[] {
+        "-input", INPUT_FILE.getAbsolutePath(),
+        "-output", OUTPUT_DIR.getAbsolutePath(),
+        "-mapper", map,
+        "-combiner", combine,
+        "-reducer", reduce,
+        //"-verbose",
+        //"-jobconf", "stream.debug=set"
+        "-jobconf", "keep.failed.task.files=true"
+        };
+  }
+  
   public void testCommandLine()
   public void testCommandLine()
   {
   {
     try {
     try {
@@ -68,30 +81,23 @@ public class TestStreaming extends TestCase
 
 
       // During tests, the default Configuration will use a local mapred
       // During tests, the default Configuration will use a local mapred
       // So don't specify -config or -cluster
       // So don't specify -config or -cluster
-      String argv[] = new String[] {
-          "-input", INPUT_FILE,
-          "-output", OUTPUT_DIR,
-          "-mapper", map,
-          "-combiner", combine,
-          "-reducer", reduce,
-          //"-verbose",
-          //"-jobconf", "stream.debug=set"
-          "-jobconf", "keep.failed.task.files=true",
-      };
-      job = new StreamJob(argv, mayExit);      
+      job = new StreamJob(genArgs(), mayExit);      
       job.go();
       job.go();
-      File outFile = new File(".", OUTPUT_DIR + "/part-00000").getAbsoluteFile();
+      File outFile = new File(OUTPUT_DIR, "part-00000").getAbsoluteFile();
       String output = StreamUtil.slurp(outFile);
       String output = StreamUtil.slurp(outFile);
+      outFile.delete();
       System.err.println("outEx1=" + outputExpect);
       System.err.println("outEx1=" + outputExpect);
       System.err.println("  out1=" + output);
       System.err.println("  out1=" + output);
       assertEquals(outputExpect, output);
       assertEquals(outputExpect, output);
-
     } catch(Exception e) {
     } catch(Exception e) {
       failTrace(e);
       failTrace(e);
+    } finally {
+      INPUT_FILE.delete();
+      OUTPUT_DIR.delete();
     }
     }
   }
   }
 
 
-  void failTrace(Exception e)
+  private void failTrace(Exception e)
   {
   {
     StringWriter sw = new StringWriter();
     StringWriter sw = new StringWriter();
     e.printStackTrace(new PrintWriter(sw));
     e.printStackTrace(new PrintWriter(sw));

+ 6 - 5
src/java/org/apache/hadoop/io/MapFile.java

@@ -22,6 +22,7 @@ import java.io.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Progressable;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 
 
 /** A file-based map from keys to values.
 /** A file-based map from keys to values.
@@ -470,8 +471,8 @@ public class MapFile {
               ", got " + dataReader.getValueClass().getName());
               ", got " + dataReader.getValueClass().getName());
     }
     }
     long cnt = 0L;
     long cnt = 0L;
-    Writable key = (Writable)keyClass.getConstructor(new Class[0]).newInstance(new Object[0]);
-    Writable value = (Writable)valueClass.getConstructor(new Class[0]).newInstance(new Object[0]);
+    Writable key = (Writable)ReflectionUtils.newInstance(keyClass, conf);
+    Writable value = (Writable)ReflectionUtils.newInstance(valueClass, conf);
     SequenceFile.Writer indexWriter = null;
     SequenceFile.Writer indexWriter = null;
     if (!dryrun) indexWriter = SequenceFile.createWriter(fs, conf, index, keyClass, LongWritable.class);
     if (!dryrun) indexWriter = SequenceFile.createWriter(fs, conf, index, keyClass, LongWritable.class);
     try {
     try {
@@ -510,11 +511,11 @@ public class MapFile {
     FileSystem fs = new LocalFileSystem(conf);
     FileSystem fs = new LocalFileSystem(conf);
     MapFile.Reader reader = new MapFile.Reader(fs, in, conf);
     MapFile.Reader reader = new MapFile.Reader(fs, in, conf);
     MapFile.Writer writer =
     MapFile.Writer writer =
-      new MapFile.Writer(fs, out, reader.getKeyClass(), reader.getValueClass());
+      new MapFile.Writer(conf, fs, out, reader.getKeyClass(), reader.getValueClass());
 
 
     WritableComparable key =
     WritableComparable key =
-      (WritableComparable)reader.getKeyClass().newInstance();
-    Writable value = (Writable)reader.getValueClass().newInstance();
+      (WritableComparable)ReflectionUtils.newInstance(reader.getKeyClass(), conf);
+    Writable value = (Writable)ReflectionUtils.newInstance(reader.getValueClass(), conf);
 
 
     while (reader.next(key, value))               // copy all entries
     while (reader.next(key, value))               // copy all entries
       writer.append(key, value);
       writer.append(key, value);

+ 2 - 16
src/java/org/apache/hadoop/ipc/Client.java

@@ -44,6 +44,7 @@ import org.apache.hadoop.dfs.FSConstants;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.hadoop.util.StringUtils;
 
 
 /** A client for an IPC service.  IPC calls take a single {@link Writable} as a
 /** A client for an IPC service.  IPC calls take a single {@link Writable} as a
@@ -259,12 +260,9 @@ public class Client {
                                   WritableUtils.readString(in));
                                   WritableUtils.readString(in));
             call.setResult(null, ex);
             call.setResult(null, ex);
           } else {
           } else {
-            Writable value = makeValue();
+            Writable value = (Writable)ReflectionUtils.newInstance(valueClass, conf);
             try {
             try {
               readingCall = call;
               readingCall = call;
-              if(value instanceof Configurable) {
-                ((Configurable) value).setConf(conf);
-              }
               value.readFields(in);                 // read value
               value.readFields(in);                 // read value
             } finally {
             } finally {
               readingCall = null;
               readingCall = null;
@@ -528,16 +526,4 @@ public class Client {
     return connection;
     return connection;
   }
   }
 
 
-  private Writable makeValue() {
-    Writable value;                             // construct value
-    try {
-      value = (Writable)valueClass.newInstance();
-    } catch (InstantiationException e) {
-      throw new RuntimeException(e.toString());
-    } catch (IllegalAccessException e) {
-      throw new RuntimeException(e.toString());
-    }
-    return value;
-  }
-
 }
 }

+ 1 - 17
src/java/org/apache/hadoop/ipc/Server.java

@@ -460,7 +460,7 @@ public abstract class Server {
       if (LOG.isDebugEnabled())
       if (LOG.isDebugEnabled())
         LOG.debug(" got #" + id);
         LOG.debug(" got #" + id);
             
             
-      Writable param = makeParam();           // read param
+      Writable param = (Writable)ReflectionUtils.newInstance(paramClass, conf);           // read param
       param.readFields(dis);        
       param.readFields(dis);        
         
         
       Call call = new Call(id, param, this);
       Call call = new Call(id, param, this);
@@ -633,21 +633,5 @@ public abstract class Server {
 
 
   /** Called for each call. */
   /** Called for each call. */
   public abstract Writable call(Writable param) throws IOException;
   public abstract Writable call(Writable param) throws IOException;
-
   
   
-  private Writable makeParam() {
-    Writable param;                               // construct param
-    try {
-      param = (Writable)paramClass.newInstance();
-      if (param instanceof Configurable) {
-        ((Configurable)param).setConf(conf);
-      }
-    } catch (InstantiationException e) {
-      throw new RuntimeException(e.toString());
-    } catch (IllegalAccessException e) {
-      throw new RuntimeException(e.toString());
-    }
-    return param;
-  }
-
 }
 }

+ 7 - 4
src/java/org/apache/hadoop/mapred/ReduceTask.java

@@ -131,16 +131,19 @@ class ReduceTask extends Task {
     private WritableComparator comparator;
     private WritableComparator comparator;
     private Class keyClass;
     private Class keyClass;
     private Class valClass;
     private Class valClass;
+    private Configuration conf;
     private DataOutputBuffer valOut = new DataOutputBuffer();
     private DataOutputBuffer valOut = new DataOutputBuffer();
     private DataInputBuffer valIn = new DataInputBuffer();
     private DataInputBuffer valIn = new DataInputBuffer();
     private DataInputBuffer keyIn = new DataInputBuffer();
     private DataInputBuffer keyIn = new DataInputBuffer();
 
 
     public ValuesIterator (SequenceFile.Sorter.RawKeyValueIterator in, 
     public ValuesIterator (SequenceFile.Sorter.RawKeyValueIterator in, 
                            WritableComparator comparator, Class keyClass,
                            WritableComparator comparator, Class keyClass,
-                           Class valClass, TaskUmbilicalProtocol umbilical)
+                           Class valClass, TaskUmbilicalProtocol umbilical,
+                           Configuration conf)
       throws IOException {
       throws IOException {
       this.in = in;
       this.in = in;
       this.umbilical = umbilical;
       this.umbilical = umbilical;
+      this.conf = conf;
       this.comparator = comparator;
       this.comparator = comparator;
       this.keyClass = keyClass;
       this.keyClass = keyClass;
       this.valClass = valClass;
       this.valClass = valClass;
@@ -183,8 +186,8 @@ class ReduceTask extends Task {
 
 
       Writable lastKey = key;                     // save previous key
       Writable lastKey = key;                     // save previous key
       try {
       try {
-        key = (WritableComparable)keyClass.newInstance();
-        value = (Writable)valClass.newInstance();
+        key = (WritableComparable)ReflectionUtils.newInstance(keyClass, this.conf);
+        value = (Writable)ReflectionUtils.newInstance(valClass, this.conf);
       } catch (Exception e) {
       } catch (Exception e) {
         throw new RuntimeException(e);
         throw new RuntimeException(e);
       }
       }
@@ -298,7 +301,7 @@ class ReduceTask extends Task {
       Class keyClass = job.getMapOutputKeyClass();
       Class keyClass = job.getMapOutputKeyClass();
       Class valClass = job.getMapOutputValueClass();
       Class valClass = job.getMapOutputValueClass();
       ValuesIterator values = new ValuesIterator(rIter, comparator, keyClass, 
       ValuesIterator values = new ValuesIterator(rIter, comparator, keyClass, 
-                                                 valClass, umbilical);
+                                                 valClass, umbilical, job);
       while (values.more()) {
       while (values.more()) {
         myMetrics.reduceInput();
         myMetrics.reduceInput();
         reducer.reduce(values.getKey(), values, collector, reporter);
         reducer.reduce(values.getKey(), values, collector, reporter);

+ 9 - 0
src/site/src/documentation/content/xdocs/index.xml

@@ -14,6 +14,15 @@
     <section>
     <section>
       <title>News</title>
       <title>News</title>
 
 
+      <section>
+      <title>6 December, 2006: release 0.9.1 available</title>
+      <p>This fixes critical bugs in 0.9.0.  For details see the <a
+      href="http://tinyurl.com/y55d7p">release notes</a>. The release can
+      be obtained from <a
+      href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p> </section>
+
       <section>
       <section>
       <title>1 December, 2006: release 0.9.0 available</title>
       <title>1 December, 2006: release 0.9.0 available</title>
       <p>For details see the <a
       <p>For details see the <a

Some files were not shown because too many files changed in this diff