Browse source

merge -r 443531:447487 from trunk, preparing for 0.6.2 release

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/branches/branch-0.6@447489 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting, 18 years ago
parent commit 083c714fc1

+ 12 - 0
CHANGES.txt

@@ -1,6 +1,18 @@
 Hadoop Change Log
 
 
+Release 0.6.2 (unreleased)
+
+1. HADOOP-532.  Fix a bug reading value-compressed sequence files,
+   where an exception was thrown reporting that the full value had not
+   been read.  (omalley via cutting)
+
+2. HADOOP-534.  Change the default value class in JobConf to be Text
+   instead of the now-deprecated UTF8.  This fixes the Grep example
+   program, which was updated to use Text, but relies on this
+   default.  (Hairong Kuang via cutting)
+
+
 Release 0.6.1 - 2006-08-13
 
  1. HADOOP-520.  Fix a bug in libhdfs, where write failures were not

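HADOOP-532 is easiest to picture as a round trip through a RECORD-compressed (value-compressed) SequenceFile, since reading the values back is exactly what used to throw. Below is a minimal sketch against the 0.6-era API: the createWriter signature and CompressionType come from the TestSequenceFile diff further down, while the Reader constructor, the append call, and the /tmp path are assumptions for illustration.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocalFileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.SequenceFile.CompressionType;
    import org.apache.hadoop.io.Text;

    public class RecordCompressedRoundTrip {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = new LocalFileSystem(conf);   // local FS, as in TestSequenceFile
        Path file = new Path("/tmp/roundtrip.seq");  // hypothetical location

        // Write a few records with per-record value compression.
        SequenceFile.Writer writer = SequenceFile.createWriter(
            fs, conf, file, Text.class, Text.class, CompressionType.RECORD);
        for (int i = 0; i < 10; i++) {
          writer.append(new Text("key-" + i), new Text("value-" + i));
        }
        writer.close();

        // Read them back; before the fix this loop could fail with
        // "read N bytes, should read M" on value-compressed files.
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
        Text k = new Text();
        Text v = new Text();
        while (reader.next(k, v)) {
          System.out.println(k + "\t" + v);
        }
        reader.close();
      }
    }
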
+ 1 - 1
build.xml

@@ -9,7 +9,7 @@
  
   <property name="Name" value="Hadoop"/>
   <property name="name" value="hadoop"/>
-  <property name="version" value="0.6.2-dev"/>
+  <property name="version" value="0.6.3-dev"/>
   <property name="final.name" value="${name}-${version}"/>
   <property name="year" value="2006"/>
   <property name="libhdfs.version" value="1"/>

+ 24 - 15
site/index.html

@@ -122,6 +122,9 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a href="#News">News</a>
 <ul class="minitoc">
 <li>
+<a href="#18+September%2C+2006%3A+release+0.6.2+available">18 September, 2006: release 0.6.2 available</a>
+</li>
+<li>
 <a href="#13+September%2C+2006%3A+release+0.6.1+available">13 September, 2006: release 0.6.1 available</a>
 </li>
 <li>
@@ -172,61 +175,67 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a name="N1000C"></a><a name="News"></a>
 <h2 class="h3">News</h2>
 <div class="section">
-<a name="N10012"></a><a name="13+September%2C+2006%3A+release+0.6.1+available"></a>
+<a name="N10012"></a><a name="18+September%2C+2006%3A+release+0.6.2+available"></a>
+<h3 class="h4">18 September, 2006: release 0.6.2 available</h3>
+<p>This fixes critical bugs in 0.6.1.  For details see the <a href="http://tinyurl.com/gyb56">release notes</a>. The release can
+      be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p>
+<a name="N10024"></a><a name="13+September%2C+2006%3A+release+0.6.1+available"></a>
 <h3 class="h4">13 September, 2006: release 0.6.1 available</h3>
-<p>For details see the <a href="http://tinyurl.com/lykp4">release notes</a>. The release can
+<p>This fixes critical bugs in 0.6.0.  For details see the <a href="http://tinyurl.com/lykp4">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       </p>
-<a name="N10024"></a><a name="8+September%2C+2006%3A+release+0.6.0+available"></a>
+<a name="N10036"></a><a name="8+September%2C+2006%3A+release+0.6.0+available"></a>
 <h3 class="h4">8 September, 2006: release 0.6.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/r3zoj">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       </p>
-<a name="N10036"></a><a name="4+August%2C+2006%3A+release+0.5.0+available"></a>
+<a name="N10048"></a><a name="4+August%2C+2006%3A+release+0.5.0+available"></a>
 <h3 class="h4">4 August, 2006: release 0.5.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/pnml2">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       </p>
-<a name="N10048"></a><a name="28+June%2C+2006%3A+release+0.4.0+available"></a>
+<a name="N1005A"></a><a name="28+June%2C+2006%3A+release+0.4.0+available"></a>
 <h3 class="h4">28 June, 2006: release 0.4.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/o35b6">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       </p>
-<a name="N1005A"></a><a name="9+June%2C+2006%3A+release+0.3.2+available"></a>
+<a name="N1006C"></a><a name="9+June%2C+2006%3A+release+0.3.2+available"></a>
 <h3 class="h4">9 June, 2006: release 0.3.2 available</h3>
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/k9g5c">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       </p>
-<a name="N1006C"></a><a name="8+June%2C+2006%3A+FAQ+added+to+Wiki"></a>
+<a name="N1007E"></a><a name="8+June%2C+2006%3A+FAQ+added+to+Wiki"></a>
 <h3 class="h4">8 June, 2006: FAQ added to Wiki</h3>
 <p>Hadoop now has a <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>.  Please
       help make this more complete!
       </p>
-<a name="N1007A"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
+<a name="N1008C"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
 <h3 class="h4">5 June, 2006: release 0.3.1 available</h3>
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/l6on4">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       </p>
-<a name="N1008C"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
+<a name="N1009E"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
 <h3 class="h4">2 June, 2006: release 0.3.0 available</h3>
 <p>This includes many fixes, improving performance, scalability
       and reliability and adding new features.  For details see the <a href="http://tinyurl.com/rq3f7">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       </p>
-<a name="N1009E"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
+<a name="N100B0"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
 <h3 class="h4">12 May, 2006: release 0.2.1 available</h3>
 <p>This fixes a few bugs in release 0.2.0, listed in the <a href="http://tinyurl.com/rnnvz">change log</a>. The
       release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       </p>
-<a name="N100B0"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
+<a name="N100C2"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
 <h3 class="h4">5 May, 2006: release 0.2.0 available</h3>
 <p>We are now aiming for monthly releases.  There have been many
       bug fixes and improvements in the past month.  MapReduce and DFS
@@ -235,24 +244,24 @@ document.write("<text>Last Published:</text> " + document.lastModified);
       details. The release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       </p>
-<a name="N100C2"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
+<a name="N100D4"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
 <h3 class="h4">2 April, 2006: release 0.1.0 available</h3>
 <p>This is the first Hadoop release.  The release is available
       <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
       here</a>.</p>
-<a name="N100D0"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
+<a name="N100E2"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
 <h3 class="h4">6 February, 2006: nightly builds</h3>
 <p>Hadoop now has nightly builds.  This automatically creates a
       <a href="http://cvs.apache.org/dist/lucene/hadoop/nightly/">downloadable version of Hadoop every
       night</a>.  All unit tests must pass, or a message is sent to
       the developers mailing list and no new version is created.  This
       also updates the <a href="docs/api/">javadoc</a>.</p>
-<a name="N100E2"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
+<a name="N100F4"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
 <h3 class="h4">3 February, 2006: Hadoop code moved out of Nutch</h3>
 <p>The Hadoop code has now been moved into its own Subversion
       tree, renamed into packages under <span class="codefrag">org.apache.hadoop</span>.
       All unit tests pass, but little else has yet been tested.</p>
-<a name="N100EF"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
+<a name="N10101"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
 <h3 class="h4">30 March, 2006: Hadoop project approved</h3>
 <p>The Lucene PMC has elected to split the Nutch MapReduce and
       distributed filesytem code into a new project named Hadoop.</p>

File diff suppressed because it is too large
+ 26 - 15
site/index.pdf


+ 16 - 8
src/java/org/apache/hadoop/io/SequenceFile.java

@@ -972,10 +972,14 @@ public class SequenceFile {
 
       if (version > 2) {                          // if version > 2
         this.decompress = in.readBoolean();       // is compressed?
+      } else {
+        decompress = false;
       }
 
       if (version >= BLOCK_COMPRESS_VERSION) {    // if version >= 4
         this.blockCompressed = in.readBoolean();  // is block-compressed?
+      } else {
+        blockCompressed = false;
       }
       
       // if version >= 5
@@ -1004,9 +1008,9 @@ public class SequenceFile {
       valBuffer = new DataInputBuffer();
       if (decompress) {
         valInFilter = this.codec.createInputStream(valBuffer);
-        valIn = new DataInputStream(new BufferedInputStream(valInFilter));
+        valIn = new DataInputStream(valInFilter);
       } else {
-        valIn = new DataInputStream(new BufferedInputStream(valBuffer));
+        valIn = valBuffer;
       }
       
       if (blockCompressed) {
@@ -1109,10 +1113,11 @@ public class SequenceFile {
      * corresponding to the 'current' key 
      */
     private synchronized void seekToCurrentValue() throws IOException {
-      if (version < BLOCK_COMPRESS_VERSION || blockCompressed == false) {
+      if (!blockCompressed) {
         if (decompress) {
           valInFilter.resetState();
         }
+        valBuffer.reset();
       } else {
         // Check if this is the first value in the 'block' to be read
         if (lazyDecompress && !valuesDecompressed) {
@@ -1156,13 +1161,15 @@ public class SequenceFile {
       // Position stream to 'current' value
       seekToCurrentValue();
 
-      if (version < BLOCK_COMPRESS_VERSION || blockCompressed == false) {
+      if (!blockCompressed) {
         val.readFields(valIn);
         
-        if (valBuffer.getPosition() != valBuffer.getLength())
+        if (valIn.read() > 0) {
+          LOG.info("available bytes: " + valIn.available());
           throw new IOException(val+" read "+(valBuffer.getPosition()-keyLength)
               + " bytes, should read " +
               (valBuffer.getLength()-keyLength));
+        }
       } else {
         // Get the value
         int valLength = WritableUtils.readVInt(valLenIn);
@@ -1185,7 +1192,7 @@ public class SequenceFile {
       if (key.getClass() != keyClass)
         throw new IOException("wrong key class: "+key+" is not "+keyClass);
 
-      if (version < BLOCK_COMPRESS_VERSION || blockCompressed == false) {
+      if (!blockCompressed) {
         outBuf.reset();
         
         keyLength = next(outBuf);
@@ -1195,6 +1202,7 @@ public class SequenceFile {
         valBuffer.reset(outBuf.getData(), outBuf.getLength());
         
         key.readFields(valBuffer);
+        valBuffer.mark(0);
         if (valBuffer.getPosition() != keyLength)
           throw new IOException(key + " read " + valBuffer.getPosition()
               + " bytes, should read " + keyLength);
@@ -1266,7 +1274,7 @@ public class SequenceFile {
     /** @deprecated Call {@link #nextRaw(DataOutputBuffer,SequenceFile.ValueBytes)}. */
     public synchronized int next(DataOutputBuffer buffer) throws IOException {
       // Unsupported for block-compressed sequence files
-      if (version >= BLOCK_COMPRESS_VERSION && blockCompressed) {
+      if (blockCompressed) {
         throw new IOException("Unsupported call for block-compressed" +
             " SequenceFiles - use SequenceFile.Reader.next(DataOutputStream, ValueBytes)");
       }
@@ -1303,7 +1311,7 @@ public class SequenceFile {
      */
     public int nextRaw(DataOutputBuffer key, ValueBytes val) 
     throws IOException {
-      if (version < BLOCK_COMPRESS_VERSION || blockCompressed == false) {
+      if (!blockCompressed) {
         if (in.getPos() >= end) 
           return -1;
 

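This file carries the substance of HADOOP-532. Dropping the BufferedInputStream wrappers matters because their read-ahead (and, for compressed values, the codec stream) could decouple valBuffer's position from what readFields had actually consumed, so the old position comparison could throw even though the full value had been read; the new code reads straight from the stream and probes it for leftover bytes instead. The invariant being enforced is simply that a value's readFields must consume exactly its serialized bytes. Here is a standalone sketch of that check for the plain, uncompressed case, using the real DataInputBuffer accessors seen above (the helper class itself is hypothetical):

    import java.io.IOException;
    import org.apache.hadoop.io.DataInputBuffer;
    import org.apache.hadoop.io.Writable;

    public class BoundedRead {
      /** Deserialize val from raw[0..length) and insist it consumes every byte. */
      public static void readExactly(Writable val, byte[] raw, int length)
          throws IOException {
        DataInputBuffer valBuffer = new DataInputBuffer();
        valBuffer.reset(raw, length);        // bound the readable region
        val.readFields(valBuffer);           // the value's own deserializer
        if (valBuffer.getPosition() != valBuffer.getLength()) {
          // leftover bytes mean readFields under-read its record
          throw new IOException(val + " read " + valBuffer.getPosition()
              + " bytes, should read " + valBuffer.getLength());
        }
      }
    }
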
+ 3 - 3
src/java/org/apache/hadoop/mapred/JobConf.java

@@ -36,7 +36,7 @@ import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.compress.CompressionCodec;
 
 import org.apache.hadoop.mapred.lib.IdentityMapper;
@@ -311,7 +311,7 @@ public class JobConf extends Configuration {
 
   /** @deprecated Call {@link RecordReader#createValue()}. */
   public Class getInputValueClass() {
-    return getClass("mapred.input.value.class", UTF8.class, Writable.class);
+    return getClass("mapred.input.value.class", Text.class, Writable.class);
   }
 
   /** @deprecated Not used */
@@ -441,7 +441,7 @@ public class JobConf extends Configuration {
   }
 
   public Class getOutputValueClass() {
-    return getClass("mapred.output.value.class", UTF8.class, Writable.class);
+    return getClass("mapred.output.value.class", Text.class, Writable.class);
   }
   public void setOutputValueClass(Class theClass) {
     setClass("mapred.output.value.class", theClass, Writable.class);

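HADOOP-534 changes only a default, and the Grep example broke precisely because it leaned on that default. A job that pins its classes explicitly is immune to such changes. In the small sketch below, setOutputValueClass is the setter visible in this hunk, while the no-arg JobConf constructor and setOutputKeyClass are assumed companions from the same API.

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;

    public class ExplicitValueClass {
      public static JobConf configure() {
        JobConf job = new JobConf();
        // Pin the output classes so the job never depends on the
        // UTF8 -> Text default changed above.
        job.setOutputKeyClass(LongWritable.class);   // assumed companion setter
        job.setOutputValueClass(Text.class);         // setter shown in this diff
        return job;
      }
    }
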
+ 10 - 1
src/site/src/documentation/content/xdocs/index.xml

@@ -14,9 +14,18 @@
     <section>
       <title>News</title>
 
+      <section>
+      <title>18 September, 2006: release 0.6.2 available</title>
+      <p>This fixes critical bugs in 0.6.1.  For details see the <a
+      href="http://tinyurl.com/gyb56">release notes</a>. The release can
+      be obtained from <a
+      href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p> </section>
+
       <section>
       <title>13 September, 2006: release 0.6.1 available</title>
-      <p>For details see the <a
+      <p>This fixes critical bugs in 0.6.0.  For details see the <a
       href="http://tinyurl.com/lykp4">release notes</a>. The release can
       be obtained from <a
       href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a

+ 5 - 1
src/test/org/apache/hadoop/io/RandomDatum.java

@@ -26,11 +26,15 @@ public class RandomDatum implements WritableComparable {
   public RandomDatum() {}
 
   public RandomDatum(Random random) {
-    length = 10 + random.nextInt(100);
+    length = 10 + (int) Math.pow(10.0, random.nextFloat() * 3.0);
     data = new byte[length];
     random.nextBytes(data);
   }
 
+  public int getLength() {
+    return length;
+  }
+  
   public void write(DataOutput out) throws IOException {
     out.writeInt(length);
     out.write(data);

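The new length formula swaps a uniform 10..109 range for a log-uniform one: nextFloat() * 3.0 is a uniform exponent in [0, 3), so lengths land in roughly [11, 1009] with each order of magnitude about equally likely, exercising the value-compression paths with both tiny and kilobyte-scale records. A quick self-contained check of that claim:

    import java.util.Random;

    public class LengthDistribution {
      public static void main(String[] args) {
        Random random = new Random(42);  // fixed seed, illustrative only
        for (int i = 0; i < 8; i++) {
          // Same expression as the RandomDatum constructor above.
          int length = 10 + (int) Math.pow(10.0, random.nextFloat() * 3.0);
          System.out.println(length);
        }
      }
    }
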
+ 37 - 20
src/test/org/apache/hadoop/io/TestSequenceFile.java

@@ -47,6 +47,7 @@ public class TestSequenceFile extends TestCase {
       new Path(System.getProperty("test.build.data",".")+"/test.bc.seq");
  
     int seed = new Random().nextInt();
+    LOG.info("Seed = " + seed);
 
     FileSystem fs = new LocalFileSystem(conf);
     try {
@@ -115,7 +116,8 @@ public class TestSequenceFile extends TestCase {
       CompressionType compressionType)
     throws IOException {
     fs.delete(file);
-    LOG.debug("creating with " + count + " records");
+    LOG.info("creating " + count + " records with " + compressionType +
+              " compression");
     SequenceFile.Writer writer = 
       SequenceFile.createWriter(fs, conf, file, 
           RandomDatum.class, RandomDatum.class, compressionType);
@@ -146,25 +148,36 @@ public class TestSequenceFile extends TestCase {
       RandomDatum key = generator.getKey();
       RandomDatum value = generator.getValue();
 
-      if ((i%5) == 10) {
-        // Testing 'raw' apis
-        rawKey.reset();
-        reader.nextRaw(rawKey, rawValue);
-      } else {
-        // Testing 'non-raw' apis 
-        if ((i%2) == 0) {
-          reader.next(k);
-          reader.getCurrentValue(v);
+      try {
+        if ((i%5) == 10) {
+          // Testing 'raw' apis
+          rawKey.reset();
+          reader.nextRaw(rawKey, rawValue);
         } else {
-          reader.next(k, v);
+          // Testing 'non-raw' apis 
+          if ((i%2) == 0) {
+            reader.next(k);
+            reader.getCurrentValue(v);
+          } else {
+            reader.next(k, v);
+          }
+          // Sanity check
+          if (!k.equals(key))
+            throw new RuntimeException("wrong key at " + i);
+          if (!v.equals(value))
+            throw new RuntimeException("wrong value at " + i);
         }
-
-        // Sanity check
-        if (!k.equals(key))
-          throw new RuntimeException("wrong key at " + i);
-        if (!v.equals(value))
-          throw new RuntimeException("wrong value at " + i);
+      } catch (IOException ioe) {
+        LOG.info("Problem on row " + i);
+        LOG.info("Expected value = " + value);
+        LOG.info("Expected len = " + value.getLength());
+        LOG.info("Actual value = " + v);
+        LOG.info("Actual len = " + v.getLength());
+        LOG.info("Key equals: " + k.equals(key));
+        LOG.info("value equals: " + v.equals(value));
+        throw ioe;
       }
+
     }
     reader.close();
   }
@@ -284,9 +297,11 @@ public class TestSequenceFile extends TestCase {
     boolean merge = false;
     String compressType = "NONE";
     Path file = null;
+    int seed = new Random().nextInt();
 
     String usage = "Usage: SequenceFile (-local | -dfs <namenode:port>) " +
-        "[-count N] " + "[-check] [-compressType <NONE|RECORD|BLOCK>] " +
+        "[-count N] " + 
+        "[-seed #] [-check] [-compressType <NONE|RECORD|BLOCK>] " +
         "[[-rwonly] | {[-megabytes M] [-factor F] [-nocreate] [-fast] [-merge]}] " +
         " file";
     if (args.length == 0) {
@@ -304,7 +319,9 @@ public class TestSequenceFile extends TestCase {
           } else if (args[i].equals("-megabytes")) {
               megabytes = Integer.parseInt(args[++i]);
           } else if (args[i].equals("-factor")) {
-              factor = Integer.parseInt(args[++i]);
+            factor = Integer.parseInt(args[++i]);
+          } else if (args[i].equals("-seed")) {
+            seed = Integer.parseInt(args[++i]);
           } else if (args[i].equals("-rwonly")) {
               rwonly = true;
           } else if (args[i].equals("-nocreate")) {
@@ -326,6 +343,7 @@ public class TestSequenceFile extends TestCase {
         LOG.info("megabytes = " + megabytes);
         LOG.info("factor = " + factor);
         LOG.info("create = " + create);
+        LOG.info("seed = " + seed);
         LOG.info("rwonly = " + rwonly);
         LOG.info("check = " + check);
         LOG.info("fast = " + fast);
@@ -338,7 +356,6 @@ public class TestSequenceFile extends TestCase {
           System.exit(-1);
         }
 
-        int seed = 0;
         CompressionType compressionType = 
           CompressionType.valueOf(compressType);
 

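The test changes all serve one reproducibility recipe: pick a random seed by default, log it up front (the "Seed = " line), and accept a -seed flag so a failing run can be replayed exactly. Stripped of the SequenceFile machinery, the pattern reduces to this standalone sketch (class name hypothetical):

    import java.util.Random;

    public class SeededRun {
      public static void main(String[] args) {
        int seed = new Random().nextInt();        // random by default
        for (int i = 0; i < args.length; i++) {
          if (args[i].equals("-seed")) {
            seed = Integer.parseInt(args[++i]);   // replay a logged seed
          }
        }
        System.out.println("seed = " + seed);     // always log it first
        Random data = new Random(seed);           // all test data derives from here
        System.out.println("first datum: " + data.nextInt());
      }
    }
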
Some files were not shown because too many files have changed