1
0
Преглед на файлове

merge -r 453780:462941, from trunk to branch-0.7, preparing for 0.7.1 release

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/branches/branch-0.7@462942 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting преди 18 години
родител
ревизия
0945dd2ff1

+ 19 - 0
CHANGES.txt

@@ -1,6 +1,25 @@
 Hadoop Change Log
 Hadoop Change Log
 
 
 
 
+Release 0.7.1 - 2006-10-11
+
+ 1. HADOOP-593.  Fix a NullPointerException in the JobTracker.
+    (omalley via cutting)
+
+ 2. HADOOP-592.  Fix a NullPointerException in the IPC Server.  Also
+    consistently log when stale calls are discarded.  (omalley via cutting)
+
+ 3. HADOOP-594.  Increase the DFS safe-mode threshold from .95 to
+    .999, so that nearly all blocks must be reported before filesystem
+    modifications are permitted.  (Konstantin Shvachko via cutting)
+
+ 4. HADOOP-598.  Fix tasks to retry when reporting completion, so that
+    a single RPC timeout won't fail a task.  (omalley via cutting)
+
+ 5. HADOOP-597.  Fix TaskTracker to not discard map outputs for errors
+    in transmitting them to reduce nodes.  (omalley via cutting)
+
+
 Release 0.7.0 - 2006-10-06
 Release 0.7.0 - 2006-10-06
 
 
  1. HADOOP-243.  Fix rounding in the display of task and job progress
  1. HADOOP-243.  Fix rounding in the display of task and job progress

+ 1 - 1
build.xml

@@ -9,7 +9,7 @@
  
  
   <property name="Name" value="Hadoop"/>
   <property name="Name" value="Hadoop"/>
   <property name="name" value="hadoop"/>
   <property name="name" value="hadoop"/>
-  <property name="version" value="0.7.1-dev"/>
+  <property name="version" value="0.7.2-dev"/>
   <property name="final.name" value="${name}-${version}"/>
   <property name="final.name" value="${name}-${version}"/>
   <property name="year" value="2006"/>
   <property name="year" value="2006"/>
   <property name="libhdfs.version" value="1"/>
   <property name="libhdfs.version" value="1"/>

+ 1 - 1
conf/hadoop-default.xml

@@ -249,7 +249,7 @@ creations/deletions), or "all".</description>
 
 
 <property>
 <property>
   <name>dfs.safemode.threshold.pct</name>
   <name>dfs.safemode.threshold.pct</name>
-  <value>0.95f</value>
+  <value>0.999f</value>
   <description>
   <description>
   	Specifies the percentage of blocks that should satisfy 
   	Specifies the percentage of blocks that should satisfy 
   	the minimal replication requirement defined by dfs.replication.min.
   	the minimal replication requirement defined by dfs.replication.min.

+ 25 - 16
site/index.html

@@ -122,6 +122,9 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a href="#News">News</a>
 <a href="#News">News</a>
 <ul class="minitoc">
 <ul class="minitoc">
 <li>
 <li>
+<a href="#11+October%2C+2006%3A+release+0.7.1+available">11 October, 2006: release 0.7.1 available</a>
+</li>
+<li>
 <a href="#6+October%2C+2006%3A+release+0.7.0+available">6 October, 2006: release 0.7.0 available</a>
 <a href="#6+October%2C+2006%3A+release+0.7.0+available">6 October, 2006: release 0.7.0 available</a>
 </li>
 </li>
 <li>
 <li>
@@ -178,73 +181,79 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a name="N1000C"></a><a name="News"></a>
 <a name="N1000C"></a><a name="News"></a>
 <h2 class="h3">News</h2>
 <h2 class="h3">News</h2>
 <div class="section">
 <div class="section">
-<a name="N10012"></a><a name="6+October%2C+2006%3A+release+0.7.0+available"></a>
+<a name="N10012"></a><a name="11+October%2C+2006%3A+release+0.7.1+available"></a>
+<h3 class="h4">11 October, 2006: release 0.7.1 available</h3>
+<p>This fixes critical bugs in 0.7.0.  For details see the <a href="http://tinyurl.com/p7qod">release notes</a>. The release can
+      be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p>
+<a name="N10024"></a><a name="6+October%2C+2006%3A+release+0.7.0+available"></a>
 <h3 class="h4">6 October, 2006: release 0.7.0 available</h3>
 <h3 class="h4">6 October, 2006: release 0.7.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/kvd9m">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/kvd9m">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10024"></a><a name="18+September%2C+2006%3A+release+0.6.2+available"></a>
+<a name="N10036"></a><a name="18+September%2C+2006%3A+release+0.6.2+available"></a>
 <h3 class="h4">18 September, 2006: release 0.6.2 available</h3>
 <h3 class="h4">18 September, 2006: release 0.6.2 available</h3>
 <p>This fixes critical bugs in 0.6.1.  For details see the <a href="http://tinyurl.com/gyb56">release notes</a>. The release can
 <p>This fixes critical bugs in 0.6.1.  For details see the <a href="http://tinyurl.com/gyb56">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10036"></a><a name="13+September%2C+2006%3A+release+0.6.1+available"></a>
+<a name="N10048"></a><a name="13+September%2C+2006%3A+release+0.6.1+available"></a>
 <h3 class="h4">13 September, 2006: release 0.6.1 available</h3>
 <h3 class="h4">13 September, 2006: release 0.6.1 available</h3>
 <p>This fixes critical bugs in 0.6.0.  For details see the <a href="http://tinyurl.com/lykp4">release notes</a>. The release can
 <p>This fixes critical bugs in 0.6.0.  For details see the <a href="http://tinyurl.com/lykp4">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10048"></a><a name="8+September%2C+2006%3A+release+0.6.0+available"></a>
+<a name="N1005A"></a><a name="8+September%2C+2006%3A+release+0.6.0+available"></a>
 <h3 class="h4">8 September, 2006: release 0.6.0 available</h3>
 <h3 class="h4">8 September, 2006: release 0.6.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/r3zoj">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/r3zoj">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1005A"></a><a name="4+August%2C+2006%3A+release+0.5.0+available"></a>
+<a name="N1006C"></a><a name="4+August%2C+2006%3A+release+0.5.0+available"></a>
 <h3 class="h4">4 August, 2006: release 0.5.0 available</h3>
 <h3 class="h4">4 August, 2006: release 0.5.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/pnml2">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/pnml2">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1006C"></a><a name="28+June%2C+2006%3A+release+0.4.0+available"></a>
+<a name="N1007E"></a><a name="28+June%2C+2006%3A+release+0.4.0+available"></a>
 <h3 class="h4">28 June, 2006: release 0.4.0 available</h3>
 <h3 class="h4">28 June, 2006: release 0.4.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/o35b6">change log</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/o35b6">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1007E"></a><a name="9+June%2C+2006%3A+release+0.3.2+available"></a>
+<a name="N10090"></a><a name="9+June%2C+2006%3A+release+0.3.2+available"></a>
 <h3 class="h4">9 June, 2006: release 0.3.2 available</h3>
 <h3 class="h4">9 June, 2006: release 0.3.2 available</h3>
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/k9g5c">change log</a>. The release can
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/k9g5c">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10090"></a><a name="8+June%2C+2006%3A+FAQ+added+to+Wiki"></a>
+<a name="N100A2"></a><a name="8+June%2C+2006%3A+FAQ+added+to+Wiki"></a>
 <h3 class="h4">8 June, 2006: FAQ added to Wiki</h3>
 <h3 class="h4">8 June, 2006: FAQ added to Wiki</h3>
 <p>Hadoop now has a <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>.  Please
 <p>Hadoop now has a <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>.  Please
       help make this more complete!
       help make this more complete!
       </p>
       </p>
-<a name="N1009E"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
+<a name="N100B0"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
 <h3 class="h4">5 June, 2006: release 0.3.1 available</h3>
 <h3 class="h4">5 June, 2006: release 0.3.1 available</h3>
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/l6on4">change log</a>. The release can
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/l6on4">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100B0"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
+<a name="N100C2"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
 <h3 class="h4">2 June, 2006: release 0.3.0 available</h3>
 <h3 class="h4">2 June, 2006: release 0.3.0 available</h3>
 <p>This includes many fixes, improving performance, scalability
 <p>This includes many fixes, improving performance, scalability
       and reliability and adding new features.  For details see the <a href="http://tinyurl.com/rq3f7">change log</a>. The release can
       and reliability and adding new features.  For details see the <a href="http://tinyurl.com/rq3f7">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100C2"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
+<a name="N100D4"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
 <h3 class="h4">12 May, 2006: release 0.2.1 available</h3>
 <h3 class="h4">12 May, 2006: release 0.2.1 available</h3>
 <p>This fixes a few bugs in release 0.2.0, listed in the <a href="http://tinyurl.com/rnnvz">change log</a>. The
 <p>This fixes a few bugs in release 0.2.0, listed in the <a href="http://tinyurl.com/rnnvz">change log</a>. The
       release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100D4"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
+<a name="N100E6"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
 <h3 class="h4">5 May, 2006: release 0.2.0 available</h3>
 <h3 class="h4">5 May, 2006: release 0.2.0 available</h3>
 <p>We are now aiming for monthly releases.  There have been many
 <p>We are now aiming for monthly releases.  There have been many
       bug fixes and improvements in the past month.  MapReduce and DFS
       bug fixes and improvements in the past month.  MapReduce and DFS
@@ -253,24 +262,24 @@ document.write("<text>Last Published:</text> " + document.lastModified);
       details. The release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       details. The release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100E6"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
+<a name="N100F8"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
 <h3 class="h4">2 April, 2006: release 0.1.0 available</h3>
 <h3 class="h4">2 April, 2006: release 0.1.0 available</h3>
 <p>This is the first Hadoop release.  The release is available
 <p>This is the first Hadoop release.  The release is available
       <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
       <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
       here</a>.</p>
       here</a>.</p>
-<a name="N100F4"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
+<a name="N10106"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
 <h3 class="h4">6 February, 2006: nightly builds</h3>
 <h3 class="h4">6 February, 2006: nightly builds</h3>
 <p>Hadoop now has nightly builds.  This automatically creates a
 <p>Hadoop now has nightly builds.  This automatically creates a
       <a href="http://cvs.apache.org/dist/lucene/hadoop/nightly/">downloadable version of Hadoop every
       <a href="http://cvs.apache.org/dist/lucene/hadoop/nightly/">downloadable version of Hadoop every
       night</a>.  All unit tests must pass, or a message is sent to
       night</a>.  All unit tests must pass, or a message is sent to
       the developers mailing list and no new version is created.  This
       the developers mailing list and no new version is created.  This
       also updates the <a href="docs/api/">javadoc</a>.</p>
       also updates the <a href="docs/api/">javadoc</a>.</p>
-<a name="N10106"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
+<a name="N10118"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
 <h3 class="h4">3 February, 2006: Hadoop code moved out of Nutch</h3>
 <h3 class="h4">3 February, 2006: Hadoop code moved out of Nutch</h3>
 <p>The Hadoop code has now been moved into its own Subversion
 <p>The Hadoop code has now been moved into its own Subversion
       tree, renamed into packages under <span class="codefrag">org.apache.hadoop</span>.
       tree, renamed into packages under <span class="codefrag">org.apache.hadoop</span>.
       All unit tests pass, but little else has yet been tested.</p>
       All unit tests pass, but little else has yet been tested.</p>
-<a name="N10113"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
+<a name="N10125"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
 <h3 class="h4">30 March, 2006: Hadoop project approved</h3>
 <h3 class="h4">30 March, 2006: Hadoop project approved</h3>
 <p>The Lucene PMC has elected to split the Nutch MapReduce and
 <p>The Lucene PMC has elected to split the Nutch MapReduce and
       distributed filesytem code into a new project named Hadoop.</p>
       distributed filesytem code into a new project named Hadoop.</p>

Файловите разлики са ограничени, защото са твърде много
+ 28 - 17
site/index.pdf


+ 18 - 5
src/java/org/apache/hadoop/ipc/Server.java

@@ -30,6 +30,7 @@ import java.nio.channels.Selector;
 import java.nio.channels.ServerSocketChannel;
 import java.nio.channels.ServerSocketChannel;
 import java.nio.channels.SocketChannel;
 import java.nio.channels.SocketChannel;
 
 
+import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.InetSocketAddress;
 import java.net.Socket;
 import java.net.Socket;
 
 
@@ -351,6 +352,10 @@ public abstract class Server {
     private long lastContact;
     private long lastContact;
     private int dataLength;
     private int dataLength;
     private Socket socket;
     private Socket socket;
+    // Cache the remote host & port info so that even if the socket is 
+    // disconnected, we can say where it used to connect to.
+    private String hostAddress;
+    private int remotePort;
 
 
     public Connection(SelectionKey key, SocketChannel channel, 
     public Connection(SelectionKey key, SocketChannel channel, 
     long lastContact) {
     long lastContact) {
@@ -363,14 +368,21 @@ public abstract class Server {
       this.out = new DataOutputStream
       this.out = new DataOutputStream
         (new BufferedOutputStream(
         (new BufferedOutputStream(
          this.channelOut = new SocketChannelOutputStream(channel, 4096)));
          this.channelOut = new SocketChannelOutputStream(channel, 4096)));
+      InetAddress addr = socket.getInetAddress();
+      if (addr == null) {
+        this.hostAddress = "*Unknown*";
+      } else {
+        this.hostAddress = addr.getHostAddress();
+      }
+      this.remotePort = socket.getPort();
     }   
     }   
 
 
     public String toString() {
     public String toString() {
-      return getHostAddress() + ":" + socket.getPort(); 
+      return getHostAddress() + ":" + remotePort; 
     }
     }
     
     
     public String getHostAddress() {
     public String getHostAddress() {
-      return socket.getInetAddress().getHostAddress();
+      return hostAddress;
     }
     }
 
 
     public void setLastContact(long lastContact) {
     public void setLastContact(long lastContact) {
@@ -431,7 +443,8 @@ public abstract class Server {
       Call call = new Call(id, param, this);
       Call call = new Call(id, param, this);
       synchronized (callQueue) {
       synchronized (callQueue) {
         if (callQueue.size() >= maxQueueSize) {
         if (callQueue.size() >= maxQueueSize) {
-          callQueue.removeFirst();
+          Call oldCall = (Call) callQueue.removeFirst();
+          LOG.warn("Call queue overflow discarding oldest call " + oldCall);
         }
         }
         callQueue.addLast(call);              // queue the call
         callQueue.addLast(call);              // queue the call
         callQueue.notify();                   // wake up a waiting handler
         callQueue.notify();                   // wake up a waiting handler
@@ -484,7 +497,7 @@ public abstract class Server {
           // throw the message away if it is too old
           // throw the message away if it is too old
           if (System.currentTimeMillis() - call.receivedTime > 
           if (System.currentTimeMillis() - call.receivedTime > 
               maxCallStartAge) {
               maxCallStartAge) {
-            LOG.info("Call " + call.toString() + 
+            LOG.warn("Call " + call.toString() + 
                      " discarded for being too old (" +
                      " discarded for being too old (" +
                      (System.currentTimeMillis() - call.receivedTime) + ")");
                      (System.currentTimeMillis() - call.receivedTime) + ")");
             continue;
             continue;
@@ -492,7 +505,7 @@ public abstract class Server {
           
           
           if (LOG.isDebugEnabled())
           if (LOG.isDebugEnabled())
             LOG.debug(getName() + ": has #" + call.id + " from " +
             LOG.debug(getName() + ": has #" + call.id + " from " +
-                     call.connection.socket.getInetAddress().getHostAddress());
+                     call.connection);
           
           
           String errorClass = null;
           String errorClass = null;
           String error = null;
           String error = null;

+ 11 - 14
src/java/org/apache/hadoop/mapred/JobInProgress.java

@@ -20,7 +20,6 @@ import org.apache.commons.logging.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.mapred.JobTracker.JobTrackerMetrics;
 import org.apache.hadoop.mapred.JobTracker.JobTrackerMetrics;
-import org.apache.hadoop.mapred.JobHistory.Keys ; 
 import org.apache.hadoop.mapred.JobHistory.Values ; 
 import org.apache.hadoop.mapred.JobHistory.Values ; 
 import java.io.*;
 import java.io.*;
 import java.net.*;
 import java.net.*;
@@ -33,7 +32,7 @@ import java.util.*;
 // doing bookkeeping of its Tasks.
 // doing bookkeeping of its Tasks.
 ///////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////
 class JobInProgress {
 class JobInProgress {
-    public static final Log LOG = LogFactory.getLog("org.apache.hadoop.mapred.JobInProgress");
+    private static final Log LOG = LogFactory.getLog("org.apache.hadoop.mapred.JobInProgress");
 
 
     JobProfile profile;
     JobProfile profile;
     JobStatus status;
     JobStatus status;
@@ -473,25 +472,24 @@ class JobInProgress {
                    " successfully.");          
                    " successfully.");          
 
 
           String taskTrackerName = status.getTaskTracker();
           String taskTrackerName = status.getTaskTracker();
-          TaskTrackerStatus taskTracker = this.jobtracker.getTaskTracker(taskTrackerName);
           
           
           if(status.getIsMap()){
           if(status.getIsMap()){
             JobHistory.MapAttempt.logStarted(profile.getJobId(), 
             JobHistory.MapAttempt.logStarted(profile.getJobId(), 
                 tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
                 tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
-                taskTracker.getHost()); 
+                taskTrackerName); 
             JobHistory.MapAttempt.logFinished(profile.getJobId(), 
             JobHistory.MapAttempt.logFinished(profile.getJobId(), 
                 tip.getTIPId(), status.getTaskId(), status.getFinishTime(), 
                 tip.getTIPId(), status.getTaskId(), status.getFinishTime(), 
-                taskTracker.getHost()); 
+                taskTrackerName); 
             JobHistory.Task.logFinished(profile.getJobId(), tip.getTIPId(), 
             JobHistory.Task.logFinished(profile.getJobId(), tip.getTIPId(), 
                 Values.MAP.name(), status.getFinishTime()); 
                 Values.MAP.name(), status.getFinishTime()); 
           }else{
           }else{
               JobHistory.ReduceAttempt.logStarted(profile.getJobId(), 
               JobHistory.ReduceAttempt.logStarted(profile.getJobId(), 
                   tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
                   tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
-                  taskTracker.getHost()); 
+                  taskTrackerName); 
               JobHistory.ReduceAttempt.logFinished(profile.getJobId(), 
               JobHistory.ReduceAttempt.logFinished(profile.getJobId(), 
                   tip.getTIPId(), status.getTaskId(), status.getShuffleFinishTime(),
                   tip.getTIPId(), status.getTaskId(), status.getShuffleFinishTime(),
                   status.getSortFinishTime(), status.getFinishTime(), 
                   status.getSortFinishTime(), status.getFinishTime(), 
-                  taskTracker.getHost()); 
+                  taskTrackerName); 
               JobHistory.Task.logFinished(profile.getJobId(), tip.getTIPId(), 
               JobHistory.Task.logFinished(profile.getJobId(), tip.getTIPId(), 
                   Values.REDUCE.name(), status.getFinishTime()); 
                   Values.REDUCE.name(), status.getFinishTime()); 
           }
           }
@@ -609,21 +607,20 @@ class JobInProgress {
         
         
         // update job history
         // update job history
         String taskTrackerName = status.getTaskTracker();
         String taskTrackerName = status.getTaskTracker();
-        TaskTrackerStatus taskTracker = this.jobtracker.getTaskTracker(taskTrackerName);
-        if(status.getIsMap()){
+        if (status.getIsMap()) {
           JobHistory.MapAttempt.logStarted(profile.getJobId(), 
           JobHistory.MapAttempt.logStarted(profile.getJobId(), 
               tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
               tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
-              taskTracker.getHost()); 
+              taskTrackerName); 
           JobHistory.MapAttempt.logFailed(profile.getJobId(), 
           JobHistory.MapAttempt.logFailed(profile.getJobId(), 
               tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
               tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
-              taskTracker.getHost(), status.getDiagnosticInfo()); 
-        }else{
+              taskTrackerName, status.getDiagnosticInfo()); 
+        } else {
           JobHistory.ReduceAttempt.logStarted(profile.getJobId(), 
           JobHistory.ReduceAttempt.logStarted(profile.getJobId(), 
               tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
               tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
-              taskTracker.getHost()); 
+              taskTrackerName); 
           JobHistory.ReduceAttempt.logFailed(profile.getJobId(), 
           JobHistory.ReduceAttempt.logFailed(profile.getJobId(), 
               tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
               tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
-              taskTracker.getHost(), status.getDiagnosticInfo()); 
+              taskTrackerName, status.getDiagnosticInfo()); 
         }
         }
         
         
         // After this, try to assign tasks with the one after this, so that
         // After this, try to assign tasks with the one after this, so that

+ 15 - 8
src/java/org/apache/hadoop/mapred/JobTracker.java

@@ -21,6 +21,7 @@ import org.apache.commons.logging.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.ipc.*;
 import org.apache.hadoop.ipc.*;
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.StringUtils;
 
 
 import java.io.*;
 import java.io.*;
 import java.net.*;
 import java.net.*;
@@ -114,8 +115,8 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
       private Map launchingTasks = new LinkedHashMap();
       private Map launchingTasks = new LinkedHashMap();
       
       
       public void run() {
       public void run() {
-        try {
-          while (shouldRun) {
+        while (shouldRun) {
+          try {
             // Every 3 minutes check for any tasks that are overdue
             // Every 3 minutes check for any tasks that are overdue
             Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL/3);
             Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL/3);
             long now = System.currentTimeMillis();
             long now = System.currentTimeMillis();
@@ -151,9 +152,13 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
                 }
                 }
               }
               }
             }
             }
+          } catch (InterruptedException ie) {
+            // all done
+            return;
+          } catch (Exception e) {
+            LOG.error("Expire Launching Task Thread got exception: " +
+                      StringUtils.stringifyException(e));
           }
           }
-        } catch (InterruptedException ie) {
-          // all done
         }
         }
       }
       }
       
       
@@ -188,15 +193,13 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
          */
          */
         public void run() {
         public void run() {
             while (shouldRun) {
             while (shouldRun) {
+              try {
                 //
                 //
                 // Thread runs periodically to check whether trackers should be expired.
                 // Thread runs periodically to check whether trackers should be expired.
                 // The sleep interval must be no more than half the maximum expiry time
                 // The sleep interval must be no more than half the maximum expiry time
                 // for a task tracker.
                 // for a task tracker.
                 //
                 //
-                try {
-                    Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3);
-                } catch (InterruptedException ie) {
-                }
+                Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3);
 
 
                 //
                 //
                 // Loop through all expired items in the queue
                 // Loop through all expired items in the queue
@@ -232,6 +235,10 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
                         }
                         }
                     }
                     }
                 }
                 }
+              } catch (Exception t) {
+                LOG.error("Tracker Expiry Thread got exception: " +
+                          StringUtils.stringifyException(t));
+              }
             }
             }
         }
         }
         
         

+ 21 - 5
src/java/org/apache/hadoop/mapred/Task.java

@@ -176,10 +176,26 @@ abstract class Task implements Writable, Configurable {
     }
     }
   }
   }
 
 
-  public void done(TaskUmbilicalProtocol umbilical)
-    throws IOException {
-    umbilical.progress(getTaskId(),               // send a final status report
-                       taskProgress.get(), taskProgress.toString(), phase);
-    umbilical.done(getTaskId());
+  public void done(TaskUmbilicalProtocol umbilical) throws IOException {
+    int retries = 10;
+    boolean needProgress = true;
+    while (true) {
+      try {
+        if (needProgress) {
+          // send a final status report
+          umbilical.progress(getTaskId(), taskProgress.get(), 
+                             taskProgress.toString(), phase);
+          needProgress = false;
+        }
+        umbilical.done(getTaskId());
+        return;
+      } catch (IOException ie) {
+        LOG.warn("Failure signalling completion: " + 
+                 StringUtils.stringifyException(ie));
+        if (--retries == 0) {
+          throw ie;
+        }
+      }
+    }
   }
   }
 }
 }

+ 15 - 6
src/java/org/apache/hadoop/mapred/TaskTracker.java

@@ -1365,30 +1365,39 @@ public class TaskTracker
         Path filename = conf.getLocalPath(mapId+"/part-"+reduce+".out");
         Path filename = conf.getLocalPath(mapId+"/part-"+reduce+".out");
         response.setContentLength((int) fileSys.getLength(filename));
         response.setContentLength((int) fileSys.getLength(filename));
         InputStream inStream = null;
         InputStream inStream = null;
+        // true iff IOException was caused by attempt to access input
+        boolean isInputException = true;
         try {
         try {
           inStream = fileSys.open(filename);
           inStream = fileSys.open(filename);
           try {
           try {
             int len = inStream.read(buffer);
             int len = inStream.read(buffer);
             while (len > 0) {
             while (len > 0) {
-              outStream.write(buffer, 0, len);
+              try {
+                outStream.write(buffer, 0, len);
+              } catch (IOException ie) {
+                isInputException = false;
+                throw ie;
+              }
               len = inStream.read(buffer);
               len = inStream.read(buffer);
             }
             }
           } finally {
           } finally {
             inStream.close();
             inStream.close();
-            outStream.close();
           }
           }
         } catch (IOException ie) {
         } catch (IOException ie) {
           TaskTracker tracker = 
           TaskTracker tracker = 
             (TaskTracker) context.getAttribute("task.tracker");
             (TaskTracker) context.getAttribute("task.tracker");
           Log log = (Log) context.getAttribute("log");
           Log log = (Log) context.getAttribute("log");
-          String errorMsg = "getMapOutput(" + mapId + "," + reduceId + 
-          ") failed :\n"+
-          StringUtils.stringifyException(ie);
+          String errorMsg = ("getMapOutput(" + mapId + "," + reduceId + 
+                             ") failed :\n"+
+                             StringUtils.stringifyException(ie));
           log.warn(errorMsg);
           log.warn(errorMsg);
-          tracker.mapOutputLost(mapId, errorMsg);
+          if (isInputException) {
+            tracker.mapOutputLost(mapId, errorMsg);
+          }
           response.sendError(HttpServletResponse.SC_GONE, errorMsg);
           response.sendError(HttpServletResponse.SC_GONE, errorMsg);
           throw ie;
           throw ie;
         } 
         } 
+        outStream.close();
       }
       }
     }
     }
 }
 }

+ 1 - 1
src/java/overview.html

@@ -16,7 +16,7 @@ href="org/apache/hadoop/mapred/package-summary.html">org.apache.hadoop.mapred
 
 
 <ol>
 <ol>
   
   
-<li>Java 1.4.x, preferably from <a
+<li>Java 1.5.x, preferably from <a
  href="http://java.sun.com/j2se/downloads.html">Sun</a> Set
  href="http://java.sun.com/j2se/downloads.html">Sun</a> Set
  <tt>JAVA_HOME</tt> to the root of your Java installation.</li>
  <tt>JAVA_HOME</tt> to the root of your Java installation.</li>
   
   

+ 9 - 0
src/site/src/documentation/content/xdocs/index.xml

@@ -14,6 +14,15 @@
     <section>
     <section>
       <title>News</title>
       <title>News</title>
 
 
+      <section>
+      <title>11 October, 2006: release 0.7.1 available</title>
+      <p>This fixes critical bugs in 0.7.0.  For details see the <a
+      href="http://tinyurl.com/p7qod">release notes</a>. The release can
+      be obtained from <a
+      href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p> </section>
+
       <section>
       <section>
       <title>6 October, 2006: release 0.7.0 available</title>
       <title>6 October, 2006: release 0.7.0 available</title>
       <p>For details see the <a
       <p>For details see the <a

Някои файлове не бяха показани, защото твърде много файлове са променени