Quellcode durchsuchen

Merging changes from HADOOP-639, HADOOP-791 and HADOOP-827 into 0.9 branch, preparing for 0.9.2 release.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/branches/branch-0.9@487697 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting vor 18 Jahren
Ursprung
Commit
56b23dc7b0

+ 13 - 0
CHANGES.txt

@@ -1,5 +1,18 @@
 Hadoop Change Log
 Hadoop Change Log
 
 
+
+Release 0.9.2 - 2006-12-15
+
+ 1. HADOOP-639. Restructure InterTrackerProtocol to make task
+    accounting more reliable.  (Arun C Murthy via cutting)
+
+ 2. HADOOP-827. Turn off speculative execution by default, since it's
+    currently broken.  (omalley via cutting)
+
+ 3. HADOOP-791. Fix a deadlock in the task tracker.
+    (Mahadev Konar via cutting)
+
+
 Release 0.9.1 - 2006-12-06
 Release 0.9.1 - 2006-12-06
 
 
  1. HADOOP-780. Use ReflectionUtils to instantiate key and value
  1. HADOOP-780. Use ReflectionUtils to instantiate key and value

+ 1 - 1
build.xml

@@ -9,7 +9,7 @@
  
  
   <property name="Name" value="Hadoop"/>
   <property name="Name" value="Hadoop"/>
   <property name="name" value="hadoop"/>
   <property name="name" value="hadoop"/>
-  <property name="version" value="0.9.2-dev"/>
+  <property name="version" value="0.9.3-dev"/>
   <property name="final.name" value="${name}-${version}"/>
   <property name="final.name" value="${name}-${version}"/>
   <property name="year" value="2006"/>
   <property name="year" value="2006"/>
   <property name="libhdfs.version" value="1"/>
   <property name="libhdfs.version" value="1"/>

+ 3 - 3
conf/hadoop-default.xml

@@ -441,9 +441,9 @@ creations/deletions), or "all".</description>
 
 
 <property>
 <property>
   <name>mapred.speculative.execution</name>
   <name>mapred.speculative.execution</name>
-  <value>true</value>
-  <description>If true, then multiple instances of some map tasks may
-  be executed in parallel.</description>
+  <value>false</value>
+  <description>If true, then multiple instances of some map and reduce tasks 
+               may be executed in parallel.</description>
 </property>
 </property>
 
 
 <property>
 <property>

+ 30 - 21
site/index.html

@@ -125,6 +125,9 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a href="#News">News</a>
 <a href="#News">News</a>
 <ul class="minitoc">
 <ul class="minitoc">
 <li>
 <li>
+<a href="#15+December%2C+2006%3A+release+0.9.2+available">15 December, 2006: release 0.9.2 available</a>
+</li>
+<li>
 <a href="#6+December%2C+2006%3A+release+0.9.1+available">6 December, 2006: release 0.9.1 available</a>
 <a href="#6+December%2C+2006%3A+release+0.9.1+available">6 December, 2006: release 0.9.1 available</a>
 </li>
 </li>
 <li>
 <li>
@@ -196,103 +199,109 @@ document.write("<text>Last Published:</text> " + document.lastModified);
 <a name="N1000C"></a><a name="News"></a>
 <a name="N1000C"></a><a name="News"></a>
 <h2 class="h3">News</h2>
 <h2 class="h3">News</h2>
 <div class="section">
 <div class="section">
-<a name="N10012"></a><a name="6+December%2C+2006%3A+release+0.9.1+available"></a>
+<a name="N10012"></a><a name="15+December%2C+2006%3A+release+0.9.2+available"></a>
+<h3 class="h4">15 December, 2006: release 0.9.2 available</h3>
+<p>This fixes critical bugs in 0.9.1.  For details see the <a href="http://tinyurl.com/ya8lfd">release notes</a>. The release can
+      be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p>
+<a name="N10024"></a><a name="6+December%2C+2006%3A+release+0.9.1+available"></a>
 <h3 class="h4">6 December, 2006: release 0.9.1 available</h3>
 <h3 class="h4">6 December, 2006: release 0.9.1 available</h3>
 <p>This fixes critical bugs in 0.9.0.  For details see the <a href="http://tinyurl.com/y55d7p">release notes</a>. The release can
 <p>This fixes critical bugs in 0.9.0.  For details see the <a href="http://tinyurl.com/y55d7p">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10024"></a><a name="1+December%2C+2006%3A+release+0.9.0+available"></a>
+<a name="N10036"></a><a name="1+December%2C+2006%3A+release+0.9.0+available"></a>
 <h3 class="h4">1 December, 2006: release 0.9.0 available</h3>
 <h3 class="h4">1 December, 2006: release 0.9.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/sdjhb">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/sdjhb">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10036"></a><a name="3+November%2C+2006%3A+release+0.8.0+available"></a>
+<a name="N10048"></a><a name="3+November%2C+2006%3A+release+0.8.0+available"></a>
 <h3 class="h4">3 November, 2006: release 0.8.0 available</h3>
 <h3 class="h4">3 November, 2006: release 0.8.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/ykqv6s">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/ykqv6s">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10048"></a><a name="18+October%2C+2006%3A+release+0.7.2+available"></a>
+<a name="N1005A"></a><a name="18+October%2C+2006%3A+release+0.7.2+available"></a>
 <h3 class="h4">18 October, 2006: release 0.7.2 available</h3>
 <h3 class="h4">18 October, 2006: release 0.7.2 available</h3>
 <p>This fixes critical bugs in 0.7.1.  For details see the <a href="http://tinyurl.com/ygy6y7">release notes</a>. The release can
 <p>This fixes critical bugs in 0.7.1.  For details see the <a href="http://tinyurl.com/ygy6y7">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1005A"></a><a name="11+October%2C+2006%3A+release+0.7.1+available"></a>
+<a name="N1006C"></a><a name="11+October%2C+2006%3A+release+0.7.1+available"></a>
 <h3 class="h4">11 October, 2006: release 0.7.1 available</h3>
 <h3 class="h4">11 October, 2006: release 0.7.1 available</h3>
 <p>This fixes critical bugs in 0.7.0.  For details see the <a href="http://tinyurl.com/p7qod">release notes</a>. The release can
 <p>This fixes critical bugs in 0.7.0.  For details see the <a href="http://tinyurl.com/p7qod">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1006C"></a><a name="6+October%2C+2006%3A+release+0.7.0+available"></a>
+<a name="N1007E"></a><a name="6+October%2C+2006%3A+release+0.7.0+available"></a>
 <h3 class="h4">6 October, 2006: release 0.7.0 available</h3>
 <h3 class="h4">6 October, 2006: release 0.7.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/kvd9m">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/kvd9m">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1007E"></a><a name="18+September%2C+2006%3A+release+0.6.2+available"></a>
+<a name="N10090"></a><a name="18+September%2C+2006%3A+release+0.6.2+available"></a>
 <h3 class="h4">18 September, 2006: release 0.6.2 available</h3>
 <h3 class="h4">18 September, 2006: release 0.6.2 available</h3>
 <p>This fixes critical bugs in 0.6.1.  For details see the <a href="http://tinyurl.com/gyb56">release notes</a>. The release can
 <p>This fixes critical bugs in 0.6.1.  For details see the <a href="http://tinyurl.com/gyb56">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10090"></a><a name="13+September%2C+2006%3A+release+0.6.1+available"></a>
+<a name="N100A2"></a><a name="13+September%2C+2006%3A+release+0.6.1+available"></a>
 <h3 class="h4">13 September, 2006: release 0.6.1 available</h3>
 <h3 class="h4">13 September, 2006: release 0.6.1 available</h3>
 <p>This fixes critical bugs in 0.6.0.  For details see the <a href="http://tinyurl.com/lykp4">release notes</a>. The release can
 <p>This fixes critical bugs in 0.6.0.  For details see the <a href="http://tinyurl.com/lykp4">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100A2"></a><a name="8+September%2C+2006%3A+release+0.6.0+available"></a>
+<a name="N100B4"></a><a name="8+September%2C+2006%3A+release+0.6.0+available"></a>
 <h3 class="h4">8 September, 2006: release 0.6.0 available</h3>
 <h3 class="h4">8 September, 2006: release 0.6.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/r3zoj">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/r3zoj">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100B4"></a><a name="4+August%2C+2006%3A+release+0.5.0+available"></a>
+<a name="N100C6"></a><a name="4+August%2C+2006%3A+release+0.5.0+available"></a>
 <h3 class="h4">4 August, 2006: release 0.5.0 available</h3>
 <h3 class="h4">4 August, 2006: release 0.5.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/pnml2">release notes</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/pnml2">release notes</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100C6"></a><a name="28+June%2C+2006%3A+release+0.4.0+available"></a>
+<a name="N100D8"></a><a name="28+June%2C+2006%3A+release+0.4.0+available"></a>
 <h3 class="h4">28 June, 2006: release 0.4.0 available</h3>
 <h3 class="h4">28 June, 2006: release 0.4.0 available</h3>
 <p>For details see the <a href="http://tinyurl.com/o35b6">change log</a>. The release can
 <p>For details see the <a href="http://tinyurl.com/o35b6">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100D8"></a><a name="9+June%2C+2006%3A+release+0.3.2+available"></a>
+<a name="N100EA"></a><a name="9+June%2C+2006%3A+release+0.3.2+available"></a>
 <h3 class="h4">9 June, 2006: release 0.3.2 available</h3>
 <h3 class="h4">9 June, 2006: release 0.3.2 available</h3>
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/k9g5c">change log</a>. The release can
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/k9g5c">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N100EA"></a><a name="8+June%2C+2006%3A+FAQ+added+to+Wiki"></a>
+<a name="N100FC"></a><a name="8+June%2C+2006%3A+FAQ+added+to+Wiki"></a>
 <h3 class="h4">8 June, 2006: FAQ added to Wiki</h3>
 <h3 class="h4">8 June, 2006: FAQ added to Wiki</h3>
 <p>Hadoop now has a <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>.  Please
 <p>Hadoop now has a <a href="http://wiki.apache.org/lucene-hadoop/FAQ">FAQ</a>.  Please
       help make this more complete!
       help make this more complete!
       </p>
       </p>
-<a name="N100F8"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
+<a name="N1010A"></a><a name="5+June%2C+2006%3A+release+0.3.1+available"></a>
 <h3 class="h4">5 June, 2006: release 0.3.1 available</h3>
 <h3 class="h4">5 June, 2006: release 0.3.1 available</h3>
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/l6on4">change log</a>. The release can
 <p>This is a bugfix release.  For details see the <a href="http://tinyurl.com/l6on4">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1010A"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
+<a name="N1011C"></a><a name="2+June%2C+2006%3A+release+0.3.0+available"></a>
 <h3 class="h4">2 June, 2006: release 0.3.0 available</h3>
 <h3 class="h4">2 June, 2006: release 0.3.0 available</h3>
 <p>This includes many fixes, improving performance, scalability
 <p>This includes many fixes, improving performance, scalability
       and reliability and adding new features.  For details see the <a href="http://tinyurl.com/rq3f7">change log</a>. The release can
       and reliability and adding new features.  For details see the <a href="http://tinyurl.com/rq3f7">change log</a>. The release can
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1011C"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
+<a name="N1012E"></a><a name="12+May%2C+2006%3A+release+0.2.1+available"></a>
 <h3 class="h4">12 May, 2006: release 0.2.1 available</h3>
 <h3 class="h4">12 May, 2006: release 0.2.1 available</h3>
 <p>This fixes a few bugs in release 0.2.0, listed in the <a href="http://tinyurl.com/rnnvz">change log</a>. The
 <p>This fixes a few bugs in release 0.2.0, listed in the <a href="http://tinyurl.com/rnnvz">change log</a>. The
       release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N1012E"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
+<a name="N10140"></a><a name="5+May%2C+2006%3A+release+0.2.0+available"></a>
 <h3 class="h4">5 May, 2006: release 0.2.0 available</h3>
 <h3 class="h4">5 May, 2006: release 0.2.0 available</h3>
 <p>We are now aiming for monthly releases.  There have been many
 <p>We are now aiming for monthly releases.  There have been many
       bug fixes and improvements in the past month.  MapReduce and DFS
       bug fixes and improvements in the past month.  MapReduce and DFS
@@ -301,24 +310,24 @@ document.write("<text>Last Published:</text> " + document.lastModified);
       details. The release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       details. The release can be obtained from <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
       nearby mirror</a>.
       nearby mirror</a>.
       </p>
       </p>
-<a name="N10140"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
+<a name="N10152"></a><a name="2+April%2C+2006%3A+release+0.1.0+available"></a>
 <h3 class="h4">2 April, 2006: release 0.1.0 available</h3>
 <h3 class="h4">2 April, 2006: release 0.1.0 available</h3>
 <p>This is the first Hadoop release.  The release is available
 <p>This is the first Hadoop release.  The release is available
       <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
       <a href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/">
       here</a>.</p>
       here</a>.</p>
-<a name="N1014E"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
+<a name="N10160"></a><a name="6+February%2C+2006%3A+nightly+builds"></a>
 <h3 class="h4">6 February, 2006: nightly builds</h3>
 <h3 class="h4">6 February, 2006: nightly builds</h3>
 <p>Hadoop now has nightly builds.  This automatically creates a
 <p>Hadoop now has nightly builds.  This automatically creates a
       <a href="http://cvs.apache.org/dist/lucene/hadoop/nightly/">downloadable version of Hadoop every
       <a href="http://cvs.apache.org/dist/lucene/hadoop/nightly/">downloadable version of Hadoop every
       night</a>.  All unit tests must pass, or a message is sent to
       night</a>.  All unit tests must pass, or a message is sent to
       the developers mailing list and no new version is created.  This
       the developers mailing list and no new version is created.  This
       also updates the <a href="docs/api/">javadoc</a>.</p>
       also updates the <a href="docs/api/">javadoc</a>.</p>
-<a name="N10160"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
+<a name="N10172"></a><a name="3+February%2C+2006%3A+Hadoop+code+moved+out+of+Nutch"></a>
 <h3 class="h4">3 February, 2006: Hadoop code moved out of Nutch</h3>
 <h3 class="h4">3 February, 2006: Hadoop code moved out of Nutch</h3>
 <p>The Hadoop code has now been moved into its own Subversion
 <p>The Hadoop code has now been moved into its own Subversion
       tree, renamed into packages under <span class="codefrag">org.apache.hadoop</span>.
       tree, renamed into packages under <span class="codefrag">org.apache.hadoop</span>.
       All unit tests pass, but little else has yet been tested.</p>
       All unit tests pass, but little else has yet been tested.</p>
-<a name="N1016D"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
+<a name="N1017F"></a><a name="30+March%2C+2006%3A+Hadoop+project+approved"></a>
 <h3 class="h4">30 March, 2006: Hadoop project approved</h3>
 <h3 class="h4">30 March, 2006: Hadoop project approved</h3>
 <p>The Lucene PMC has elected to split the Nutch MapReduce and
 <p>The Lucene PMC has elected to split the Nutch MapReduce and
       distributed filesystem code into a new project named Hadoop.</p>
       distributed filesystem code into a new project named Hadoop.</p>

Datei-Diff unterdrückt, da er zu groß ist
+ 31 - 20
site/index.pdf


+ 102 - 0
src/java/org/apache/hadoop/mapred/HeartbeatResponse.java

@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+
+/**
+ * The response sent by the {@link JobTracker} to the heartbeat sent
+ * periodically by the {@link TaskTracker}.
+ * 
+ * @author Arun C Murthy
+ */
+class HeartbeatResponse implements Writable, Configurable {
+  Configuration conf = null;
+  short responseId;
+  TaskTrackerAction[] actions;
+
+  HeartbeatResponse() {}
+  
+  HeartbeatResponse(short responseId, TaskTrackerAction[] actions) {
+    this.responseId = responseId;
+    this.actions = actions;
+  }
+  
+  public void setResponseId(short responseId) {
+    this.responseId = responseId; 
+  }
+  
+  public short getResponseId() {
+    return responseId;
+  }
+  
+  public void setActions(TaskTrackerAction[] actions) {
+    this.actions = actions;
+  }
+  
+  public TaskTrackerAction[] getActions() {
+    return actions;
+  }
+  
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  public void write(DataOutput out) throws IOException {
+    out.writeShort(responseId);
+    if (actions == null) {
+      WritableUtils.writeVInt(out, 0);
+    } else {
+      WritableUtils.writeVInt(out, actions.length);
+      for (TaskTrackerAction action : actions) {
+        WritableUtils.writeEnum(out, action.getActionId());
+        action.write(out);
+      }
+    }
+    //ObjectWritable.writeObject(out, actions, actions.getClass(), conf);
+  }
+  
+  public void readFields(DataInput in) throws IOException {
+    this.responseId = in.readShort();
+    int length = WritableUtils.readVInt(in);
+    if (length > 0) {
+      actions = new TaskTrackerAction[length];
+      for (int i=0; i < length; ++i) {
+        TaskTrackerAction.ActionType actionType = 
+          WritableUtils.readEnum(in, TaskTrackerAction.ActionType.class);
+        actions[i] = TaskTrackerAction.createAction(actionType);
+        actions[i].readFields(in);
+      }
+    } else {
+      actions = null;
+    }
+    //actions = (TaskTrackerAction[]) ObjectWritable.readObject(in, conf);
+  }
+}

+ 29 - 20
src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java

@@ -27,31 +27,40 @@ import org.apache.hadoop.ipc.VersionedProtocol;
  * The JobTracker is the Server, which implements this protocol.
  * The JobTracker is the Server, which implements this protocol.
  */ 
  */ 
 interface InterTrackerProtocol extends VersionedProtocol {
 interface InterTrackerProtocol extends VersionedProtocol {
-  // version 2 introduced to replace TaskStatus.State with an enum
-  public static final long versionID = 2L;
+  /**
+   * version 3 introduced to replace 
+   * emitHeartbeat/pollForNewTask/pollForTaskWithClosedJob with
+   * {@link #heartbeat(TaskTrackerStatus, boolean, boolean, short)}
+   */
+  public static final long versionID = 3L;
   
   
   public final static int TRACKERS_OK = 0;
   public final static int TRACKERS_OK = 0;
   public final static int UNKNOWN_TASKTRACKER = 1;
   public final static int UNKNOWN_TASKTRACKER = 1;
 
 
-  /** 
-   * Called regularly by the task tracker to update the status of its tasks
-   * within the job tracker.  JobTracker responds with a code that tells the 
-   * TaskTracker whether all is well.
-   *
-   * TaskTracker must also indicate whether this is the first interaction
-   * (since state refresh)
+  /**
+   * Called regularly by the {@link TaskTracker} to update the status of its 
+   * tasks within the job tracker. {@link JobTracker} responds with a 
+   * {@link HeartbeatResponse} that directs the 
+   * {@link TaskTracker} to undertake a series of 'actions' 
+   * (see {@link org.apache.hadoop.mapred.TaskTrackerAction.ActionType}).  
+   * 
+   * {@link TaskTracker} must also indicate whether this is the first 
+   * interaction (since state refresh) and acknowledge the last response
+   * it received from the {@link JobTracker}.
+   * 
+   * @param status the status update
+   * @param initialContact <code>true</code> if this is first interaction since
+   *                       'refresh', <code>false</code> otherwise.
+   * @param acceptNewTasks <code>true</code> if the {@link TaskTracker} is
+   *                       ready to accept new tasks to run.                 
+   * @param responseId the last responseId successfully acted upon by the
+   *                   {@link TaskTracker}.
+   * @return a {@link org.apache.hadoop.mapred.HeartbeatResponse} with 
+   *         fresh instructions.
    */
    */
-  int emitHeartbeat(TaskTrackerStatus status, 
-                    boolean initialContact) throws IOException;
-
-  /** Called to get new tasks from from the job tracker for this tracker.*/
-  Task pollForNewTask(String trackerName) throws IOException;
-
-  /** Called to find which tasks that have been run by this tracker should now
-   * be closed because their job is complete.  This is used to, e.g., 
-   * notify a map task that its output is no longer needed and may 
-   * be removed. */
-  String[] pollForTaskWithClosedJob(String trackerName) throws IOException;
+  HeartbeatResponse heartbeat(TaskTrackerStatus status, 
+          boolean initialContact, boolean acceptNewTasks, short responseId)
+  throws IOException;
 
 
   /** Called by a reduce task to find which map tasks are completed.
   /** Called by a reduce task to find which map tasks are completed.
    *
    *

+ 105 - 17
src/java/org/apache/hadoop/mapred/JobTracker.java

@@ -424,6 +424,9 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
     // (trackerID->TreeSet of taskids running at that tracker)
     // (trackerID->TreeSet of taskids running at that tracker)
     TreeMap trackerToTaskMap = new TreeMap();
     TreeMap trackerToTaskMap = new TreeMap();
 
 
+    // (trackerID --> last sent HeartBeatResponseID)
+    Map<String, Short> trackerToHeartbeatResponseIDMap = new TreeMap();
+    
     //
     //
     // Watch and expire TaskTracker objects using these structures.
     // Watch and expire TaskTracker objects using these structures.
     // We can map from Name->TaskTrackerStatus, or we can expire by time.
     // We can map from Name->TaskTrackerStatus, or we can expire by time.
@@ -719,6 +722,74 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
     // InterTrackerProtocol
     // InterTrackerProtocol
     ////////////////////////////////////////////////////
     ////////////////////////////////////////////////////
 
 
+    /**
+     * The periodic heartbeat mechanism between the {@link TaskTracker} and
+     * the {@link JobTracker}.
+     * 
+     * The {@link JobTracker} processes the status information sent by the 
+     * {@link TaskTracker} and responds with instructions to start/stop 
+     * tasks or jobs, and also 'reset' instructions during contingencies. 
+     */
+    public synchronized HeartbeatResponse heartbeat(TaskTrackerStatus status, 
+            boolean initialContact, boolean acceptNewTasks, short responseId) 
+    throws IOException {
+      LOG.debug("Got heartbeat from: " + status.getTrackerName() + 
+              " (initialContact: " + initialContact + 
+              " acceptNewTasks: " + acceptNewTasks + ")" +
+              " with responseId: " + responseId);
+      
+        // First check if the last heartbeat response got through 
+        String trackerName = status.getTrackerName();
+        Short oldResponseId = trackerToHeartbeatResponseIDMap.get(trackerName);
+      
+        short newResponseId = (short)(responseId + 1);
+        if (!initialContact && oldResponseId != null && 
+                oldResponseId.shortValue() != responseId) {
+            newResponseId = oldResponseId.shortValue();
+        }
+      
+        // Process this heartbeat 
+        if (!processHeartbeat(status, initialContact, 
+                (newResponseId != responseId))) {
+            if (oldResponseId != null) {
+                trackerToHeartbeatResponseIDMap.remove(trackerName);
+            }
+
+            return new HeartbeatResponse(newResponseId, 
+                  new TaskTrackerAction[] {new ReinitTrackerAction()});
+        }
+      
+        // Initialize the response to be sent for the heartbeat
+        HeartbeatResponse response = new HeartbeatResponse(newResponseId, null);
+        List<TaskTrackerAction> actions = new ArrayList();
+      
+        // Check for new tasks to be executed on the tasktracker
+        if (acceptNewTasks) {
+        Task task = getNewTaskForTaskTracker(trackerName);
+            if (task != null) {
+                LOG.debug(trackerName + " -> LaunchTask: " + task.getTaskId());
+                actions.add(new LaunchTaskAction(task));
+            }
+        }
+      
+        // Check for tasks to be killed
+        List<TaskTrackerAction> killTasksList = getTasksToKill(trackerName);
+        if (killTasksList != null) {
+            actions.addAll(killTasksList);
+        }
+     
+        response.setActions(
+                actions.toArray(new TaskTrackerAction[actions.size()]));
+        
+        // Update the trackerToHeartbeatResponseIDMap
+        if (newResponseId != responseId) {
+            trackerToHeartbeatResponseIDMap.put(trackerName, 
+                    new Short(newResponseId));
+        }
+
+        return response;
+    }
+    
     /**
     /**
      * Update the last recorded status for the given task tracker.
      * Update the last recorded status for the given task tracker.
      * It assumes that the taskTrackers are locked on entry.
      * It assumes that the taskTrackers are locked on entry.
@@ -749,16 +820,21 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
     /**
     /**
      * Process incoming heartbeat messages from the task trackers.
      * Process incoming heartbeat messages from the task trackers.
      */
      */
-    public synchronized int emitHeartbeat(TaskTrackerStatus trackerStatus, boolean initialContact) {
+    private synchronized boolean processHeartbeat(
+            TaskTrackerStatus trackerStatus, 
+            boolean initialContact, boolean updateStatusTimestamp) {
         String trackerName = trackerStatus.getTrackerName();
         String trackerName = trackerStatus.getTrackerName();
-        trackerStatus.setLastSeen(System.currentTimeMillis());
+        if (initialContact || updateStatusTimestamp) {
+          trackerStatus.setLastSeen(System.currentTimeMillis());
+        }
 
 
         synchronized (taskTrackers) {
         synchronized (taskTrackers) {
             synchronized (trackerExpiryQueue) {
             synchronized (trackerExpiryQueue) {
                 boolean seenBefore = updateTaskTrackerStatus(trackerName,
                 boolean seenBefore = updateTaskTrackerStatus(trackerName,
                                                              trackerStatus);
                                                              trackerStatus);
                 if (initialContact) {
                 if (initialContact) {
-                    // If it's first contact, then clear out any state hanging around
+                    // If it's first contact, then clear out 
+                    // any state hanging around
                     if (seenBefore) {
                     if (seenBefore) {
                         lostTaskTracker(trackerName, trackerStatus.getHost());
                         lostTaskTracker(trackerName, trackerStatus.getHost());
                     }
                     }
@@ -767,7 +843,7 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
                     if (!seenBefore) {
                     if (!seenBefore) {
                         LOG.warn("Status from unknown Tracker : " + trackerName);
                         LOG.warn("Status from unknown Tracker : " + trackerName);
                         taskTrackers.remove(trackerName); 
                         taskTrackers.remove(trackerName); 
-                        return InterTrackerProtocol.UNKNOWN_TASKTRACKER;
+                        return false;
                     }
                     }
                 }
                 }
 
 
@@ -779,18 +855,17 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
 
 
         updateTaskStatuses(trackerStatus);
         updateTaskStatuses(trackerStatus);
         //LOG.info("Got heartbeat from "+trackerName);
         //LOG.info("Got heartbeat from "+trackerName);
-        return InterTrackerProtocol.TRACKERS_OK;
+        return true;
     }
     }
 
 
     /**
     /**
-     * A tracker wants to know if there's a Task to run.  Returns
-     * a task we'd like the TaskTracker to execute right now.
+     * Returns a task we'd like the TaskTracker to execute right now.
      *
      *
      * Eventually this function should compute load on the various TaskTrackers,
      * Eventually this function should compute load on the various TaskTrackers,
      * and incorporate knowledge of DFS file placement.  But for right now, it
      * and incorporate knowledge of DFS file placement.  But for right now, it
      * just grabs a single item out of the pending task list and hands it back.
      * just grabs a single item out of the pending task list and hands it back.
      */
      */
-    public synchronized Task pollForNewTask(String taskTracker) {
+    private synchronized Task getNewTaskForTaskTracker(String taskTracker) {
         //
         //
         // Compute average map and reduce task numbers across pool
         // Compute average map and reduce task numbers across pool
         //
         //
@@ -933,23 +1008,36 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, JobSubmiss
      * A tracker wants to know if any of its Tasks have been
      * A tracker wants to know if any of its Tasks have been
      * closed (because the job completed, whether successfully or not)
      * closed (because the job completed, whether successfully or not)
      */
      */
-    public synchronized String[] pollForTaskWithClosedJob(String taskTracker) {
-        TreeSet taskIds = (TreeSet) trackerToTaskMap.get(taskTracker);
+    private synchronized List getTasksToKill(String taskTracker) {
+        Set<String> taskIds = (TreeSet) trackerToTaskMap.get(taskTracker);
         if (taskIds != null) {
         if (taskIds != null) {
-            ArrayList list = new ArrayList();
-            for (Iterator it = taskIds.iterator(); it.hasNext(); ) {
-                String taskId = (String) it.next();
-                TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);
-                if (tip.shouldCloseForClosedJob(taskId)) {
+            List<TaskTrackerAction> killList = new ArrayList();
+            Set<String> killJobIds = new TreeSet(); 
+            for (String killTaskId : taskIds ) {
+                TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(killTaskId);
+                if (tip.shouldCloseForClosedJob(killTaskId)) {
                     // 
                     // 
                     // This is how the JobTracker ends a task at the TaskTracker.
                     // This is how the JobTracker ends a task at the TaskTracker.
                     // It may be successfully completed, or may be killed in
                     // It may be successfully completed, or may be killed in
                     // mid-execution.
                     // mid-execution.
                     //
                     //
-                   list.add(taskId);
+                    if (tip.getJob().getStatus().getRunState() == JobStatus.RUNNING) {
+                        killList.add(new KillTaskAction(killTaskId));
+                        LOG.debug(taskTracker + " -> KillTaskAction: " + killTaskId);
+                    } else {
+                      //killTasksList.add(new KillJobAction(taskId));
+                        String killJobId = tip.getJob().getStatus().getJobId(); 
+                        killJobIds.add(killJobId);
+                    }
                 }
                 }
             }
             }
-            return (String[]) list.toArray(new String[list.size()]);
+            
+            for (String killJobId : killJobIds) {
+                killList.add(new KillJobAction(killJobId));
+                LOG.debug(taskTracker + " -> KillJobAction: " + killJobId);
+            }
+
+            return killList;
         }
         }
         return null;
         return null;
     }
     }

+ 58 - 0
src/java/org/apache/hadoop/mapred/KillJobAction.java

@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * Represents a directive from the {@link org.apache.hadoop.mapred.JobTracker}
+ * to the {@link org.apache.hadoop.mapred.TaskTracker} to kill the tasks of
+ * a job and clean up its resources.
+ *
+ * The only payload is the id of the job; {@link #write} and
+ * {@link #readFields} (de)serialize just that id.
+ * 
+ * @author Arun C Murthy
+ */
+class KillJobAction extends TaskTrackerAction {
+  String jobId; // id of the job to kill; left unset by the no-arg constructor until readFields()
+
+  public KillJobAction() {
+    super(ActionType.KILL_JOB);
+  }
+
+  public KillJobAction(String taskId) { // NOTE: despite its name, taskId is stored as the job id
+    super(ActionType.KILL_JOB);
+    this.jobId = taskId;
+  }
+  
+  public String getJobId() {
+    return jobId;
+  }
+  
+  public void write(DataOutput out) throws IOException {
+    Text.writeString(out, jobId); // writes only the job id; super.write() is not invoked here
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    jobId = Text.readString(in);
+  }
+
+}

+ 56 - 0
src/java/org/apache/hadoop/mapred/KillTaskAction.java

@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * Represents a directive from the {@link org.apache.hadoop.mapred.JobTracker}
+ * to the {@link org.apache.hadoop.mapred.TaskTracker} to kill a task.
+ *
+ * The only payload is the id of the task; {@link #write} and
+ * {@link #readFields} (de)serialize just that id.
+ * 
+ * @author Arun C Murthy
+ */
+class KillTaskAction extends TaskTrackerAction {
+  String taskId; // id of the task to kill; left unset by the no-arg constructor until readFields()
+  
+  public KillTaskAction() {
+    super(ActionType.KILL_TASK);
+  }
+  
+  public KillTaskAction(String taskId) {
+    super(ActionType.KILL_TASK);
+    this.taskId = taskId;
+  }
+
+  public String getTaskId() {
+    return taskId;
+  }
+  
+  public void write(DataOutput out) throws IOException {
+    Text.writeString(out, taskId); // writes only the task id; super.write() is not invoked here
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    taskId = Text.readString(in);
+  }
+}

+ 62 - 0
src/java/org/apache/hadoop/mapred/LaunchTaskAction.java

@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+/**
+ * Represents a directive from the {@link org.apache.hadoop.mapred.JobTracker}
+ * to the {@link org.apache.hadoop.mapred.TaskTracker} to launch a new task.
+ *
+ * Serialized form: a boolean that is <code>true</code> for a map task,
+ * followed by whatever the task itself writes.
+ * 
+ * @author Arun C Murthy
+ */
+class LaunchTaskAction extends TaskTrackerAction {
+  private Task task; // task to launch; left unset by the no-arg constructor until readFields()
+
+  public LaunchTaskAction() {
+    super(ActionType.LAUNCH_TASK);
+  }
+  
+  public LaunchTaskAction(Task task) {
+    super(ActionType.LAUNCH_TASK);
+    this.task = task;
+  }
+  
+  public Task getTask() {
+    return task;
+  }
+  
+  public void write(DataOutput out) throws IOException {
+    out.writeBoolean(task.isMapTask()); // type flag read back by readFields() to pick the Task subclass
+    task.write(out);
+  }
+  
+  public void readFields(DataInput in) throws IOException {
+    boolean isMapTask = in.readBoolean();
+    if (isMapTask) {
+      task = new MapTask();
+    } else {
+      task = new ReduceTask();
+    }
+    task.readFields(in); // the freshly created task then fills itself in from the stream
+  }
+
+}

+ 41 - 0
src/java/org/apache/hadoop/mapred/ReinitTrackerAction.java

@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+/**
+ * Represents a directive from the {@link org.apache.hadoop.mapred.JobTracker}
+ * to the {@link org.apache.hadoop.mapred.TaskTracker} to reinitialize itself.
+ *
+ * This action carries no payload, so {@link #write} and
+ * {@link #readFields} have empty bodies.
+ * 
+ * @author Arun C Murthy
+ */
+class ReinitTrackerAction extends TaskTrackerAction {
+
+  public ReinitTrackerAction() {
+    super(ActionType.REINIT_TRACKER);
+  }
+  
+  public void write(DataOutput out) throws IOException {}
+
+  public void readFields(DataInput in) throws IOException {}
+
+}

+ 6 - 1
src/java/org/apache/hadoop/mapred/TaskInProgress.java

@@ -228,7 +228,12 @@ class TaskInProgress {
             (job.getStatus().getRunState() != JobStatus.RUNNING)) {
             (job.getStatus().getRunState() != JobStatus.RUNNING)) {
             tasksReportedClosed.add(taskid);
             tasksReportedClosed.add(taskid);
             return true;
             return true;
-        } else {
+        } else if( !isMapTask() && isComplete() && 
+                ! tasksReportedClosed.contains(taskid) ){
+            tasksReportedClosed.add(taskid);
+            return true; 
+        }
+        else {
             return false;
             return false;
         }
         }
     }
     }

+ 183 - 68
src/java/org/apache/hadoop/mapred/TaskTracker.java

@@ -68,6 +68,9 @@ public class TaskTracker
 
 
     Server taskReportServer = null;
     Server taskReportServer = null;
     InterTrackerProtocol jobClient;
     InterTrackerProtocol jobClient;
+    
+    // last heartbeat response received
+    short heartbeatResponseId = -1;
 
 
     StatusHttpServer server = null;
     StatusHttpServer server = null;
     
     
@@ -187,7 +190,7 @@ public class TaskTracker
         }
         }
       }
       }
     }
     }
-    
+
     static String getCacheSubdir() {
     static String getCacheSubdir() {
       return TaskTracker.SUBDIR + Path.SEPARATOR + TaskTracker.CACHEDIR;
       return TaskTracker.SUBDIR + Path.SEPARATOR + TaskTracker.CACHEDIR;
     }
     }
@@ -451,15 +454,23 @@ public class TaskTracker
               }
               }
             }
             }
 
 
-            if (!transmitHeartBeat()) {
+            // Send the heartbeat and process the jobtracker's directives
+            HeartbeatResponse heartbeatResponse = transmitHeartBeat();
+            TaskTrackerAction[] actions = heartbeatResponse.getActions();
+            LOG.debug("Got heartbeatResponse from JobTracker with responseId: " + 
+                    heartbeatResponse.getResponseId() + " and " + 
+                    ((actions != null) ? actions.length : 0) + " actions");
+            
+            if (reinitTaskTracker(actions)) {
               return State.STALE;
               return State.STALE;
             }
             }
+            
             lastHeartbeat = now;
             lastHeartbeat = now;
             justStarted = false;
             justStarted = false;
 
 
-            checkForNewTasks();
+            checkAndStartNewTasks(actions);
             markUnresponsiveTasks();
             markUnresponsiveTasks();
-            closeCompletedTasks();
+            closeCompletedTasks(actions);
             killOverflowingTasks();
             killOverflowingTasks();
             
             
             //we've cleaned up, resume normal operation
             //we've cleaned up, resume normal operation
@@ -491,56 +502,94 @@ public class TaskTracker
      * @return false if the tracker was unknown
      * @return false if the tracker was unknown
      * @throws IOException
      * @throws IOException
      */
      */
-    private boolean transmitHeartBeat() throws IOException {
+    private HeartbeatResponse transmitHeartBeat() throws IOException {
       //
       //
       // Build the heartbeat information for the JobTracker
       // Build the heartbeat information for the JobTracker
       //
       //
-      List<TaskStatus> taskReports = new ArrayList(runningTasks.size());
+      List<TaskStatus> taskReports = 
+        new ArrayList<TaskStatus>(runningTasks.size());
       synchronized (this) {
       synchronized (this) {
-          for (TaskInProgress tip: runningTasks.values()) {
-              taskReports.add(tip.createStatus());
-          }
+        for (TaskInProgress tip: runningTasks.values()) {
+          taskReports.add(tip.createStatus());
+        }
       }
       }
       TaskTrackerStatus status = 
       TaskTrackerStatus status = 
         new TaskTrackerStatus(taskTrackerName, localHostname, 
         new TaskTrackerStatus(taskTrackerName, localHostname, 
-                              httpPort, taskReports, 
-                              failures); 
-
+                httpPort, taskReports, 
+                failures); 
+      
+      //
+      // Check if we should ask for a new Task
+      //
+      boolean askForNewTask = false; 
+      if ((mapTotal < maxCurrentTasks || reduceTotal < maxCurrentTasks) &&
+              acceptNewTasks) {
+        checkLocalDirs(fConf.getLocalDirs());
+        
+        if (enoughFreeSpace(minSpaceStart)) {
+          askForNewTask = true;
+        }
+      }
+      
       //
       //
       // Xmit the heartbeat
       // Xmit the heartbeat
       //
       //
+      HeartbeatResponse heartbeatResponse = jobClient.heartbeat(status, 
+              justStarted, askForNewTask, 
+              heartbeatResponseId);
+      heartbeatResponseId = heartbeatResponse.getResponseId();
       
       
-      int resultCode = jobClient.emitHeartbeat(status, justStarted);
       synchronized (this) {
       synchronized (this) {
-        for (TaskStatus taskStatus: taskReports) {
-            if (taskStatus.getRunState() != TaskStatus.State.RUNNING) {
-                if (taskStatus.getIsMap()) {
-                    mapTotal--;
-                } else {
-                    reduceTotal--;
-                }
-                myMetrics.completeTask();
-                runningTasks.remove(taskStatus.getTaskId());
+        for (TaskStatus taskStatus : taskReports) {
+          if (taskStatus.getRunState() != TaskStatus.State.RUNNING) {
+            if (taskStatus.getIsMap()) {
+              mapTotal--;
+            } else {
+              reduceTotal--;
             }
             }
+            myMetrics.completeTask();
+            runningTasks.remove(taskStatus.getTaskId());
+          }
         }
         }
       }
       }
-      return resultCode != InterTrackerProtocol.UNKNOWN_TASKTRACKER;
+      return heartbeatResponse;
     }
     }
 
 
+    /**
+     * Check if the jobtracker directed a 'reset' of the tasktracker.
+     * 
+     * Scans the directives and stops at the first one whose type is
+     * <code>REINIT_TRACKER</code>; a <code>null</code> array is treated
+     * as containing no directives.
+     * 
+     * @param actions the directives of the jobtracker for the tasktracker,
+     *                may be <code>null</code>.
+     * @return <code>true</code> if tasktracker is to be reset, 
+     *         <code>false</code> otherwise.
+     */
+    private boolean reinitTaskTracker(TaskTrackerAction[] actions) {
+      if (actions != null) {
+        for (TaskTrackerAction action : actions) {
+          if (action.getActionId() == 
+            TaskTrackerAction.ActionType.REINIT_TRACKER) {
+            LOG.info("Recieved RenitTrackerAction from JobTracker");
+            return true;
+          }
+        }
+      }
+      return false;
+    }
+    
     /**
     /**
      * Check to see if there are any new tasks that we should run.
      * Check to see if there are any new tasks that we should run.
      * @throws IOException
      * @throws IOException
      */
      */
-    private void checkForNewTasks() throws IOException {
-      //
-      // Check if we should ask for a new Task
-      //
-      if ((mapTotal < maxCurrentTasks || reduceTotal < maxCurrentTasks) &&
-          acceptNewTasks) {
-        checkLocalDirs(fConf.getLocalDirs());
-        
-        if (enoughFreeSpace(minSpaceStart)) {
-          Task t = jobClient.pollForNewTask(taskTrackerName);
+    private void checkAndStartNewTasks(TaskTrackerAction[] actions) 
+    throws IOException {
+      if (actions == null) {
+        return;
+      }
+      
+      for (TaskTrackerAction action : actions) {
+        if (action.getActionId() == 
+          TaskTrackerAction.ActionType.LAUNCH_TASK) {
+          Task t = ((LaunchTaskAction)(action)).getTask();
+          LOG.info("LaunchTaskAction: " + t.getTaskId());
           if (t != null) {
           if (t != null) {
             startNewTask(t);
             startNewTask(t);
           }
           }
@@ -573,24 +622,73 @@ public class TaskTracker
      * Ask the JobTracker if there are any tasks that we should clean up,
      * Ask the JobTracker if there are any tasks that we should clean up,
      * either because we don't need them any more or because the job is done.
      * either because we don't need them any more or because the job is done.
      */
      */
-    private void closeCompletedTasks() throws IOException {
-      String[] toCloseIds = jobClient.pollForTaskWithClosedJob(taskTrackerName);
-      if (toCloseIds != null) {
-        synchronized (this) {
-          for (int i = 0; i < toCloseIds.length; i++) {
-            TaskInProgress tip = tasks.get(toCloseIds[i]);
-            if (tip != null) {
-              // remove the task from running jobs, removing the job if 
-              // it is the last task
-              removeTaskFromJob(tip.getTask().getJobId(), tip);
-              tasksToCleanup.put(tip);
+    private void closeCompletedTasks(TaskTrackerAction[] actions) 
+    throws IOException {
+      if (actions == null) {
+        return;
+      }
+      
+      for (TaskTrackerAction action : actions) {
+        TaskTrackerAction.ActionType actionType = action.getActionId();
+        
+        if (actionType == TaskTrackerAction.ActionType.KILL_JOB) {
+          String jobId = ((KillJobAction)action).getJobId();
+          LOG.info("Received 'KillJobAction' for job: " + jobId);
+          synchronized (runningJobs) {
+            RunningJob rjob = runningJobs.get(jobId);
+            if (rjob == null) {
+              LOG.warn("Unknown job " + jobId + " being deleted.");
             } else {
             } else {
-              LOG.info("Attempt to cleanup unknown tip " + toCloseIds[i]);
+              synchronized (rjob) {
+                int noJobTasks = rjob.tasks.size(); 
+                int taskCtr = 0;
+                
+                // Add the tips of this job to the queue of tasks to be purged
+                for (TaskInProgress tip : rjob.tasks) {
+                  // Purge the job files for the last element in rjob.tasks
+                  if (++taskCtr == noJobTasks) {
+                    tip.setPurgeJobFiles(true);
+                  }
+
+                  tasksToCleanup.put(tip);
+                }
+                
+                // Remove this job 
+                rjob.tasks.clear();
+                runningJobs.remove(jobId);
+              }
             }
             }
           }
           }
+        } else if(actionType == TaskTrackerAction.ActionType.KILL_TASK) {
+          String taskId = ((KillTaskAction)action).getTaskId();
+          LOG.info("Received KillTaskAction for task: " + taskId);
+          purgeTask(tasks.get(taskId), false);
         }
         }
       }
       }
     }
     }
+    
+    /**
+     * Remove the tip and update all relevant state: record whether its job
+     * files should be purged, detach it from its job, and queue it on
+     * <code>tasksToCleanup</code>. Does nothing if the tip is 
+     * <code>null</code>.
+     * 
+     * @param tip {@link TaskInProgress} to be removed, may be 
+     *            <code>null</code>.
+     * @param purgeJobFiles <code>true</code> if the job files are to be
+     *                      purged, <code>false</code> otherwise.
+     */
+    private void purgeTask(TaskInProgress tip, boolean purgeJobFiles) {
+      if (tip != null) {
+        LOG.info("About to purge task: " + tip.getTask().getTaskId());
+        
+        // Cleanup the job files? 
+        tip.setPurgeJobFiles(purgeJobFiles);
+        
+        // Remove the task from running jobs, 
+        // removing the job if it's the last task
+        removeTaskFromJob(tip.getTask().getJobId(), tip);
+        
+        // Add this tip to queue of tasks to be purged 
+        tasksToCleanup.put(tip);
+      }
+    }
 
 
     /** Check if we're dangerously low on disk space
     /** Check if we're dangerously low on disk space
      * If so, kill jobs to free up space and make sure
      * If so, kill jobs to free up space and make sure
@@ -822,6 +920,9 @@ public class TaskTracker
         private boolean alwaysKeepTaskFiles;
         private boolean alwaysKeepTaskFiles;
         private TaskStatus taskStatus ; 
         private TaskStatus taskStatus ; 
         private boolean keepJobFiles;
         private boolean keepJobFiles;
+        
+        /** Cleanup the job files when the job is complete (done/failed) */
+        private boolean purgeJobFiles = false;
 
 
         /**
         /**
          */
          */
@@ -886,6 +987,10 @@ public class TaskTracker
             keepFailedTaskFiles = localJobConf.getKeepFailedTaskFiles();
             keepFailedTaskFiles = localJobConf.getKeepFailedTaskFiles();
         }
         }
         
         
+        public void setPurgeJobFiles(boolean purgeJobFiles) {
+          this.purgeJobFiles = purgeJobFiles;
+        }
+        
         /**
         /**
          */
          */
         public synchronized TaskStatus createStatus() {
         public synchronized TaskStatus createStatus() {
@@ -1017,32 +1122,39 @@ public class TaskTracker
          * We no longer need anything from this task, as the job has
          * We no longer need anything from this task, as the job has
          * finished.  If the task is still running, kill it (and clean up
          * finished.  If the task is still running, kill it (and clean up
          */
          */
-        public synchronized void jobHasFinished() throws IOException {
-        	 
-            if (getRunState() == TaskStatus.State.RUNNING) {
+        public void jobHasFinished() throws IOException {
+          boolean killTask = false;  
+          synchronized(this){
+              killTask = (getRunState() == TaskStatus.State.RUNNING);
+              if (killTask) {
                 killAndCleanup(false);
                 killAndCleanup(false);
-            } else {
-                cleanup();
-            }
-            if (keepJobFiles)
-              return;
-            
-            // Delete temp directory in case any task used PhasedFileSystem.
-            try{
-              String systemDir = task.getConf().get("mapred.system.dir");
-              Path taskTempDir = new Path(systemDir + "/" + 
-                  task.getJobId() + "/" + task.getTipId());
-              if( fs.exists(taskTempDir)){
-                fs.delete(taskTempDir) ;
               }
               }
-            }catch(IOException e){
-              LOG.warn("Error in deleting reduce temporary output",e); 
+          }
+          if (!killTask) {
+            cleanup();
+          }
+          if (keepJobFiles)
+            return;
+              
+          synchronized(this){
+              // Delete temp directory in case any task used PhasedFileSystem.
+              try{
+                String systemDir = task.getConf().get("mapred.system.dir");
+                Path taskTempDir = new Path(systemDir + "/" + 
+                    task.getJobId() + "/" + task.getTipId() + "/" + task.getTaskId());
+                if( fs.exists(taskTempDir)){
+                  fs.delete(taskTempDir) ;
+                }
+              }catch(IOException e){
+                LOG.warn("Error in deleting reduce temporary output",e); 
+              }
+            }
+            // Delete the job directory for this  
+            // task if the job is done/failed
+            if (purgeJobFiles) {
+              this.defaultJobConf.deleteLocalFiles(SUBDIR + Path.SEPARATOR + 
+                      JOBCACHE + Path.SEPARATOR +  task.getJobId());
             }
             }
-            
-            // delete the job diretory for this task 
-            // since the job is done/failed
-            this.defaultJobConf.deleteLocalFiles(SUBDIR + Path.SEPARATOR + 
-                    JOBCACHE + Path.SEPARATOR +  task.getJobId());
         }
         }
 
 
         /**
         /**
@@ -1090,6 +1202,9 @@ public class TaskTracker
          * We no longer need anything from this task.  Either the 
          * We no longer need anything from this task.  Either the 
          * controlling job is all done and the files have been copied
          * controlling job is all done and the files have been copied
          * away, or the task failed and we don't need the remains.
          * away, or the task failed and we don't need the remains.
+         * Any calls to cleanup should not lock the tip first.
+         * cleanup does the right thing- updates tasks in Tasktracker
+         * by locking tasktracker first and then locks the tip.
          */
          */
         void cleanup() throws IOException {
         void cleanup() throws IOException {
             String taskId = task.getTaskId();
             String taskId = task.getTaskId();

+ 111 - 0
src/java/org/apache/hadoop/mapred/TaskTrackerAction.java

@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+
+/**
+ * A generic directive from the {@link org.apache.hadoop.mapred.JobTracker}
+ * to the {@link org.apache.hadoop.mapred.TaskTracker} to take some 'action'.
+ *
+ * Each instance carries the {@link ActionType} it was constructed with; the
+ * base {@link #write} and {@link #readFields} (de)serialize only that type.
+ * 
+ * @author Arun C Murthy
+ */
+abstract class TaskTrackerAction implements Writable {
+  
+  /**
+   * Enumeration of the various 'actions' that the {@link JobTracker}
+   * directs the {@link TaskTracker} to perform periodically.
+   * 
+   * @author Arun C Murthy
+   */
+  public static enum ActionType {
+    /** Launch a new task. */
+    LAUNCH_TASK,
+    
+    /** Kill a task. */
+    KILL_TASK,
+    
+    /** Kill any tasks of this job and cleanup. */
+    KILL_JOB,
+    
+    /** Reinitialize the tasktracker. */
+    REINIT_TRACKER
+  };
+  
+  /**
+   * A factory-method to create objects of given {@link ActionType}.
+   * The object is built with the no-argument constructor of the matching
+   * subclass.
+   * @param actionType the {@link ActionType} of object to create.
+   * @return a new {@link TaskTrackerAction} of the requested 
+   *         {@link ActionType}.
+   */
+  public static TaskTrackerAction createAction(ActionType actionType) {
+    TaskTrackerAction action = null;
+    
+    switch (actionType) {
+      case LAUNCH_TASK:
+        {
+          action = new LaunchTaskAction();
+        }
+        break;
+        case KILL_TASK:
+        {
+          action = new KillTaskAction();
+        }
+        break;
+      case KILL_JOB:
+        {
+          action = new KillJobAction();
+        }
+        break;
+      case REINIT_TRACKER:
+        {
+          action = new ReinitTrackerAction();
+        }
+        break;
+    }
+
+    return action;
+  }
+  
+  private ActionType actionType; // set by the constructor; overwritten by readFields()
+  
+  protected TaskTrackerAction(ActionType actionType) {
+    this.actionType = actionType;
+  }
+  
+  /**
+   * Return the {@link ActionType}.
+   * @return the {@link ActionType}.
+   */
+  ActionType getActionId() {
+    return actionType;
+  }
+
+  public void write(DataOutput out) throws IOException {
+    WritableUtils.writeEnum(out, actionType);
+  }
+  
+  public void readFields(DataInput in) throws IOException {
+    actionType = WritableUtils.readEnum(in, ActionType.class);
+  }
+}

+ 9 - 0
src/site/src/documentation/content/xdocs/index.xml

@@ -14,6 +14,15 @@
     <section>
     <section>
       <title>News</title>
       <title>News</title>
 
 
+      <section>
+      <title>15 December, 2006: release 0.9.2 available</title>
+      <p>This fixes critical bugs in 0.9.1.  For details see the <a
+      href="http://tinyurl.com/ya8lfd">release notes</a>. The release can
+      be obtained from <a
+      href="http://www.apache.org/dyn/closer.cgi/lucene/hadoop/"> a
+      nearby mirror</a>.
+      </p> </section>
+
       <section>
       <section>
       <title>6 December, 2006: release 0.9.1 available</title>
       <title>6 December, 2006: release 0.9.1 available</title>
       <p>This fixes critical bugs in 0.9.0.  For details see the <a
       <p>This fixes critical bugs in 0.9.0.  For details see the <a

Einige Dateien werden nicht angezeigt, da zu viele Dateien in diesem Diff geändert wurden.