
Merging trunk to branch-trunk-win

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-trunk-win@1440155 13f79535-47bb-0310-9956-ffa450edef68
Suresh Srinivas, 12 years ago · commit 697627b541
40 changed files with 889 additions and 656 deletions
  1. + 0 - 1     BUILDING.txt
  2. + 1 - 1     dev-support/relnotes.py
  3. + 14 - 0    hadoop-common-project/hadoop-common/CHANGES.txt
  4. + 1 - 1     hadoop-common-project/hadoop-common/pom.xml
  5. + 0 - 112   hadoop-common-project/hadoop-common/src/main/docs/forrest.properties
  6. + 0 - 1     hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
  7. + 3 - 4     hadoop-common-project/hadoop-common/src/site/apt/HttpAuthentication.apt.vm
  8. + 8 - 0     hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  9. + 0 - 112   hadoop-hdfs-project/hadoop-hdfs/src/main/docs/forrest.properties
  10. + 12 - 7   hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
  11. + 1 - 1    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
  12. + 1 - 1    hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java
  13. + 20 - 10  hadoop-mapreduce-project/CHANGES.txt
  14. + 39 - 1   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
  15. + 68 - 21  hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
  16. + 8 - 6    hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java
  17. + 0 - 1    hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
  18. + 80 - 0   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java
  19. + 10 - 2   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
  20. + 20 - 0   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java
  21. + 24 - 0   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/Avataar.java
  22. + 25 - 0   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/Locality.java
  23. + 8 - 2    hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr
  24. + 22 - 3   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java
  25. + 13 - 0   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
  26. + 63 - 0   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSubmittedEvent.java
  27. + 28 - 3   hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptStartedEvent.java
  28. + 51 - 17  hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java
  29. + 4 - 1    hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/OnDiskMapOutput.java
  30. + 272 - 0  hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java
  31. + 0 - 324  hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestIndexCache.java
  32. + 1 - 1    hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/versioninfo/VersionInfoMojo.java
  33. + 0 - 7    hadoop-project-dist/pom.xml
  34. + 5 - 0    hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Hadoop20JHParser.java
  35. + 19 - 1   hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java
  36. + 10 - 1   hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/TaskAttempt20LineEventEmitter.java
  37. + 3 - 0    hadoop-yarn-project/CHANGES.txt
  38. + 1 - 1    hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml
  39. + 38 - 11  hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm
  40. + 16 - 2   pom.xml

+ 0 - 1
BUILDING.txt

@@ -6,7 +6,6 @@ Requirements:
 * Unix System
 * JDK 1.6
 * Maven 3.0
-* Forrest 0.8 (if generating docs)
 * Findbugs 1.3.9 (if running findbugs)
 * ProtocolBuffer 2.4.1+ (for MapReduce and HDFS)
 * CMake 2.6 or newer (if compiling native code)

+ 1 - 1
dev-support/relnotes.py

@@ -155,7 +155,7 @@ class JiraIter:
     end=1
     count=100
     while (at < end):
-      params = urllib.urlencode({'jql': "project in (HADOOP,HDFS,MAPREDUCE,YARN) and fixVersion in ('"+"' , '".join(versions)+"') and resolution = Fixed", 'startAt':at+1, 'maxResults':count})
+      params = urllib.urlencode({'jql': "project in (HADOOP,HDFS,MAPREDUCE,YARN) and fixVersion in ('"+"' , '".join(versions)+"') and resolution = Fixed", 'startAt':at, 'maxResults':count})
       resp = urllib.urlopen("https://issues.apache.org/jira/rest/api/2/search?%s"%params)
       data = json.loads(resp.read())
       if (data.has_key('errorMessages')):

+ 14 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -328,6 +328,11 @@ Trunk (Unreleased)
 
     HADOOP-8589 ViewFs tests fail when tests and home dirs are nested (sanjay Radia)
 
+    HADOOP-9246 Execution phase for hadoop-maven-plugin should be
+    process-resources (Karthik Kambatla and Chris Nauroth via jlowe)
+
+    HADOOP-9190. packaging docs is broken. (Andy Isaacson via atm)
+
 Release 2.0.3-alpha - Unreleased 
 
   INCOMPATIBLE CHANGES
@@ -575,6 +580,9 @@ Release 2.0.3-alpha - Unreleased
     HADOOP-9215. when using cmake-2.6, libhadoop.so doesn't get created
     (only libhadoop.so.1.0.0) (Colin Patrick McCabe via todd)
 
+    HADOOP-8857. hadoop.http.authentication.signature.secret.file docs 
+    should not state that secret is randomly generated. (tucu)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES
@@ -1293,6 +1301,10 @@ Release 0.23.6 - UNRELEASED
     HADOOP-9242. Duplicate surefire plugin config in hadoop-common.
     (Andrey Klochkov via suresh)
 
+    HADOOP-9247. Parametrize Clover "generateXxx" properties to make them
+    re-definable via -D in mvn calls. (Ivan A. Veselovsky via suresh)
+
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -1310,6 +1322,8 @@ Release 0.23.6 - UNRELEASED
 
     HADOOP-9097. Maven RAT plugin is not checking all source files (tgraves)
 
+    HADOOP-9255. relnotes.py missing last jira (tgraves)
+
 Release 0.23.5 - 2012-11-28
 
 

+ 1 - 1
hadoop-common-project/hadoop-common/pom.xml

@@ -279,7 +279,7 @@
         <executions>
           <execution>
             <id>version-info</id>
-            <phase>compile</phase>
+            <phase>generate-resources</phase>
             <goals>
               <goal>version-info</goal>
             </goals>

+ 0 - 112
hadoop-common-project/hadoop-common/src/main/docs/forrest.properties

@@ -1,112 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-##############
-# Properties used by forrest.build.xml for building the website
-# These are the defaults, un-comment them if you need to change them.
-##############
-
-# Prints out a summary of Forrest settings for this project
-#forrest.echo=true 
-
-# Project name (used to name .war file)
-#project.name=my-project
-
-# Specifies name of Forrest skin to use
-#project.skin=tigris
-#project.skin=pelt
-
-# comma separated list, file:// is supported
-#forrest.skins.descriptors=http://forrest.apache.org/skins/skins.xml,file:///c:/myskins/skins.xml
-
-##############
-# behavioural properties
-#project.menu-scheme=tab_attributes
-#project.menu-scheme=directories
-
-##############
-# layout properties
-
-# Properties that can be set to override the default locations
-#
-# Parent properties must be set. This usually means uncommenting
-# project.content-dir if any other property using it is uncommented
-
-#project.status=status.xml
-#project.content-dir=src/documentation
-#project.raw-content-dir=${project.content-dir}/content
-#project.conf-dir=${project.content-dir}/conf
-#project.sitemap-dir=${project.content-dir}
-#project.xdocs-dir=${project.content-dir}/content/xdocs
-#project.resources-dir=${project.content-dir}/resources
-#project.stylesheets-dir=${project.resources-dir}/stylesheets
-#project.images-dir=${project.resources-dir}/images
-#project.schema-dir=${project.resources-dir}/schema
-#project.skins-dir=${project.content-dir}/skins
-#project.skinconf=${project.content-dir}/skinconf.xml
-#project.lib-dir=${project.content-dir}/lib
-#project.classes-dir=${project.content-dir}/classes
-#project.translations-dir=${project.content-dir}/translations
-
-##############
-# validation properties
-
-# This set of properties determine if validation is performed
-# Values are inherited unless overridden.
-# e.g. if forrest.validate=false then all others are false unless set to true.
-#forrest.validate=true
-#forrest.validate.xdocs=${forrest.validate}
-#forrest.validate.skinconf=${forrest.validate}
-# Workaround (HADOOP-7072) for http://issues.apache.org/jira/browse/FOR-984
-# Remove when forrest-0.9 is available
-forrest.validate.sitemap=false
-forrest.validate.stylesheets=false
-# End of forrest-0.8 + JDK6 workaround
-#forrest.validate.skins=${forrest.validate}
-forrest.validate.skins.stylesheets=false
-
-# *.failonerror=(true|false) - stop when an XML file is invalid
-#forrest.validate.failonerror=true
-
-# *.excludes=(pattern) - comma-separated list of path patterns to not validate
-# e.g.
-#forrest.validate.xdocs.excludes=samples/subdir/**, samples/faq.xml
-#forrest.validate.xdocs.excludes=
-
-
-##############
-# General Forrest properties
-
-# The URL to start crawling from
-#project.start-uri=linkmap.html
-# Set logging level for messages printed to the console
-# (DEBUG, INFO, WARN, ERROR, FATAL_ERROR)
-#project.debuglevel=ERROR
-# Max memory to allocate to Java
-#forrest.maxmemory=64m
-# Any other arguments to pass to the JVM. For example, to run on an X-less
-# server, set to -Djava.awt.headless=true
-#forrest.jvmargs=
-# The bugtracking URL - the issue number will be appended
-#project.bugtracking-url=http://issues.apache.org/bugzilla/show_bug.cgi?id=
-#project.bugtracking-url=http://issues.apache.org/jira/browse/
-# The issues list as rss
-#project.issues-rss-url=
-#I18n Property only works for the "forrest run" target.
-#project.i18n=true
-project.configfile=${project.home}/src/documentation/conf/cli.xconf
-

+ 0 - 1
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -890,7 +890,6 @@
   <value>${user.home}/hadoop-http-auth-signature-secret</value>
   <description>
     The signature secret for signing the authentication tokens.
-    If not set a random secret is generated at startup time.
     The same secret should be used for JT/NN/DN/TT configurations.
   </description>
 </property>

+ 3 - 4
hadoop-common-project/hadoop-common/src/site/apt/HttpAuthentication.apt.vm

@@ -64,10 +64,9 @@ Authentication for Hadoop HTTP web-consoles
    The default value is <<<36000>>>.
 
    <<<hadoop.http.authentication.signature.secret.file>>>: The signature secret
-   file for signing the authentication tokens. If not set a random secret is
-   generated at startup time. The same secret should be used for all nodes
-   in the cluster, JobTracker, NameNode, DataNode and TastTracker. The
-   default value is <<<${user.home}/hadoop-http-auth-signature-secret>>>.
+   file for signing the authentication tokens. The same secret should be used 
+   for all nodes in the cluster, JobTracker, NameNode, DataNode and TastTracker. 
+   The default value is <<<${user.home}/hadoop-http-auth-signature-secret>>>.
    IMPORTANT: This file should be readable only by the Unix user running the
    daemons.
 

+ 8 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -497,6 +497,8 @@ Release 2.0.3-alpha - Unreleased
     HDFS-4403. DFSClient can infer checksum type when not provided by reading
     first byte (todd)
 
+    HDFS-4259. Improve pipeline DN replacement failure message (harsh)
+
   OPTIMIZATIONS
 
     HDFS-3429. DataNode reads checksums even if client does not need them (todd)
@@ -728,6 +730,9 @@ Release 2.0.3-alpha - Unreleased
     HDFS-4359. Slow RPC responses from NN can prevent metrics collection on
     DNs. (liang xie via atm)
 
+    HDFS-4444. Add space between total transaction time and number of
+    transactions in FSEditLog#printStatistics. (Stephen Chu via suresh)
+
   BREAKDOWN OF HDFS-3077 SUBTASKS
 
     HDFS-3077. Quorum-based protocol for reading and writing edit logs.
@@ -1377,6 +1382,9 @@ Release 2.0.2-alpha - 2012-09-07
     HDFS-3944. Httpfs resolveAuthority() is not resolving host correctly. (tucu)
 
     HDFS-3972. Trash emptier fails in secure HA cluster. (todd via eli)
+
+    HDFS-4443. Remove a trailing '`' character from the HTML code generated by
+    NamenodeJspHelper.generateNodeData(..).  (Christian Rohling via szetszwo)
  
   BREAKDOWN OF HDFS-3042 SUBTASKS
 

+ 0 - 112
hadoop-hdfs-project/hadoop-hdfs/src/main/docs/forrest.properties

@@ -1,112 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-##############
-# Properties used by forrest.build.xml for building the website
-# These are the defaults, un-comment them if you need to change them.
-##############
-
-# Prints out a summary of Forrest settings for this project
-#forrest.echo=true 
-
-# Project name (used to name .war file)
-#project.name=my-project
-
-# Specifies name of Forrest skin to use
-#project.skin=tigris
-#project.skin=pelt
-
-# comma separated list, file:// is supported
-#forrest.skins.descriptors=http://forrest.apache.org/skins/skins.xml,file:///c:/myskins/skins.xml
-
-##############
-# behavioural properties
-#project.menu-scheme=tab_attributes
-#project.menu-scheme=directories
-
-##############
-# layout properties
-
-# Properties that can be set to override the default locations
-#
-# Parent properties must be set. This usually means uncommenting
-# project.content-dir if any other property using it is uncommented
-
-#project.status=status.xml
-#project.content-dir=src/documentation
-#project.raw-content-dir=${project.content-dir}/content
-#project.conf-dir=${project.content-dir}/conf
-#project.sitemap-dir=${project.content-dir}
-#project.xdocs-dir=${project.content-dir}/content/xdocs
-#project.resources-dir=${project.content-dir}/resources
-#project.stylesheets-dir=${project.resources-dir}/stylesheets
-#project.images-dir=${project.resources-dir}/images
-#project.schema-dir=${project.resources-dir}/schema
-#project.skins-dir=${project.content-dir}/skins
-#project.skinconf=${project.content-dir}/skinconf.xml
-#project.lib-dir=${project.content-dir}/lib
-#project.classes-dir=${project.content-dir}/classes
-#project.translations-dir=${project.content-dir}/translations
-
-##############
-# validation properties
-
-# This set of properties determine if validation is performed
-# Values are inherited unless overridden.
-# e.g. if forrest.validate=false then all others are false unless set to true.
-#forrest.validate=true
-#forrest.validate.xdocs=${forrest.validate}
-#forrest.validate.skinconf=${forrest.validate}
-# Workaround (HADOOP-7072) for http://issues.apache.org/jira/browse/FOR-984
-# Remove when forrest-0.9 is available
-forrest.validate.sitemap=false
-forrest.validate.stylesheets=false
-# End of forrest-0.8 + JDK6 workaround
-#forrest.validate.skins=${forrest.validate}
-forrest.validate.skins.stylesheets=false
-
-# *.failonerror=(true|false) - stop when an XML file is invalid
-#forrest.validate.failonerror=true
-
-# *.excludes=(pattern) - comma-separated list of path patterns to not validate
-# e.g.
-#forrest.validate.xdocs.excludes=samples/subdir/**, samples/faq.xml
-#forrest.validate.xdocs.excludes=
-
-
-##############
-# General Forrest properties
-
-# The URL to start crawling from
-#project.start-uri=linkmap.html
-# Set logging level for messages printed to the console
-# (DEBUG, INFO, WARN, ERROR, FATAL_ERROR)
-#project.debuglevel=ERROR
-# Max memory to allocate to Java
-#forrest.maxmemory=64m
-# Any other arguments to pass to the JVM. For example, to run on an X-less
-# server, set to -Djava.awt.headless=true
-#forrest.jvmargs=
-# The bugtracking URL - the issue number will be appended
-#project.bugtracking-url=http://issues.apache.org/bugzilla/show_bug.cgi?id=
-#project.bugtracking-url=http://issues.apache.org/jira/browse/
-# The issues list as rss
-#project.issues-rss-url=
-#I18n Property only works for the "forrest run" target.
-#project.i18n=true
-project.configfile=${project.home}/src/documentation/conf/cli.xconf
-

+ 12 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java

@@ -785,13 +785,18 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable {
     private int findNewDatanode(final DatanodeInfo[] original
         ) throws IOException {
       if (nodes.length != original.length + 1) {
-        throw new IOException("Failed to add a datanode.  "
-            + "User may turn off this feature by setting "
-            + DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_KEY
-            + " in configuration, where the current policy is "
-            + dfsClient.dtpReplaceDatanodeOnFailure
-            + ".  (Nodes: current=" + Arrays.asList(nodes)
-            + ", original=" + Arrays.asList(original) + ")");
+        throw new IOException(
+            new StringBuilder()
+            .append("Failed to replace a bad datanode on the existing pipeline ")
+            .append("due to no more good datanodes being available to try. ")
+            .append("(Nodes: current=").append(Arrays.asList(nodes))
+            .append(", original=").append(Arrays.asList(original)).append("). ")
+            .append("The current failed datanode replacement policy is ")
+            .append(dfsClient.dtpReplaceDatanodeOnFailure).append(", and ")
+            .append("a client may configure this via '")
+            .append(DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_KEY)
+            .append("' in its configuration.")
+            .toString());
       }
       for(int i = 0; i < nodes.length; i++) {
         int j = 0;

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java

@@ -641,7 +641,7 @@ public class FSEditLog implements LogsPurgeable {
     buf.append(numTransactions);
     buf.append(" Total time for transactions(ms): ");
     buf.append(totalTimeTransactions);
-    buf.append("Number of transactions batched in Syncs: ");
+    buf.append(" Number of transactions batched in Syncs: ");
     buf.append(numTransactionsBatchedInSync);
     buf.append(" Number of syncs: ");
     buf.append(editLogStream.getNumSync());

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java

@@ -592,7 +592,7 @@ class NamenodeJspHelper {
           + "<td class=\"pcused\">"
           + ServletUtil.percentageGraph((int) Double.parseDouble(percentUsed),
               100) 
-          + "<td align=\"right\" class=\"pcremaining`\">"
+          + "<td align=\"right\" class=\"pcremaining\">"
           + percentRemaining 
           + "<td title=" + "\"blocks scheduled : "
           + d.getBlocksScheduled() + "\" class=\"blocks\">" + d.numBlocks()+"\n"

+ 20 - 10
hadoop-mapreduce-project/CHANGES.txt

@@ -11,16 +11,9 @@ Trunk (Unreleased)
     MAPREDUCE-2669. Add new examples for Mean, Median, and Standard Deviation.
     (Plamen Jeliazkov via shv)
 
-    MAPREDUCE-4049. Experimental api to allow for alternate shuffle plugins.
-    (Avner BenHanoch via acmurthy) 
-
-    MAPREDUCE-4807. Allow MapOutputBuffer to be pluggable. (masokan via tucu)
-
     MAPREDUCE-4887. Add RehashPartitioner, to smooth distributions
     with poor implementations of Object#hashCode().  (Radim Kolar via cutting)
 
-    MAPREDUCE-4808. Refactor MapOutput and MergeManager to facilitate reuse by Shuffle implementations. (masokan via tucu)
-
   IMPROVEMENTS
 
     MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for
@@ -78,9 +71,6 @@ Trunk (Unreleased)
     MAPREDUCE-4735. Make arguments in TestDFSIO case insensitive.
     (Brandon Li via suresh)
 
-    MAPREDUCE-4809. Change visibility of classes for pluggable sort changes. 
-    (masokan via tucu)
-
   BUG FIXES
 
     MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant.
@@ -180,6 +170,14 @@ Release 2.0.3-alpha - Unreleased
     MAPREDUCE-4810. Added new admin command options for MR AM. (Jerry Chen via
     vinodkv)
 
+    MAPREDUCE-4049. Experimental api to allow for alternate shuffle plugins.
+    (Avner BenHanoch via acmurthy) 
+
+    MAPREDUCE-4807. Allow MapOutputBuffer to be pluggable. (masokan via tucu)
+
+    MAPREDUCE-4808. Refactor MapOutput and MergeManager to facilitate reuse 
+    by Shuffle implementations. (masokan via tucu)
+
   IMPROVEMENTS
 
     MAPREDUCE-3678. The Map tasks logs should have the value of input
@@ -211,6 +209,12 @@ Release 2.0.3-alpha - Unreleased
 
     MAPREDUCE-4949. Enable multiple pi jobs to run in parallel. (sandyr via tucu)
 
+    MAPREDUCE-4809. Change visibility of classes for pluggable sort changes. 
+    (masokan via tucu)
+
+    MAPREDUCE-4838. Add additional fields like Locality, Avataar to the
+    JobHistory logs. (Zhijie Shen via sseth)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -268,6 +272,12 @@ Release 2.0.3-alpha - Unreleased
     MAPREDUCE-4948. Fix a failing unit test TestYARNRunner.testHistoryServerToken.
     (Junping Du via sseth)
 
+    MAPREDUCE-4803. Remove duplicate copy of TestIndexCache. (Mariappan Asokan
+    via sseth)
+
+    MAPREDUCE-2264. Job status exceeds 100% in some cases. 
+    (devaraj.k and sandyr via tucu)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES

+ 39 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java

@@ -28,6 +28,7 @@ import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReadWriteLock;
@@ -1192,6 +1193,39 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
     }
   }
   */
+  /**
+    * Get the workflow adjacencies from the job conf
+    * The string returned is of the form "key"="value" "key"="value" ...
+    */
+  private static String getWorkflowAdjacencies(Configuration conf) {
+    int prefixLen = MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING.length();
+    Map<String,String> adjacencies = 
+        conf.getValByRegex(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_PATTERN);
+    if (adjacencies.isEmpty()) {
+      return "";
+    }
+    int size = 0;
+    for (Entry<String,String> entry : adjacencies.entrySet()) {
+      int keyLen = entry.getKey().length();
+      size += keyLen - prefixLen;
+      size += entry.getValue().length() + 6;
+    }
+    StringBuilder sb = new StringBuilder(size);
+    for (Entry<String,String> entry : adjacencies.entrySet()) {
+      int keyLen = entry.getKey().length();
+      sb.append("\"");
+      sb.append(escapeString(entry.getKey().substring(prefixLen, keyLen)));
+      sb.append("\"=\"");
+      sb.append(escapeString(entry.getValue()));
+      sb.append("\" ");
+    }
+    return sb.toString();
+  }
+  
+  public static String escapeString(String data) {
+    return StringUtils.escapeString(data, StringUtils.ESCAPE_CHAR,
+        new char[] {'"', '=', '.'});
+  }
 
   public static class InitTransition 
       implements MultipleArcTransition<JobImpl, JobEvent, JobStateInternal> {
@@ -1217,7 +1251,11 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
             job.conf.get(MRJobConfig.USER_NAME, "mapred"),
             job.appSubmitTime,
             job.remoteJobConfFile.toString(),
-            job.jobACLs, job.queueName);
+            job.jobACLs, job.queueName,
+            job.conf.get(MRJobConfig.WORKFLOW_ID, ""),
+            job.conf.get(MRJobConfig.WORKFLOW_NAME, ""),
+            job.conf.get(MRJobConfig.WORKFLOW_NODE_NAME, ""),
+            getWorkflowAdjacencies(job.conf));
         job.eventHandler.handle(new JobHistoryEvent(job.jobId, jse));
         //TODO JH Verify jobACLs, UserName via UGI?
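
As a hedged illustration of what the new getWorkflowAdjacencies() serializes (the node names "A", "B" and "C" below are made up for the example):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.MRJobConfig;

    public class WorkflowAdjacencyExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical DAG: node "A" feeds node "B"; node "B" feeds node "C".
        conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "A", "B");
        conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "B", "C");
        // JobImpl.getWorkflowAdjacencies(conf) would render these entries as
        //   "A"="B" "B"="C"
        // with '"', '=' and '.' inside names escaped via StringUtils.escapeString.
      }
    }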
 

+ 68 - 21
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java

@@ -66,6 +66,8 @@ import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStartedEvent;
 import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent;
 import org.apache.hadoop.mapreduce.security.TokenCache;
 import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
+import org.apache.hadoop.mapreduce.v2.api.records.Avataar;
+import org.apache.hadoop.mapreduce.v2.api.records.Locality;
 import org.apache.hadoop.mapreduce.v2.api.records.Phase;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport;
@@ -156,7 +158,8 @@ public abstract class TaskAttemptImpl implements
   private final org.apache.hadoop.mapred.JobID oldJobId;
   private final TaskAttemptListener taskAttemptListener;
   private final Resource resourceCapability;
-  private final String[] dataLocalHosts;
+  protected Set<String> dataLocalHosts;
+  protected Set<String> dataLocalRacks;
   private final List<String> diagnostics = new ArrayList<String>();
   private final Lock readLock;
   private final Lock writeLock;
@@ -175,6 +178,8 @@ public abstract class TaskAttemptImpl implements
   private int shufflePort = -1;
   private String trackerName;
   private int httpPort;
+  private Locality locality;
+  private Avataar avataar;
 
   private static final CleanupContainerTransition CLEANUP_CONTAINER_TRANSITION =
     new CleanupContainerTransition();
@@ -532,8 +537,16 @@ public abstract class TaskAttemptImpl implements
         getMemoryRequired(conf, taskId.getTaskType()));
     this.resourceCapability.setVirtualCores(
         getCpuRequired(conf, taskId.getTaskType()));
-    this.dataLocalHosts = dataLocalHosts;
+
+    this.dataLocalHosts = resolveHosts(dataLocalHosts);
     RackResolver.init(conf);
+    this.dataLocalRacks = new HashSet<String>(); 
+    for (String host : this.dataLocalHosts) {
+      this.dataLocalRacks.add(RackResolver.resolve(host).getNetworkLocation());
+    }
+
+    locality = Locality.OFF_SWITCH;
+    avataar = Avataar.VIRGIN;
 
     // This "this leak" is okay because the retained pointer is in an
     //  instance variable.
@@ -1032,6 +1045,23 @@ public abstract class TaskAttemptImpl implements
     }
   }
 
+  public Locality getLocality() {
+    return locality;
+  }
+  
+  public void setLocality(Locality locality) {
+    this.locality = locality;
+  }
+
+  public Avataar getAvataar()
+  {
+    return avataar;
+  }
+  
+  public void setAvataar(Avataar avataar) {
+    this.avataar = avataar;
+  }
+  
   private static TaskAttemptState getExternalState(
       TaskAttemptStateInternal smState) {
     switch (smState) {
@@ -1232,25 +1262,27 @@ public abstract class TaskAttemptImpl implements
                 taskAttempt.attemptId, 
                 taskAttempt.resourceCapability));
       } else {
-        Set<String> racks = new HashSet<String>(); 
-        for (String host : taskAttempt.dataLocalHosts) {
-          racks.add(RackResolver.resolve(host).getNetworkLocation());
-        }
         taskAttempt.eventHandler.handle(new ContainerRequestEvent(
-            taskAttempt.attemptId, taskAttempt.resourceCapability, taskAttempt
-                .resolveHosts(taskAttempt.dataLocalHosts), racks
-                .toArray(new String[racks.size()])));
+            taskAttempt.attemptId, taskAttempt.resourceCapability,
+            taskAttempt.dataLocalHosts.toArray(
+                new String[taskAttempt.dataLocalHosts.size()]),
+            taskAttempt.dataLocalRacks.toArray(
+                new String[taskAttempt.dataLocalRacks.size()])));
       }
     }
   }
 
-  protected String[] resolveHosts(String[] src) {
-    String[] result = new String[src.length];
-    for (int i = 0; i < src.length; i++) {
-      if (isIP(src[i])) {
-        result[i] = resolveHost(src[i]);
-      } else {
-        result[i] = src[i];
+  protected Set<String> resolveHosts(String[] src) {
+    Set<String> result = new HashSet<String>();
+    if (src != null) {
+      for (int i = 0; i < src.length; i++) {
+        if (src[i] == null) {
+          continue;
+        } else if (isIP(src[i])) {
+          result.add(resolveHost(src[i]));
+        } else {
+          result.add(src[i]);
+        }
       }
     }
     return result;
@@ -1300,6 +1332,20 @@ public abstract class TaskAttemptImpl implements
           taskAttempt.remoteTask.isMapTask(), taskAttempt.containerID.getId());
       taskAttempt.taskAttemptListener.registerPendingTask(
           taskAttempt.remoteTask, taskAttempt.jvmID);
+
+      taskAttempt.locality = Locality.OFF_SWITCH;
+      if (taskAttempt.dataLocalHosts.size() > 0) {
+        String cHost = taskAttempt.resolveHost(
+            taskAttempt.containerNodeId.getHost());
+        if (taskAttempt.dataLocalHosts.contains(cHost)) {
+          taskAttempt.locality = Locality.NODE_LOCAL;
+        }
+      }
+      if (taskAttempt.locality == Locality.OFF_SWITCH) {
+        if (taskAttempt.dataLocalRacks.contains(taskAttempt.nodeRackName)) {
+          taskAttempt.locality = Locality.RACK_LOCAL;
+        }
+      }
       
       //launch the container
       //create the container object to be launched for a given Task attempt
@@ -1376,7 +1422,7 @@ public abstract class TaskAttemptImpl implements
             taskAttempt.attemptId.getTaskId().getJobId(), tauce));
       } else {
         LOG.debug("Not generating HistoryFinish event since start event not " +
-        		"generated for taskAttempt: " + taskAttempt.getID());
+            "generated for taskAttempt: " + taskAttempt.getID());
       }
     }
   }
@@ -1421,7 +1467,8 @@ public abstract class TaskAttemptImpl implements
             TypeConverter.fromYarn(taskAttempt.attemptId.getTaskId().getTaskType()),
             taskAttempt.launchTime,
             nodeHttpInetAddr.getHostName(), nodeHttpInetAddr.getPort(),
-            taskAttempt.shufflePort, taskAttempt.containerID);
+            taskAttempt.shufflePort, taskAttempt.containerID,
+            taskAttempt.locality.toString(), taskAttempt.avataar.toString());
       taskAttempt.eventHandler.handle
           (new JobHistoryEvent(taskAttempt.attemptId.getTaskId().getJobId(), tase));
       taskAttempt.eventHandler.handle
@@ -1510,7 +1557,7 @@ public abstract class TaskAttemptImpl implements
         // handling failed map/reduce events.
       }else {
         LOG.debug("Not generating HistoryFinish event since start event not " +
-        		"generated for taskAttempt: " + taskAttempt.getID());
+            "generated for taskAttempt: " + taskAttempt.getID());
       }
       taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
           taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED));
@@ -1580,7 +1627,7 @@ public abstract class TaskAttemptImpl implements
             taskAttempt.attemptId.getTaskId().getJobId(), tauce));
       }else {
         LOG.debug("Not generating HistoryFinish event since start event not " +
-        		"generated for taskAttempt: " + taskAttempt.getID());
+            "generated for taskAttempt: " + taskAttempt.getID());
       }
       taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
           taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED));
@@ -1648,7 +1695,7 @@ public abstract class TaskAttemptImpl implements
             taskAttempt.attemptId.getTaskId().getJobId(), tauce));
       }else {
         LOG.debug("Not generating HistoryFinish event since start event not " +
-        		"generated for taskAttempt: " + taskAttempt.getID());
+            "generated for taskAttempt: " + taskAttempt.getID());
       }
 //      taskAttempt.logAttemptFinishedEvent(TaskAttemptStateInternal.KILLED); Not logging Map/Reduce attempts in case of failure.
       taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
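
A condensed, standalone restatement of the locality classification this change adds to the container-assigned transition; the classify() helper is my own naming, and the real code resolves the container host via resolveHost() and its rack via nodeRackName:

    import java.util.Set;
    import org.apache.hadoop.mapreduce.v2.api.records.Locality;

    public class LocalityExample {
      // Node-local if the assigned host serves a split, rack-local if only its
      // rack does, otherwise off-switch.
      static Locality classify(Set<String> dataLocalHosts, Set<String> dataLocalRacks,
          String containerHost, String containerRack) {
        if (dataLocalHosts.contains(containerHost)) {
          return Locality.NODE_LOCAL;
        }
        if (dataLocalRacks.contains(containerRack)) {
          return Locality.RACK_LOCAL;
        }
        return Locality.OFF_SWITCH;
      }
    }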

+ 8 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java

@@ -46,6 +46,7 @@ import org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent;
 import org.apache.hadoop.mapreduce.jobhistory.TaskFinishedEvent;
 import org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent;
 import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
+import org.apache.hadoop.mapreduce.v2.api.records.Avataar;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus;
@@ -594,8 +595,9 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
   }
 
   // This is always called in the Write Lock
-  private void addAndScheduleAttempt() {
+  private void addAndScheduleAttempt(Avataar avataar) {
     TaskAttempt attempt = createAttempt();
+    ((TaskAttemptImpl) attempt).setAvataar(avataar);
     if (LOG.isDebugEnabled()) {
       LOG.debug("Created attempt " + attempt.getID());
     }
@@ -749,7 +751,7 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
 
     @Override
     public void transition(TaskImpl task, TaskEvent event) {
-      task.addAndScheduleAttempt();
+      task.addAndScheduleAttempt(Avataar.VIRGIN);
       task.scheduledTime = task.clock.getTime();
       TaskStartedEvent tse = new TaskStartedEvent(
           TypeConverter.fromYarn(task.taskId), task.getLaunchTime(),
@@ -772,7 +774,7 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
     @Override
     public void transition(TaskImpl task, TaskEvent event) {
       LOG.info("Scheduling a redundant attempt for task " + task.taskId);
-      task.addAndScheduleAttempt();
+      task.addAndScheduleAttempt(Avataar.SPECULATIVE);
     }
   }
 
@@ -849,7 +851,7 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
       task.finishedAttempts.add(taskAttemptId);
       task.inProgressAttempts.remove(taskAttemptId);
       if (task.successfulAttempt == null) {
-        task.addAndScheduleAttempt();
+        task.addAndScheduleAttempt(Avataar.VIRGIN);
       }
     }
   }
@@ -937,7 +939,7 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
         task.inProgressAttempts.remove(taskAttemptId);
         if (task.inProgressAttempts.size() == 0
             && task.successfulAttempt == null) {
-          task.addAndScheduleAttempt();
+          task.addAndScheduleAttempt(Avataar.VIRGIN);
         }
       } else {
         task.handleTaskAttemptCompletion(
@@ -1053,7 +1055,7 @@ public abstract class TaskImpl implements Task, EventHandler<TaskEvent> {
       // from the map splitInfo. So the bad node might be sent as a location
       // to the RM. But the RM would ignore that just like it would ignore
       // currently pending container requests affinitized to bad nodes.
-      task.addAndScheduleAttempt();
+      task.addAndScheduleAttempt(Avataar.VIRGIN);
       return TaskStateInternal.SCHEDULED;
     }
   }

+ 0 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java

@@ -67,7 +67,6 @@ import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.util.RackResolver;
 

+ 80 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java

@@ -33,6 +33,9 @@ import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.JobACL;
 import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.jobhistory.EventType;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent;
+import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent;
 import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.JobStatus.State;
 import org.apache.hadoop.mapreduce.MRConfig;
@@ -66,6 +69,7 @@ import org.apache.hadoop.yarn.SystemClock;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
 import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.state.StateMachine;
 import org.apache.hadoop.yarn.state.StateMachineFactory;
@@ -105,6 +109,13 @@ public class TestJobImpl {
     Configuration conf = new Configuration();
     conf.setInt(MRJobConfig.NUM_REDUCES, 0);
     conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
+    conf.set(MRJobConfig.WORKFLOW_ID, "testId");
+    conf.set(MRJobConfig.WORKFLOW_NAME, "testName");
+    conf.set(MRJobConfig.WORKFLOW_NODE_NAME, "testNodeName");
+    conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "key1", "value1");
+    conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "key2", "value2");
+    
+ 
     AsyncDispatcher dispatcher = new AsyncDispatcher();
     dispatcher.init(conf);
     dispatcher.start();
@@ -114,6 +125,9 @@ public class TestJobImpl {
     commitHandler.init(conf);
     commitHandler.start();
 
+    JobSubmittedEventHandler jseHandler = new JobSubmittedEventHandler("testId",
+        "testName", "testNodeName", "\"key2\"=\"value2\" \"key1\"=\"value1\" ");
+    dispatcher.register(EventType.class, jseHandler);
     JobImpl job = createStubbedJob(conf, dispatcher, 0);
     job.handle(new JobEvent(job.getID(), JobEventType.JOB_INIT));
     assertJobState(job, JobStateInternal.INITED);
@@ -121,6 +135,11 @@ public class TestJobImpl {
     assertJobState(job, JobStateInternal.SUCCEEDED);
     dispatcher.stop();
     commitHandler.stop();
+    try {
+      Assert.assertTrue(jseHandler.getAssertValue());
+    } catch (InterruptedException e) {
+      Assert.fail("Workflow related attributes are not tested properly");
+    }
   }
 
   @Test(timeout=20000)
@@ -614,6 +633,67 @@ public class TestJobImpl {
     Assert.assertEquals(state, job.getInternalState());
   }
 
+  private static class JobSubmittedEventHandler implements
+      EventHandler<JobHistoryEvent> {
+
+    private String workflowId;
+    
+    private String workflowName;
+    
+    private String workflowNodeName;
+    
+    private String workflowAdjacencies;
+    
+    private Boolean assertBoolean;
+
+    public JobSubmittedEventHandler(String workflowId, String workflowName,
+        String workflowNodeName, String workflowAdjacencies) {
+      this.workflowId = workflowId;
+      this.workflowName = workflowName;
+      this.workflowNodeName = workflowNodeName;
+      this.workflowAdjacencies = workflowAdjacencies;
+      assertBoolean = null;
+    }
+
+    @Override
+    public void handle(JobHistoryEvent jhEvent) {
+      if (jhEvent.getType() != EventType.JOB_SUBMITTED) {
+        return;
+      }
+      JobSubmittedEvent jsEvent = (JobSubmittedEvent) jhEvent.getHistoryEvent();
+      if (!workflowId.equals(jsEvent.getWorkflowId())) {
+        setAssertValue(false);
+        return;
+      }
+      if (!workflowName.equals(jsEvent.getWorkflowName())) {
+        setAssertValue(false);
+        return;
+      }
+      if (!workflowNodeName.equals(jsEvent.getWorkflowNodeName())) {
+        setAssertValue(false);
+        return;
+      }
+      if (!workflowAdjacencies.equals(jsEvent.getWorkflowAdjacencies())) {
+        setAssertValue(false);
+        return;
+      }
+      setAssertValue(true);
+    }
+    
+    private synchronized void setAssertValue(Boolean bool) {
+      assertBoolean = bool;
+      notify();
+    }
+    
+    public synchronized boolean getAssertValue() throws InterruptedException {
+      while (assertBoolean == null) {
+        wait();
+      }
+      return assertBoolean;
+    }
+
+  }
+
   private static class StubbedJob extends JobImpl {
     //override the init transition
     private final InitTransition initTransition;

+ 10 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java

@@ -48,6 +48,7 @@ import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletion;
 import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.JobState;
+import org.apache.hadoop.mapreduce.v2.api.records.Locality;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState;
@@ -157,6 +158,7 @@ public class TestTaskAttempt{
         createMapTaskAttemptImplForTest(eventHandler, splitInfo);
     TaskAttemptImpl spyTa = spy(mockTaskAttempt);
     when(spyTa.resolveHost(hosts[0])).thenReturn("host1");
+    spyTa.dataLocalHosts = spyTa.resolveHosts(splitInfo.getLocations());
 
     TaskAttemptEvent mockTAEvent = mock(TaskAttemptEvent.class);
     rct.transition(spyTa, mockTAEvent);
@@ -360,6 +362,8 @@ public class TestTaskAttempt{
     taImpl.handle(new TaskAttemptEvent(attemptId,
         TaskAttemptEventType.TA_CONTAINER_LAUNCH_FAILED));
     assertFalse(eventHandler.internalError);
+    assertEquals("Task attempt is not assigned on the local node", 
+        Locality.NODE_LOCAL, taImpl.getLocality());
   }
 
   @Test
@@ -398,7 +402,7 @@ public class TestTaskAttempt{
           mock(Token.class), new Credentials(),
           new SystemClock(), appCtx);
 
-    NodeId nid = BuilderUtils.newNodeId("127.0.0.1", 0);
+    NodeId nid = BuilderUtils.newNodeId("127.0.0.2", 0);
     ContainerId contId = BuilderUtils.newContainerId(appAttemptId, 3);
     Container container = mock(Container.class);
     when(container.getId()).thenReturn(contId);
@@ -416,6 +420,8 @@ public class TestTaskAttempt{
         TaskAttemptEventType.TA_CONTAINER_CLEANED));
     assertFalse("InternalError occurred trying to handle TA_CONTAINER_CLEANED",
         eventHandler.internalError);
+    assertEquals("Task attempt is not assigned on the local rack",
+        Locality.RACK_LOCAL, taImpl.getLocality());
   }
 
   @Test
@@ -439,7 +445,7 @@ public class TestTaskAttempt{
     jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");
 
     TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
-    when(splits.getLocations()).thenReturn(new String[] {"127.0.0.1"});
+    when(splits.getLocations()).thenReturn(new String[] {});
 
     AppContext appCtx = mock(AppContext.class);
     ClusterInfo clusterInfo = mock(ClusterInfo.class);
@@ -475,6 +481,8 @@ public class TestTaskAttempt{
         TaskAttemptEventType.TA_CONTAINER_CLEANED));
     assertFalse("InternalError occurred trying to handle TA_CONTAINER_CLEANED",
         eventHandler.internalError);
+    assertEquals("Task attempt is assigned locally", Locality.OFF_SWITCH,
+        taImpl.getLocality());
   }
 
   @Test

+ 20 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;
 import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
 import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
+import org.apache.hadoop.mapreduce.v2.api.records.Avataar;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState;
@@ -46,10 +47,12 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskState;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
 import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener;
+import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt;
 import org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl;
 import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.token.Token;
@@ -254,6 +257,7 @@ public class TestTaskImpl {
     mockTask.handle(new TaskEvent(taskId, 
         TaskEventType.T_SCHEDULE));
     assertTaskScheduledState();
+    assertTaskAttemptAvataar(Avataar.VIRGIN);
   }
   
   private void killTask(TaskId taskId) {
@@ -338,6 +342,19 @@ public class TestTaskImpl {
   private void assertTaskSucceededState() {
     assertEquals(TaskState.SUCCEEDED, mockTask.getState());
   }
+
+  /**
+   * {@link Avataar}
+   */
+  private void assertTaskAttemptAvataar(Avataar avataar) {
+    for (TaskAttempt taskAttempt : mockTask.getAttempts().values()) {
+      if (((TaskAttemptImpl) taskAttempt).getAvataar() == avataar) {
+        return;
+      }
+    }
+    fail("There is no " + (avataar == Avataar.VIRGIN ? "virgin" : "speculative")
+        + "task attempt");
+  }
   
   @Test
   public void testInit() {
@@ -516,6 +533,9 @@ public class TestTaskImpl {
     
     // The task should still be in the succeeded state
     assertTaskSucceededState();
+    
+    // The task should contain speculative a task attempt
+    assertTaskAttemptAvataar(Avataar.SPECULATIVE);
   }
   
   @Test

+ 24 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/Avataar.java

@@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.v2.api.records;
+
+public enum Avataar {
+  VIRGIN,
+  SPECULATIVE
+}

+ 25 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/Locality.java

@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.v2.api.records;
+
+public enum Locality {
+  NODE_LOCAL,
+  RACK_LOCAL,
+  OFF_SWITCH
+}

+ 8 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr

@@ -91,7 +91,11 @@
                                     "values": "string"
                                    }
           },
-          {"name": "jobQueueName", "type": "string"}
+          {"name": "jobQueueName", "type": "string"},
+          {"name": "workflowId", "type": "string"},
+          {"name": "workflowName", "type": "string"},
+          {"name": "workflowNodeName", "type": "string"},
+          {"name": "workflowAdjacencies", "type": "string"}
       ]
      },
 
@@ -191,7 +195,9 @@
           {"name": "trackerName", "type": "string"},
           {"name": "httpPort", "type": "int"},
           {"name": "shufflePort", "type": "int"},
-          {"name": "containerId", "type": "string"}
+          {"name": "containerId", "type": "string"},
+          {"name": "locality", "type": "string"},
+          {"name": "avataar", "type": "string"}
       ]
      },
 

+ 22 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Merger.java

@@ -218,6 +218,7 @@ public class Merger {
     CompressionCodec codec = null;
     long segmentOffset = 0;
     long segmentLength = -1;
+    long rawDataLength = -1;
     
     Counters.Counter mapOutputsCounter = null;
 
@@ -234,6 +235,15 @@ public class Merger {
       this(conf, fs, file, 0, fs.getFileStatus(file).getLen(), codec, preserve, 
            mergedMapOutputsCounter);
     }
+    
+    public Segment(Configuration conf, FileSystem fs, Path file,
+        CompressionCodec codec, boolean preserve,
+        Counters.Counter mergedMapOutputsCounter, long rawDataLength)
+            throws IOException {
+      this(conf, fs, file, 0, fs.getFileStatus(file).getLen(), codec, preserve, 
+          mergedMapOutputsCounter);
+      this.rawDataLength = rawDataLength;
+    }
 
     public Segment(Configuration conf, FileSystem fs, Path file,
                    long segmentOffset, long segmentLength,
@@ -261,6 +271,11 @@ public class Merger {
     public Segment(Reader<K, V> reader, boolean preserve) {
       this(reader, preserve, null);
     }
+
+    public Segment(Reader<K, V> reader, boolean preserve, long rawDataLength) {
+      this(reader, preserve, null);
+      this.rawDataLength = rawDataLength;
+    }
     
     public Segment(Reader<K, V> reader, boolean preserve, 
                    Counters.Counter mapOutputsCounter) {
@@ -300,6 +315,10 @@ public class Merger {
         segmentLength : reader.getLength();
     }
     
+    public long getRawDataLength() {
+      return (rawDataLength > 0) ? rawDataLength : getLength();
+    }
+
     boolean nextRawKey() throws IOException {
       return reader.nextRawKey(key);
     }
@@ -633,7 +652,7 @@ public class Merger {
             totalBytesProcessed = 0;
             totalBytes = 0;
             for (int i = 0; i < segmentsToMerge.size(); i++) {
-              totalBytes += segmentsToMerge.get(i).getLength();
+              totalBytes += segmentsToMerge.get(i).getRawDataLength();
             }
           }
           if (totalBytes != 0) //being paranoid
@@ -702,7 +721,7 @@ public class Merger {
           // size will match(almost) if combiner is not called in merge.
           long inputBytesOfThisMerge = totalBytesProcessed -
                                        bytesProcessedInPrevMerges;
-          totalBytes -= inputBytesOfThisMerge - tempSegment.getLength();
+          totalBytes -= inputBytesOfThisMerge - tempSegment.getRawDataLength();
           if (totalBytes != 0) {
             progPerByte = 1.0f / (float)totalBytes;
           }
@@ -768,7 +787,7 @@ public class Merger {
       for (int i = 0; i < numSegments; i++) {
         // Not handling empty segments here assuming that it would not affect
         // much in calculation of mergeProgress.
-        segmentSizes.add(segments.get(i).getLength());
+        segmentSizes.add(segments.get(i).getRawDataLength());
       }
       
       // If includeFinalMerge is true, allow the following while loop iterate
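
A minimal sketch of how a caller might use the new raw-length-aware Segment constructor so merge progress is computed over uncompressed bytes rather than the (possibly compressed) on-disk length; the path and size below are placeholders, and the file must exist for the constructor to succeed:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.Merger;

    public class RawLengthSegmentExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        Path onDiskMapOutput = new Path("/tmp/map_0.out");  // placeholder path
        long rawDataLength = 4L * 1024 * 1024;              // uncompressed bytes, placeholder

        Merger.Segment<Text, Text> segment = new Merger.Segment<Text, Text>(
            conf, fs, onDiskMapOutput, null /* codec */, true /* preserve */,
            null /* mergedMapOutputsCounter */, rawDataLength);

        // Returns rawDataLength when supplied, otherwise falls back to the
        // on-disk segment length.
        System.out.println(segment.getRawDataLength());
      }
    }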

+ 13 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java

@@ -647,5 +647,18 @@ public interface MRJobConfig {
       "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*",
       "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*",
   };
+
+  public static final String WORKFLOW_ID = "mapreduce.workflow.id";
+  
+  public static final String WORKFLOW_NAME = "mapreduce.workflow.name";
+  
+  public static final String WORKFLOW_NODE_NAME =
+      "mapreduce.workflow.node.name";
+  
+  public static final String WORKFLOW_ADJACENCY_PREFIX_STRING =
+      "mapreduce.workflow.adjacency.";
+  
+  public static final String WORKFLOW_ADJACENCY_PREFIX_PATTERN =
+      "^mapreduce\\.workflow\\.adjacency\\..+";
   
 }
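
A hedged sketch of how an external workflow engine might populate these new keys before submitting a job; the id, name, and node values are invented for illustration:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.MRJobConfig;

    public class WorkflowTaggingExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Hypothetical workflow metadata; a real scheduler would supply its own.
        conf.set(MRJobConfig.WORKFLOW_ID, "wf-0001");
        conf.set(MRJobConfig.WORKFLOW_NAME, "nightly-etl");
        conf.set(MRJobConfig.WORKFLOW_NODE_NAME, "aggregate-step");
        conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "aggregate-step",
            "publish-step");
        // These values flow into the JobSubmittedEvent written to job history.
      }
    }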

+ 63 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSubmittedEvent.java

@@ -52,6 +52,29 @@ public class JobSubmittedEvent implements HistoryEvent {
   public JobSubmittedEvent(JobID id, String jobName, String userName,
       long submitTime, String jobConfPath,
       Map<JobACL, AccessControlList> jobACLs, String jobQueueName) {
+    this(id, jobName, userName, submitTime, jobConfPath, jobACLs,
+        jobQueueName, "", "", "", "");
+  }
+
+  /**
+   * Create an event to record job submission
+   * @param id The job Id of the job
+   * @param jobName Name of the job
+   * @param userName Name of the user who submitted the job
+   * @param submitTime Time of submission
+   * @param jobConfPath Path of the Job Configuration file
+   * @param jobACLs The configured ACLs for the job
+   * @param jobQueueName The job-queue to which this job was submitted
+   * @param workflowId The Id of the workflow
+   * @param workflowName The name of the workflow
+   * @param workflowNodeName The node name of the workflow
+   * @param workflowAdjacencies The adjacencies of the workflow
+   */
+  public JobSubmittedEvent(JobID id, String jobName, String userName,
+      long submitTime, String jobConfPath,
+      Map<JobACL, AccessControlList> jobACLs, String jobQueueName,
+      String workflowId, String workflowName, String workflowNodeName,
+      String workflowAdjacencies) {
     datum.jobid = new Utf8(id.toString());
     datum.jobName = new Utf8(jobName);
     datum.userName = new Utf8(userName);
@@ -66,6 +89,18 @@ public class JobSubmittedEvent implements HistoryEvent {
     if (jobQueueName != null) {
       datum.jobQueueName = new Utf8(jobQueueName);
     }
+    if (workflowId != null) {
+      datum.workflowId = new Utf8(workflowId);
+    }
+    if (workflowName != null) {
+      datum.workflowName = new Utf8(workflowName);
+    }
+    if (workflowNodeName != null) {
+      datum.workflowNodeName = new Utf8(workflowNodeName);
+    }
+    if (workflowAdjacencies != null) {
+      datum.workflowAdjacencies = new Utf8(workflowAdjacencies);
+    }
   }
 
   JobSubmittedEvent() {}
@@ -105,6 +140,34 @@ public class JobSubmittedEvent implements HistoryEvent {
     }
     return jobAcls;
   }
+  /** Get the id of the workflow */
+  public String getWorkflowId() {
+    if (datum.workflowId != null) {
+      return datum.workflowId.toString();
+    }
+    return null;
+  }
+  /** Get the name of the workflow */
+  public String getWorkflowName() {
+    if (datum.workflowName != null) {
+      return datum.workflowName.toString();
+    }
+    return null;
+  }
+  /** Get the node name of the workflow */
+  public String getWorkflowNodeName() {
+    if (datum.workflowNodeName != null) {
+      return datum.workflowNodeName.toString();
+    }
+    return null;
+  }
+  /** Get the adjacencies of the workflow */
+  public String getWorkflowAdjacencies() {
+    if (datum.workflowAdjacencies != null) {
+      return datum.workflowAdjacencies.toString();
+    }
+    return null;
+  }
   /** Get the event type */
   public EventType getEventType() { return EventType.JOB_SUBMITTED; }
 

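A hedged sketch of constructing the extended JobSubmittedEvent and reading the new workflow accessors back; the job id, user, paths, and workflow values are illustrative only.

import java.util.HashMap;

import org.apache.hadoop.mapreduce.JobACL;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent;
import org.apache.hadoop.security.authorize.AccessControlList;

public class JobSubmittedEventSketch {
  public static void main(String[] args) {
    JobID jobId = new JobID("201301010000", 1);  // illustrative cluster timestamp / job number

    JobSubmittedEvent event = new JobSubmittedEvent(jobId, "wordcount", "alice",
        System.currentTimeMillis(), "/jobs/job.xml",
        new HashMap<JobACL, AccessControlList>(), "default",
        "wf-0001", "nightly-etl", "aggregate-step", "aggregate-step=publish-step");

    // The new getters return null only when the corresponding datum was never set;
    // the older 7-argument constructor passes "" for all four workflow fields.
    System.out.println(event.getWorkflowId() + " / " + event.getWorkflowNodeName());
  }
}
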
+ 28 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptStartedEvent.java

@@ -46,10 +46,13 @@ public class TaskAttemptStartedEvent implements HistoryEvent {
    * @param httpPort The port number of the tracker
    * @param shufflePort The shuffle port number of the container
    * @param containerId The containerId for the task attempt.
+   * @param locality The locality of the task attempt
+   * @param avataar The avataar of the task attempt
    */
   public TaskAttemptStartedEvent( TaskAttemptID attemptId,  
       TaskType taskType, long startTime, String trackerName,
-      int httpPort, int shufflePort, ContainerId containerId) {
+      int httpPort, int shufflePort, ContainerId containerId,
+      String locality, String avataar) {
     datum.attemptId = new Utf8(attemptId.toString());
     datum.taskid = new Utf8(attemptId.getTaskID().toString());
     datum.startTime = startTime;
@@ -58,14 +61,21 @@ public class TaskAttemptStartedEvent implements HistoryEvent {
     datum.httpPort = httpPort;
     datum.shufflePort = shufflePort;
     datum.containerId = new Utf8(containerId.toString());
+    if (locality != null) {
+      datum.locality = new Utf8(locality);
+    }
+    if (avataar != null) {
+      datum.avataar = new Utf8(avataar);
+    }
   }
 
   // TODO Remove after MrV1 is removed.
   // Using a dummy containerId to prevent jobHistory parse failures.
   public TaskAttemptStartedEvent(TaskAttemptID attemptId, TaskType taskType,
-      long startTime, String trackerName, int httpPort, int shufflePort) {
+      long startTime, String trackerName, int httpPort, int shufflePort,
+      String locality, String avataar) {
     this(attemptId, taskType, startTime, trackerName, httpPort, shufflePort,
-        ConverterUtils.toContainerId("container_-1_-1_-1_-1"));
+        ConverterUtils.toContainerId("container_-1_-1_-1_-1"), locality, avataar);
   }
 
   TaskAttemptStartedEvent() {}
@@ -105,4 +115,19 @@ public class TaskAttemptStartedEvent implements HistoryEvent {
   public ContainerId getContainerId() {
     return ConverterUtils.toContainerId(datum.containerId.toString());
   }
+  /** Get the locality */
+  public String getLocality() {
+    if (datum.locality != null) {
+      return datum.locality.toString();
+    }
+    return null;
+  }
+  /** Get the avataar */
+  public String getAvataar() {
+    if (datum.avataar != null) {
+      return datum.avataar.toString();
+    }
+    return null;
+  }
+
 }

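A hedged sketch of the MRv1-compatible TaskAttemptStartedEvent constructor above, which fills in a dummy container id internally; "NODE_LOCAL" and "VIRGIN" are merely illustrative strings for the new locality and avataar fields.

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStartedEvent;

public class TaskAttemptStartedEventSketch {
  public static void main(String[] args) {
    TaskAttemptID attemptId = new TaskAttemptID(
        new TaskID(new JobID("201301010000", 1), TaskType.MAP, 0), 0);

    // MRv1-compatible constructor from the diff above: no ContainerId argument.
    TaskAttemptStartedEvent event = new TaskAttemptStartedEvent(attemptId,
        TaskType.MAP, System.currentTimeMillis(), "tracker-host:50060",
        50060, -1, "NODE_LOCAL", "VIRGIN");

    System.out.println(event.getLocality() + " / " + event.getAvataar());
  }
}
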
+ 51 - 17
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java

@@ -89,7 +89,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
     new TreeSet<InMemoryMapOutput<K,V>>(new MapOutputComparator<K, V>());
   private final MergeThread<InMemoryMapOutput<K,V>, K,V> inMemoryMerger;
   
-  Set<Path> onDiskMapOutputs = new TreeSet<Path>();
+  Set<CompressAwarePath> onDiskMapOutputs = new TreeSet<CompressAwarePath>();
   private final OnDiskMerger onDiskMerger;
   
   private final long memoryLimit;
@@ -336,7 +336,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
              inMemoryMergedMapOutputs.size());
   }
   
-  public synchronized void closeOnDiskFile(Path file) {
+  public synchronized void closeOnDiskFile(CompressAwarePath file) {
     onDiskMapOutputs.add(file);
     
     if (onDiskMapOutputs.size() >= (2 * ioSortFactor - 1)) {
@@ -356,7 +356,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
     List<InMemoryMapOutput<K, V>> memory = 
       new ArrayList<InMemoryMapOutput<K, V>>(inMemoryMergedMapOutputs);
     memory.addAll(inMemoryMapOutputs);
-    List<Path> disk = new ArrayList<Path>(onDiskMapOutputs);
+    List<CompressAwarePath> disk = new ArrayList<CompressAwarePath>(onDiskMapOutputs);
     return finalMerge(jobConf, rfs, memory, disk);
   }
    
@@ -456,6 +456,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
                         codec, null);
 
       RawKeyValueIterator rIter = null;
+      CompressAwarePath compressAwarePath;
       try {
         LOG.info("Initiating in-memory merge with " + noInMemorySegments + 
                  " segments...");
@@ -474,6 +475,8 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
           combineCollector.setWriter(writer);
           combineAndSpill(rIter, reduceCombineInputCounter);
         }
+        compressAwarePath = new CompressAwarePath(outputPath,
+            writer.getRawLength());
         writer.close();
 
         LOG.info(reduceId +  
@@ -489,12 +492,12 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
       }
 
       // Note the output of the merge
-      closeOnDiskFile(outputPath);
+      closeOnDiskFile(compressAwarePath);
     }
 
   }
   
-  private class OnDiskMerger extends MergeThread<Path,K,V> {
+  private class OnDiskMerger extends MergeThread<CompressAwarePath,K,V> {
     
     public OnDiskMerger(MergeManagerImpl<K, V> manager) {
       super(manager, Integer.MAX_VALUE, exceptionReporter);
@@ -503,7 +506,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
     }
     
     @Override
-    public void merge(List<Path> inputs) throws IOException {
+    public void merge(List<CompressAwarePath> inputs) throws IOException {
       // sanity check
       if (inputs == null || inputs.isEmpty()) {
         LOG.info("No ondisk files to merge...");
@@ -518,7 +521,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
                " map outputs on disk. Triggering merge...");
       
       // 1. Prepare the list of files to be merged. 
-      for (Path file : inputs) {
+      for (CompressAwarePath file : inputs) {
         approxOutputSize += localFS.getFileStatus(file).getLen();
       }
 
@@ -536,6 +539,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
                         (Class<V>) jobConf.getMapOutputValueClass(),
                         codec, null);
       RawKeyValueIterator iter  = null;
+      CompressAwarePath compressAwarePath;
       Path tmpDir = new Path(reduceId.toString());
       try {
         iter = Merger.merge(jobConf, rfs,
@@ -548,13 +552,15 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
                             mergedMapOutputsCounter, null);
 
         Merger.writeFile(iter, writer, reporter, jobConf);
+        compressAwarePath = new CompressAwarePath(outputPath,
+            writer.getRawLength());
         writer.close();
       } catch (IOException e) {
         localFS.delete(outputPath, true);
         throw e;
       }
 
-      closeOnDiskFile(outputPath);
+      closeOnDiskFile(compressAwarePath);
 
       LOG.info(reduceId +
           " Finished merging " + inputs.size() + 
@@ -653,7 +659,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
 
   private RawKeyValueIterator finalMerge(JobConf job, FileSystem fs,
                                        List<InMemoryMapOutput<K,V>> inMemoryMapOutputs,
-                                       List<Path> onDiskMapOutputs
+                                       List<CompressAwarePath> onDiskMapOutputs
                                        ) throws IOException {
     LOG.info("finalMerge called with " + 
              inMemoryMapOutputs.size() + " in-memory map-outputs and " + 
@@ -712,7 +718,8 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
         try {
           Merger.writeFile(rIter, writer, reporter, job);
           // add to list of final disk outputs.
-          onDiskMapOutputs.add(outputPath);
+          onDiskMapOutputs.add(new CompressAwarePath(outputPath,
+              writer.getRawLength()));
         } catch (IOException e) {
           if (null != outputPath) {
             try {
@@ -742,15 +749,19 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
     // segments on disk
     List<Segment<K,V>> diskSegments = new ArrayList<Segment<K,V>>();
     long onDiskBytes = inMemToDiskBytes;
-    Path[] onDisk = onDiskMapOutputs.toArray(new Path[onDiskMapOutputs.size()]);
-    for (Path file : onDisk) {
-      onDiskBytes += fs.getFileStatus(file).getLen();
-      LOG.debug("Disk file: " + file + " Length is " + 
-          fs.getFileStatus(file).getLen());
+    long rawBytes = inMemToDiskBytes;
+    CompressAwarePath[] onDisk = onDiskMapOutputs.toArray(
+        new CompressAwarePath[onDiskMapOutputs.size()]);
+    for (CompressAwarePath file : onDisk) {
+      long fileLength = fs.getFileStatus(file).getLen();
+      onDiskBytes += fileLength;
+      rawBytes += (file.getRawDataLength() > 0) ? file.getRawDataLength() : fileLength;
+
+      LOG.debug("Disk file: " + file + " Length is " + fileLength);
       diskSegments.add(new Segment<K, V>(job, fs, file, codec, keepInputs,
                                          (file.toString().endsWith(
                                              Task.MERGED_OUTPUT_PREFIX) ?
-                                          null : mergedMapOutputsCounter)
+                                          null : mergedMapOutputsCounter), file.getRawDataLength()
                                         ));
     }
     LOG.info("Merging " + onDisk.length + " files, " +
@@ -786,7 +797,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
         return diskMerge;
       }
       finalSegments.add(new Segment<K,V>(
-            new RawKVIteratorReader(diskMerge, onDiskBytes), true));
+            new RawKVIteratorReader(diskMerge, onDiskBytes), true, rawBytes));
     }
     return Merger.merge(job, fs, keyClass, valueClass,
                  finalSegments, finalSegments.size(), tmpDir,
@@ -794,4 +805,27 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
                  null);
   
   }
+
+  static class CompressAwarePath extends Path {
+    private long rawDataLength;
+
+    public CompressAwarePath(Path path, long rawDataLength) {
+      super(path.toUri());
+      this.rawDataLength = rawDataLength;
+    }
+
+    public long getRawDataLength() {
+      return rawDataLength;
+    }
+    
+    @Override
+    public boolean equals(Object other) {
+      return super.equals(other);
+    }
+    
+    @Override
+    public int hashCode() {
+      return super.hashCode();
+    }
+  }
 }

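A hedged sketch of the CompressAwarePath idea: the on-disk Path now carries the raw (uncompressed) length reported by the IFile writer, which finalMerge() above feeds into the Segment sizes. The sketch declares the same package because CompressAwarePath is a package-private nested class; the path and lengths are made up.

package org.apache.hadoop.mapreduce.task.reduce;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath;

public class CompressAwarePathSketch {
  public static void main(String[] args) {
    // Illustrative spill file: the on-disk bytes are compressed, but the
    // writer reported 10000 raw bytes when the file was created.
    Path onDisk = new Path("/tmp/map_1.out.merged");
    CompressAwarePath file = new CompressAwarePath(onDisk, 10000L);

    // getRawDataLength() is what the merge-progress accounting uses; the
    // compressed length still comes from the FileSystem's FileStatus.
    System.out.println(file + " rawDataLength=" + file.getRawDataLength());
  }
}
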
+ 4 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/OnDiskMapOutput.java

@@ -37,6 +37,7 @@ import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.MapOutputFile;
 
 import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
@@ -112,7 +113,9 @@ class OnDiskMapOutput<K, V> extends MapOutput<K, V> {
   @Override
   public void commit() throws IOException {
     localFS.rename(tmpOutputPath, outputPath);
-    merger.closeOnDiskFile(outputPath);
+    CompressAwarePath compressAwarePath = new CompressAwarePath(outputPath,
+        getSize());
+    merger.closeOnDiskFile(compressAwarePath);
   }
   
   @Override

+ 272 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java

@@ -0,0 +1,272 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapreduce.task.reduce;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.mockito.Mockito.doAnswer;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalDirAllocator;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.Counters.Counter;
+import org.apache.hadoop.mapred.IFile.Reader;
+import org.apache.hadoop.mapred.IFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MROutputFiles;
+import org.apache.hadoop.mapred.Merger;
+import org.apache.hadoop.mapred.Merger.Segment;
+import org.apache.hadoop.mapred.RawKeyValueIterator;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.MRConfig;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskID;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl;
+import org.apache.hadoop.util.Progress;
+import org.apache.hadoop.util.Progressable;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+public class TestMerger {
+
+  private Configuration conf;
+  private JobConf jobConf;
+  private FileSystem fs;
+  
+  @Before
+  public void setup() throws IOException {
+    conf = new Configuration();
+    jobConf = new JobConf();
+    fs = FileSystem.getLocal(conf);
+  }
+  
+  @After
+  public void cleanup() throws IOException {    
+    fs.delete(new Path(jobConf.getLocalDirs()[0]), true);
+  }
+  
+  @Test
+  public void testInMemoryMerger() throws IOException {
+    JobID jobId = new JobID("a", 0);
+    TaskAttemptID reduceId = new TaskAttemptID(
+        new TaskID(jobId, TaskType.REDUCE, 0), 0);
+    TaskAttemptID mapId1 = new TaskAttemptID(
+        new TaskID(jobId, TaskType.MAP, 1), 0);
+    TaskAttemptID mapId2 = new TaskAttemptID(
+        new TaskID(jobId, TaskType.MAP, 2), 0);
+    
+    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
+    
+    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
+        reduceId, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null,
+        null, null, new Progress(), new MROutputFiles());
+    
+    // write map outputs
+    Map<String, String> map1 = new TreeMap<String, String>();
+    map1.put("apple", "disgusting");
+    map1.put("carrot", "delicious");
+    Map<String, String> map2 = new TreeMap<String, String>();
+    map1.put("banana", "pretty good");
+    byte[] mapOutputBytes1 = writeMapOutput(conf, map1);
+    byte[] mapOutputBytes2 = writeMapOutput(conf, map2);
+    InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(
+        conf, mapId1, mergeManager, mapOutputBytes1.length, null, true);
+    InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(
+        conf, mapId2, mergeManager, mapOutputBytes2.length, null, true);
+    System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0,
+        mapOutputBytes1.length);
+    System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0,
+        mapOutputBytes2.length);
+    
+    // create merger and run merge
+    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger =
+        mergeManager.createInMemoryMerger();
+    List<InMemoryMapOutput<Text, Text>> mapOutputs =
+        new ArrayList<InMemoryMapOutput<Text, Text>>();
+    mapOutputs.add(mapOutput1);
+    mapOutputs.add(mapOutput2);
+    
+    inMemoryMerger.merge(mapOutputs);
+    
+    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
+    Path outPath = mergeManager.onDiskMapOutputs.iterator().next();
+    
+    List<String> keys = new ArrayList<String>();
+    List<String> values = new ArrayList<String>();
+    readOnDiskMapOutput(conf, fs, outPath, keys, values);
+    Assert.assertEquals(keys, Arrays.asList("apple", "banana", "carrot"));
+    Assert.assertEquals(values, Arrays.asList("disgusting", "pretty good", "delicious"));
+  }
+  
+  private byte[] writeMapOutput(Configuration conf, Map<String, String> keysToValues)
+      throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    FSDataOutputStream fsdos = new FSDataOutputStream(baos, null);
+    IFile.Writer<Text, Text> writer = new IFile.Writer<Text, Text>(conf, fsdos,
+        Text.class, Text.class, null, null);
+    for (String key : keysToValues.keySet()) {
+      String value = keysToValues.get(key);
+      writer.append(new Text(key), new Text(value));
+    }
+    writer.close();
+    return baos.toByteArray();
+  }
+  
+  private void readOnDiskMapOutput(Configuration conf, FileSystem fs, Path path,
+      List<String> keys, List<String> values) throws IOException {
+    IFile.Reader<Text, Text> reader = new IFile.Reader<Text, Text>(conf, fs,
+        path, null, null);
+    DataInputBuffer keyBuff = new DataInputBuffer();
+    DataInputBuffer valueBuff = new DataInputBuffer();
+    Text key = new Text();
+    Text value = new Text();
+    while (reader.nextRawKey(keyBuff)) {
+      key.readFields(keyBuff);
+      keys.add(key.toString());
+      reader.nextRawValue(valueBuff);
+      value.readFields(valueBuff);
+      values.add(value.toString());
+    }
+  }
+  
+  @Test
+  public void testCompressed() throws IOException {
+    testMergeShouldReturnProperProgress(getCompressedSegments());
+  }
+  
+  @Test
+  public void testUncompressed() throws IOException {
+    testMergeShouldReturnProperProgress(getUncompressedSegments());
+  }
+  
+  @SuppressWarnings( { "deprecation", "unchecked" })
+  public void testMergeShouldReturnProperProgress(
+      List<Segment<Text, Text>> segments) throws IOException {
+    Path tmpDir = new Path("localpath");
+    Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
+    Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
+    RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
+    Counter readsCounter = new Counter();
+    Counter writesCounter = new Counter();
+    Progress mergePhase = new Progress();
+    RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
+        valueClass, segments, 2, tmpDir, comparator, getReporter(),
+        readsCounter, writesCounter, mergePhase);
+    Assert.assertEquals(1.0f, mergeQueue.getProgress().get());
+  }
+
+  private Progressable getReporter() {
+    Progressable reporter = new Progressable() {
+      @Override
+      public void progress() {
+      }
+    };
+    return reporter;
+  }
+
+  private List<Segment<Text, Text>> getUncompressedSegments() throws IOException {
+    List<Segment<Text, Text>> segments = new ArrayList<Segment<Text, Text>>();
+    for (int i = 1; i < 1; i++) {
+      segments.add(getUncompressedSegment(i));
+    }
+    return segments;
+  }
+  
+  private List<Segment<Text, Text>> getCompressedSegments() throws IOException {
+    List<Segment<Text, Text>> segments = new ArrayList<Segment<Text, Text>>();
+    for (int i = 1; i < 1; i++) {
+      segments.add(getCompressedSegment(i));
+    }
+    return segments;
+  }
+  
+  private Segment<Text, Text> getUncompressedSegment(int i) throws IOException {
+    return new Segment<Text, Text>(getReader(i), false);
+  }
+
+  private Segment<Text, Text> getCompressedSegment(int i) throws IOException {
+    return new Segment<Text, Text>(getReader(i), false, 3000L);
+  }
+
+  @SuppressWarnings("unchecked")
+  private Reader<Text, Text> getReader(int i) throws IOException {
+    Reader<Text, Text> readerMock = mock(Reader.class);
+    when(readerMock.getPosition()).thenReturn(0L).thenReturn(10L).thenReturn(
+        20L);
+    when(
+        readerMock.nextRawKey(any(DataInputBuffer.class)))
+        .thenAnswer(getKeyAnswer("Segment" + i));
+    doAnswer(getValueAnswer("Segment" + i)).when(readerMock).nextRawValue(
+        any(DataInputBuffer.class));
+
+    return readerMock;
+  }
+
+  private Answer<?> getKeyAnswer(final String segmentName) {
+    return new Answer<Object>() {
+      int i = 0;
+
+      public Boolean answer(InvocationOnMock invocation) {
+        Object[] args = invocation.getArguments();
+        DataInputBuffer key = (DataInputBuffer) args[0];
+        if (i++ == 2) {
+          return false;
+        }
+        key.reset(("Segment Key " + segmentName + i).getBytes(), 20);
+        return true;
+      }
+    };
+  }
+  
+  private Answer<?> getValueAnswer(final String segmentName) {
+    return new Answer<Void>() {
+      int i = 0;
+
+      public Void answer(InvocationOnMock invocation) {
+        Object[] args = invocation.getArguments();
+        DataInputBuffer key = (DataInputBuffer) args[0];
+        if (i++ == 2) {
+          return null;
+        }
+        key.reset(("Segment Value " + segmentName + i).getBytes(), 20);
+        return null;
+      }
+    };
+  }
+}

+ 0 - 324
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestIndexCache.java

@@ -1,324 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.mapred;
-
-import java.io.DataOutputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Random;
-import java.util.zip.CRC32;
-import java.util.zip.CheckedOutputStream;
-
-import org.apache.hadoop.fs.ChecksumException;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
-
-import junit.framework.TestCase;
-
-public class TestIndexCache extends TestCase {
-  private JobConf conf;
-  private FileSystem fs;
-  private Path p;
-
-  @Override
-  public void setUp() throws IOException {
-    conf = new JobConf();
-    fs = FileSystem.getLocal(conf).getRaw();
-    p =  new Path(System.getProperty("test.build.data", "/tmp"),
-        "cache").makeQualified(fs.getUri(), fs.getWorkingDirectory());
-  }
-
-  public void testLRCPolicy() throws Exception {
-    Random r = new Random();
-    long seed = r.nextLong();
-    r.setSeed(seed);
-    System.out.println("seed: " + seed);
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
-    final int partsPerMap = 1000;
-    final int bytesPerFile = partsPerMap * 24;
-    IndexCache cache = new IndexCache(conf);
-
-    // fill cache
-    int totalsize = bytesPerFile;
-    for (; totalsize < 1024 * 1024; totalsize += bytesPerFile) {
-      Path f = new Path(p, Integer.toString(totalsize, 36));
-      writeFile(fs, f, totalsize, partsPerMap);
-      IndexRecord rec = cache.getIndexInformation(
-        Integer.toString(totalsize, 36), r.nextInt(partsPerMap), f,
-        UserGroupInformation.getCurrentUser().getShortUserName());
-      checkRecord(rec, totalsize);
-    }
-
-    // delete files, ensure cache retains all elem
-    for (FileStatus stat : fs.listStatus(p)) {
-      fs.delete(stat.getPath(),true);
-    }
-    for (int i = bytesPerFile; i < 1024 * 1024; i += bytesPerFile) {
-      Path f = new Path(p, Integer.toString(i, 36));
-      IndexRecord rec = cache.getIndexInformation(Integer.toString(i, 36),
-        r.nextInt(partsPerMap), f,
-        UserGroupInformation.getCurrentUser().getShortUserName());
-      checkRecord(rec, i);
-    }
-
-    // push oldest (bytesPerFile) out of cache
-    Path f = new Path(p, Integer.toString(totalsize, 36));
-    writeFile(fs, f, totalsize, partsPerMap);
-    cache.getIndexInformation(Integer.toString(totalsize, 36),
-        r.nextInt(partsPerMap), f,
-        UserGroupInformation.getCurrentUser().getShortUserName());
-    fs.delete(f, false);
-
-    // oldest fails to read, or error
-    boolean fnf = false;
-    try {
-      cache.getIndexInformation(Integer.toString(bytesPerFile, 36),
-        r.nextInt(partsPerMap), new Path(p, Integer.toString(bytesPerFile)),
-        UserGroupInformation.getCurrentUser().getShortUserName());
-    } catch (IOException e) {
-      if (e.getCause() == null ||
-          !(e.getCause()  instanceof FileNotFoundException)) {
-        throw e;
-      }
-      else {
-        fnf = true;
-      }
-    }
-    if (!fnf)
-      fail("Failed to push out last entry");
-    // should find all the other entries
-    for (int i = bytesPerFile << 1; i < 1024 * 1024; i += bytesPerFile) {
-      IndexRecord rec = cache.getIndexInformation(Integer.toString(i, 36),
-          r.nextInt(partsPerMap), new Path(p, Integer.toString(i, 36)),
-          UserGroupInformation.getCurrentUser().getShortUserName());
-      checkRecord(rec, i);
-    }
-    IndexRecord rec = cache.getIndexInformation(Integer.toString(totalsize, 36),
-      r.nextInt(partsPerMap), f,
-      UserGroupInformation.getCurrentUser().getShortUserName());
-
-    checkRecord(rec, totalsize);
-  }
-
-  public void testBadIndex() throws Exception {
-    final int parts = 30;
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
-    IndexCache cache = new IndexCache(conf);
-
-    Path f = new Path(p, "badindex");
-    FSDataOutputStream out = fs.create(f, false);
-    CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
-    DataOutputStream dout = new DataOutputStream(iout);
-    for (int i = 0; i < parts; ++i) {
-      for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
-        if (0 == (i % 3)) {
-          dout.writeLong(i);
-        } else {
-          out.writeLong(i);
-        }
-      }
-    }
-    out.writeLong(iout.getChecksum().getValue());
-    dout.close();
-    try {
-      cache.getIndexInformation("badindex", 7, f,
-        UserGroupInformation.getCurrentUser().getShortUserName());
-      fail("Did not detect bad checksum");
-    } catch (IOException e) {
-      if (!(e.getCause() instanceof ChecksumException)) {
-        throw e;
-      }
-    }
-  }
-
-  public void testInvalidReduceNumberOrLength() throws Exception {
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
-    final int partsPerMap = 1000;
-    final int bytesPerFile = partsPerMap * 24;
-    IndexCache cache = new IndexCache(conf);
-
-    // fill cache
-    Path feq = new Path(p, "invalidReduceOrPartsPerMap");
-    writeFile(fs, feq, bytesPerFile, partsPerMap);
-
-    // Number of reducers should always be less than partsPerMap as reducer
-    // numbers start from 0 and there cannot be more reducer than parts
-
-    try {
-      // Number of reducers equal to partsPerMap
-      cache.getIndexInformation("reduceEqualPartsPerMap", 
-               partsPerMap, // reduce number == partsPerMap
-               feq, UserGroupInformation.getCurrentUser().getShortUserName());
-      fail("Number of reducers equal to partsPerMap did not fail");
-    } catch (Exception e) {
-      if (!(e instanceof IOException)) {
-        throw e;
-      }
-    }
-
-    try {
-      // Number of reducers more than partsPerMap
-      cache.getIndexInformation(
-      "reduceMorePartsPerMap", 
-      partsPerMap + 1, // reduce number > partsPerMap
-      feq, UserGroupInformation.getCurrentUser().getShortUserName());
-      fail("Number of reducers more than partsPerMap did not fail");
-    } catch (Exception e) {
-      if (!(e instanceof IOException)) {
-        throw e;
-      }
-    }
-  }
-
-  public void testRemoveMap() throws Exception {
-    // This test case use two thread to call getIndexInformation and 
-    // removeMap concurrently, in order to construct race condition.
-    // This test case may not repeatable. But on my macbook this test 
-    // fails with probability of 100% on code before MAPREDUCE-2541,
-    // so it is repeatable in practice.
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 10);
-    // Make a big file so removeMapThread almost surely runs faster than 
-    // getInfoThread 
-    final int partsPerMap = 100000;
-    final int bytesPerFile = partsPerMap * 24;
-    final IndexCache cache = new IndexCache(conf);
-
-    final Path big = new Path(p, "bigIndex");
-    final String user = 
-      UserGroupInformation.getCurrentUser().getShortUserName();
-    writeFile(fs, big, bytesPerFile, partsPerMap);
-    
-    // run multiple times
-    for (int i = 0; i < 20; ++i) {
-      Thread getInfoThread = new Thread() {
-        @Override
-        public void run() {
-          try {
-            cache.getIndexInformation("bigIndex", partsPerMap, big, user);
-          } catch (Exception e) {
-            // should not be here
-          }
-        }
-      };
-      Thread removeMapThread = new Thread() {
-        @Override
-        public void run() {
-          cache.removeMap("bigIndex");
-        }
-      };
-      if (i%2==0) {
-        getInfoThread.start();
-        removeMapThread.start();        
-      } else {
-        removeMapThread.start();        
-        getInfoThread.start();
-      }
-      getInfoThread.join();
-      removeMapThread.join();
-      assertEquals(true, cache.checkTotalMemoryUsed());
-    }      
-  }
-  
-  public void testCreateRace() throws Exception {
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
-    final int partsPerMap = 1000;
-    final int bytesPerFile = partsPerMap * 24;
-    final IndexCache cache = new IndexCache(conf);
-    
-    final Path racy = new Path(p, "racyIndex");
-    final String user =  
-      UserGroupInformation.getCurrentUser().getShortUserName();
-    writeFile(fs, racy, bytesPerFile, partsPerMap);
-
-    // run multiple instances
-    Thread[] getInfoThreads = new Thread[50];
-    for (int i = 0; i < 50; i++) {
-      getInfoThreads[i] = new Thread() {
-        @Override
-        public void run() {
-          try {
-            cache.getIndexInformation("racyIndex", partsPerMap, racy, user);
-            cache.removeMap("racyIndex");
-          } catch (Exception e) {
-            // should not be here
-          }
-        }
-      };
-    }
-
-    for (int i = 0; i < 50; i++) {
-      getInfoThreads[i].start();
-    }
-
-    final Thread mainTestThread = Thread.currentThread();
-
-    Thread timeoutThread = new Thread() {
-      @Override
-      public void run() {
-        try {
-          Thread.sleep(15000);
-          mainTestThread.interrupt();
-        } catch (InterruptedException ie) {
-          // we are done;
-        }
-      }
-    };
-
-    for (int i = 0; i < 50; i++) {
-      try {
-        getInfoThreads[i].join();
-      } catch (InterruptedException ie) {
-        // we haven't finished in time. Potential deadlock/race.
-        fail("Unexpectedly long delay during concurrent cache entry creations");
-      }
-    }
-    // stop the timeoutThread. If we get interrupted before stopping, there
-    // must be something wrong, although it wasn't a deadlock. No need to
-    // catch and swallow.
-    timeoutThread.interrupt();
-  }
-
-  private static void checkRecord(IndexRecord rec, long fill) {
-    assertEquals(fill, rec.startOffset);
-    assertEquals(fill, rec.rawLength);
-    assertEquals(fill, rec.partLength);
-  }
-
-  private static void writeFile(FileSystem fs, Path f, long fill, int parts)
-      throws IOException {
-    FSDataOutputStream out = fs.create(f, false);
-    CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
-    DataOutputStream dout = new DataOutputStream(iout);
-    for (int i = 0; i < parts; ++i) {
-      for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
-        dout.writeLong(fill);
-      }
-    }
-    out.writeLong(iout.getChecksum().getValue());
-    dout.close();
-  }
-}

+ 1 - 1
hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/versioninfo/VersionInfoMojo.java

@@ -46,7 +46,7 @@ import java.util.TimeZone;
  * build.  The version information includes build time, SCM URI, SCM branch, SCM
  * commit, and an MD5 checksum of the contents of the files in the codebase.
  */
-@Mojo(name="version-info", defaultPhase=LifecyclePhase.INITIALIZE)
+@Mojo(name="version-info")
 public class VersionInfoMojo extends AbstractMojo {
 
   @Parameter(defaultValue="${project}")

+ 0 - 7
hadoop-project-dist/pom.xml

@@ -247,13 +247,6 @@
                     </copy>
 
                     <!-- Docs -->
-                    <exec dir="${project.build.directory}/docs-src"
-                          executable="${env.FORREST_HOME}/bin/forrest"
-                          failonerror="true">
-                    </exec>
-                    <copy todir="${project.build.directory}/site">
-                      <fileset dir="${project.build.directory}/docs-src/build/site"/>
-                    </copy>
                     <copy file="${project.build.directory}/docs-src/releasenotes.html"
                           todir="${project.build.directory}/site"/>
                     <style basedir="${basedir}/src/main/resources"

+ 5 - 0
hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Hadoop20JHParser.java

@@ -67,6 +67,11 @@ public class Hadoop20JHParser implements JobHistoryParser {
     reader = new LineReader(input);
   }
 
+  public Hadoop20JHParser(LineReader reader) throws IOException {
+    super();
+    this.reader = reader;
+  }
+
   Map<String, HistoryEventEmitter> liveEmitters =
       new HashMap<String, HistoryEventEmitter>();
   Queue<HistoryEvent> remainingEvents = new LinkedList<HistoryEvent>();

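A hedged sketch of the new LineReader-based constructor: it lets a caller hand Hadoop20JHParser an already-open reader instead of an InputStream; the history-file path below is illustrative.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.jobhistory.HistoryEvent;
import org.apache.hadoop.tools.rumen.Hadoop20JHParser;
import org.apache.hadoop.util.LineReader;

public class RumenLineReaderSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    // Illustrative path to a pre-0.21 format job history file.
    Path history = new Path("/tmp/job_200904211745_0002_hadoop_wordcount");

    LineReader lines = new LineReader(fs.open(history));
    Hadoop20JHParser parser = new Hadoop20JHParser(lines);  // constructor added above
    try {
      HistoryEvent event;
      while ((event = parser.nextEvent()) != null) {
        System.out.println(event.getEventType());
      }
    } finally {
      parser.close();
    }
  }
}
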
+ 19 - 1
hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java

@@ -76,6 +76,23 @@ public class Job20LineHistoryEventEmitter extends HistoryEventEmitter {
       }
       String jobName = line.get("JOBNAME");
       String jobQueueName = line.get("JOB_QUEUE");// could be null
+      String workflowId = line.get("WORKFLOW_ID");
+      if (workflowId == null) {
+        workflowId = "";
+      }
+      String workflowName = line.get("WORKFLOW_NAME");
+      if (workflowName == null) {
+        workflowName = "";
+      }
+      String workflowNodeName = line.get("WORKFLOW_NODE_NAME");
+      if (workflowNodeName == null) {
+        workflowNodeName = "";
+      }
+      String workflowAdjacencies = line.get("WORKFLOW_ADJACENCIES");
+      if (workflowAdjacencies == null) {
+        workflowAdjacencies = "";
+      }
+      
 
       if (submitTime != null) {
         Job20LineHistoryEventEmitter that =
@@ -86,7 +103,8 @@ public class Job20LineHistoryEventEmitter extends HistoryEventEmitter {
         Map<JobACL, AccessControlList> jobACLs =
           new HashMap<JobACL, AccessControlList>();
         return new JobSubmittedEvent(jobID, jobName, user,
-            that.originalSubmitTime, jobConf, jobACLs, jobQueueName);
+            that.originalSubmitTime, jobConf, jobACLs, jobQueueName,
+            workflowId, workflowName, workflowNodeName, workflowAdjacencies);
       }
 
       return null;

+ 10 - 1
hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/TaskAttempt20LineEventEmitter.java

@@ -65,6 +65,14 @@ public abstract class TaskAttempt20LineEventEmitter extends HistoryEventEmitter
       String taskType = line.get("TASK_TYPE");
       String trackerName = line.get("TRACKER_NAME");
       String httpPort = line.get("HTTP_PORT");
+      String locality = line.get("LOCALITY");
+      if (locality == null) {
+        locality = "";
+      }
+      String avataar = line.get("AVATAAR");
+      if (avataar == null) {
+        avataar = "";
+      }
 
       if (startTime != null && taskType != null) {
         TaskAttempt20LineEventEmitter that =
@@ -79,7 +87,8 @@ public abstract class TaskAttempt20LineEventEmitter extends HistoryEventEmitter
                 .parseInt(httpPort);
 
         return new TaskAttemptStartedEvent(taskAttemptID,
-            that.originalTaskType, that.originalStartTime, trackerName, port, -1);
+            that.originalTaskType, that.originalStartTime, trackerName, port, -1,
+            locality, avataar);
       }
 
       return null;

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -274,6 +274,9 @@ Release 0.23.7 - UNRELEASED
 
   IMPROVEMENTS
 
+    YARN-133 Update web services docs for RM clusterMetrics (Ravi Prakash via
+    kihwal)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml

@@ -93,7 +93,7 @@
         <executions>
           <execution>
             <id>version-info</id>
-            <phase>compile</phase>
+            <phase>generate-resources</phase>
             <goals>
               <goal>version-info</goal>
             </goals>

+ 38 - 11
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm

@@ -177,6 +177,16 @@ ResourceManager REST API's.
 *---------------+--------------+-------------------------------+
 | appsSubmitted | int | The number of applications submitted |
 *---------------+--------------+-------------------------------+
+| appsCompleted | int | The number of applications completed |
+*---------------+--------------+-------------------------------+
+| appsPending | int | The number of applications pending |
+*---------------+--------------+-------------------------------+
+| appsRunning | int | The number of applications running |
+*---------------+--------------+-------------------------------+
+| appsFailed | int | The number of applications failed |
+*---------------+--------------+-------------------------------+
+| appsKilled | int | The number of applications killed |
+*---------------+--------------+-------------------------------+
 | reservedMB    | long         | The amount of memory reserved in MB |
 *---------------+--------------+-------------------------------+
 | availableMB   | long         | The amount of memory available in MB |
@@ -187,6 +197,10 @@ ResourceManager REST API's.
 *---------------+--------------+-------------------------------+
 | containersAllocated | int | The number of containers allocated |
 *---------------+--------------+-------------------------------+
+| containersReserved | int | The number of containers reserved |
+*---------------+--------------+-------------------------------+
+| containersPending | int | The number of containers pending |
+*---------------+--------------+-------------------------------+
 | totalNodes | int | The total number of nodes |
 *---------------+--------------+-------------------------------+
 | activeNodes | int | The number of active nodes |
@@ -223,20 +237,26 @@ ResourceManager REST API's.
 
 +---+
   {
-    "clusterMetrics":
-    {
-      "appsSubmitted":4,
+    "clusterMetrics":{
+      "appsSubmitted":0,
+      "appsCompleted":0,
+      "appsPending":0,
+      "appsRunning":0,
+      "appsFailed":0,
+      "appsKilled":0,
       "reservedMB":0,
-      "availableMB":8192,
+      "availableMB":17408,
       "allocatedMB":0,
-      "totalMB":8192,
       "containersAllocated":0,
+      "containersReserved":0,
+      "containersPending":0,
+      "totalMB":17408,
       "totalNodes":1,
-      "activeNodes":1,
       "lostNodes":0,
       "unhealthyNodes":0,
       "decommissionedNodes":0,
-      "rebootedNodes":0
+      "rebootedNodes":0,
+      "activeNodes":1
     }
   }
 +---+
@@ -264,18 +284,25 @@ ResourceManager REST API's.
 +---+
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <clusterMetrics>
-  <appsSubmitted>4</appsSubmitted>
+  <appsSubmitted>0</appsSubmitted>
+  <appsCompleted>0</appsCompleted>
+  <appsPending>0</appsPending>
+  <appsRunning>0</appsRunning>
+  <appsFailed>0</appsFailed>
+  <appsKilled>0</appsKilled>
   <reservedMB>0</reservedMB>
-  <availableMB>8192</availableMB>
+  <availableMB>17408</availableMB>
   <allocatedMB>0</allocatedMB>
   <containersAllocated>0</containersAllocated>
-  <totalMB>8192</totalMB>
+  <containersReserved>0</containersReserved>
+  <containersPending>0</containersPending>
+  <totalMB>17408</totalMB>
   <totalNodes>1</totalNodes>
-  <activeNodes>1</activeNodes>
   <lostNodes>0</lostNodes>
   <unhealthyNodes>0</unhealthyNodes>
   <decommissionedNodes>0</decommissionedNodes>
   <rebootedNodes>0</rebootedNodes>
+  <activeNodes>1</activeNodes>
 </clusterMetrics>
 +---+
 

+ 16 - 2
pom.xml

@@ -517,6 +517,18 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
       <properties>
         <cloverLicenseLocation>${user.home}/.clover.license</cloverLicenseLocation>
         <cloverDatabase>${project.build.directory}/clover/hadoop-coverage.db</cloverDatabase>
+        <!-- NB: This additional parametrization exists so that these properties
+             can be re-defined on the command line with "-Dk=v" maven options.
+             For some reason the expressions declared in the Clover docs, such as
+             "${maven.clover.generateHtml}", do not work that way, but the
+             properties below are confirmed to work: e.g. -DcloverGenHtml=false
+             switches off the HTML generation.
+             The default values provided here correspond exactly to the Clover
+             defaults, so the behavior is 100% backwards compatible. -->
+        <cloverAlwaysReport>true</cloverAlwaysReport>
+        <cloverGenHtml>true</cloverGenHtml>
+        <cloverGenXml>true</cloverGenXml>
+        <cloverGenHistorical>false</cloverGenHistorical>
       </properties>
       <build>
         <plugins>
@@ -530,8 +542,10 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
               <cloverDatabase>${cloverDatabase}</cloverDatabase>
               <targetPercentage>50%</targetPercentage>
               <outputDirectory>${project.build.directory}/clover</outputDirectory>
-              <generateHtml>true</generateHtml>
-              <generateXml>true</generateXml>
+              <alwaysReport>${cloverAlwaysReport}</alwaysReport>
+              <generateHtml>${cloverGenHtml}</generateHtml>
+              <generateXml>${cloverGenXml}</generateXml>
+              <generateHistorical>${cloverGenHistorical}</generateHistorical>
             </configuration>
             <executions>
               <execution>