
Merge trunk into branch

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1437623 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon committed 12 years ago · commit 797465820b
100 changed files with 3471 additions and 1776 deletions. For each file below, the two counts are lines added, then lines removed.
  1. 50 3
      hadoop-common-project/hadoop-common/CHANGES.txt
  2. 0 67
      hadoop-common-project/hadoop-common/dev-support/saveVersion.sh
  3. 59 16
      hadoop-common-project/hadoop-common/pom.xml
  4. 4 4
      hadoop-common-project/hadoop-common/src/main/bin/hadoop
  5. 219 0
      hadoop-common-project/hadoop-common/src/main/conf/hadoop-policy.xml
  6. 0 16
      hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/service_level_auth.xml
  7. 0 74
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/HadoopVersionAnnotation.java
  8. 14 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
  9. 5 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
  10. 76 11
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
  11. 1 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java
  12. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java
  13. 15 3
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
  14. 27 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java
  15. 5 3
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
  16. 62 28
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java
  17. 25 0
      hadoop-common-project/hadoop-common/src/main/resources/common-version-info.properties
  18. 18 2
      hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
  19. 100 31
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java
  20. 29 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServer.java
  21. 12 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java
  22. 20 1
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java
  23. 17 1
      hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/HostnameFilter.java
  24. 26 0
      hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/servlet/TestHostnameFilter.java
  25. 32 2
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  26. 4 0
      hadoop-hdfs-project/hadoop-hdfs/pom.xml
  27. 5 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_permissions_guide.xml
  28. 126 38
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
  29. 12 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java
  30. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java
  31. 76 72
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
  32. 34 17
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
  33. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java
  34. 12 10
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java
  35. 9 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolServerSideTranslatorPB.java
  36. 8 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java
  37. 9 9
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java
  38. 6 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java
  39. 5 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolServerSideTranslatorPB.java
  40. 12 6
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java
  41. 11 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolServerSideTranslatorPB.java
  42. 9 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java
  43. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
  44. 1 12
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileChecksumServlets.java
  45. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto
  46. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto
  47. 1 15
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java
  48. 40 4
      hadoop-mapreduce-project/CHANGES.txt
  49. 1 1
      hadoop-mapreduce-project/dev-support/findbugs-exclude.xml
  50. 2 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
  51. 2 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java
  52. 2 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/Job.java
  53. 56 23
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
  54. 7 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java
  55. 2 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockJobs.java
  56. 12 6
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFetchFailure.java
  57. 2 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java
  58. 0 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/protocolrecords/impl/pb/GetDelegationTokenResponsePBImpl.java
  59. 5 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TaggedInputSplit.java
  60. 3 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
  61. 5 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/TaggedInputSplit.java
  62. 7 117
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java
  63. 127 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryMapOutput.java
  64. 2 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryReader.java
  65. 19 136
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapOutput.java
  66. 24 751
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java
  67. 797 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java
  68. 2 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeThread.java
  69. 131 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/OnDiskMapOutput.java
  70. 15 18
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java
  71. 11 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
  72. 5 6
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java
  73. 31 29
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java
  74. 4 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java
  75. 2 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/PartialJob.java
  76. 2 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/MockHistoryJobs.java
  77. 8 8
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java
  78. 3 4
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java
  79. 39 8
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java
  80. 17 5
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/NotificationTestCase.java
  81. 61 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java
  82. 14 11
      hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/QuasiMonteCarlo.java
  83. 6 6
      hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java
  84. 76 0
      hadoop-maven-plugins/pom.xml
  85. 118 0
      hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/util/Exec.java
  86. 61 0
      hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/util/FileSetUtils.java
  87. 343 0
      hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/versioninfo/VersionInfoMojo.java
  88. 5 0
      hadoop-project/pom.xml
  89. 3 0
      hadoop-tools/hadoop-pipes/pom.xml
  90. 36 1
      hadoop-yarn-project/CHANGES.txt
  91. 4 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptId.java
  92. 4 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationId.java
  93. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationMaster.java
  94. 2 2
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java
  95. 39 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ClientToken.java
  96. 1 53
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerToken.java
  97. 1 53
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/DelegationToken.java
  98. 82 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Token.java
  99. 34 14
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationMasterPBImpl.java
  100. 30 14
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java

+ 50 - 3
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -146,6 +146,9 @@ Trunk (Unreleased)
     HADOOP-9162. Add utility to check native library availability.
     (Binglin Chang via suresh)
 
+    HADOOP-8924. Add maven plugin alternative to shell script to save
+    package-info.java. (Chris Nauroth via suresh)
+
   BUG FIXES
 
     HADOOP-8419. Fixed GzipCode NPE reset for IBM JDK. (Yu Li via eyang)
@@ -326,6 +329,8 @@ Release 2.0.3-alpha - Unreleased
 
   INCOMPATIBLE CHANGES
 
+    HADOOP-8999. SASL negotiation is flawed (daryn)
+
   NEW FEATURES
 
     HADOOP-8597. Permit FsShell's text command to read Avro files.
@@ -436,6 +441,18 @@ Release 2.0.3-alpha - Unreleased
     HADOOP-9192. Move token related request/response messages to common.
     (suresh)
 
+    HADOOP-8712. Change default hadoop.security.group.mapping to
+    JniBasedUnixGroupsNetgroupMappingWithFallback (Robert Parker via todd)
+
+    HADOOP-9106. Allow configuration of IPC connect timeout.
+    (Rober Parker via suresh)
+
+    HADOOP-9216. CompressionCodecFactory#getCodecClasses should trim the
+    result of parsing by Configuration. (Tsuyoshi Ozawa via todd)
+
+    HADOOP-9231. Parametrize staging URL for the uniformity of
+    distributionManagement. (Konstantin Boudnik via suresh)
+
   OPTIMIZATIONS
 
     HADOOP-8866. SampleQuantiles#query is O(N^2) instead of O(N). (Andrew Wang
@@ -496,8 +513,6 @@ Release 2.0.3-alpha - Unreleased
 
     HADOOP-7115. Add a cache for getpwuid_r and getpwgid_r calls (tucu)
 
-    HADOOP-8999. SASL negotiation is flawed (daryn)
-
     HADOOP-6607. Add different variants of non caching HTTP headers. (tucu)
 
     HADOOP-9049. DelegationTokenRenewer needs to be Singleton and FileSystems
@@ -543,6 +558,20 @@ Release 2.0.3-alpha - Unreleased
     HADOOP-9203. RPCCallBenchmark should find a random available port.
     (Andrew Purtell via suresh)
 
+    HADOOP-9178. src/main/conf is missing hadoop-policy.xml.
+    (Sandy Ryza via eli)
+
+    HADOOP-8816. HTTP Error 413 full HEAD if using kerberos authentication. 
+    (moritzmoeller via tucu)
+    
+    HADOOP-9212. Potential deadlock in FileSystem.Cache/IPC/UGI. (tomwhite)
+
+    HADOOP-9193. hadoop script can inadvertently expand wildcard arguments
+    when delegating to hdfs script. (Andy Isaacson via todd)
+
+    HADOOP-9215. when using cmake-2.6, libhadoop.so doesn't get created
+    (only libhadoop.so.1.0.0) (Colin Patrick McCabe via todd)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES
@@ -1233,6 +1262,21 @@ Release 2.0.0-alpha - 05-23-2012
     HADOOP-8655. Fix TextInputFormat for large deliminators. (Gelesh via
     bobby) 
 
+Release 0.23.7 - UNRELEASED
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+    HADOOP-8849. FileUtil#fullyDelete should grant the target directories +rwx
+    permissions (Ivan A. Veselovsky via bobby)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
 Release 0.23.6 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -1240,6 +1284,8 @@ Release 0.23.6 - UNRELEASED
   NEW FEATURES
 
   IMPROVEMENTS
+    HADOOP-9217. Print thread dumps when hadoop-common tests fail.
+    (Andrey Klochkov via suresh)
 
   OPTIMIZATIONS
 
@@ -1258,7 +1304,8 @@ Release 0.23.6 - UNRELEASED
 
     HADOOP-9097. Maven RAT plugin is not checking all source files (tgraves)
 
-Release 0.23.5 - UNRELEASED
+Release 0.23.5 - 2012-11-28
+
 
   INCOMPATIBLE CHANGES
 

+ 0 - 67
hadoop-common-project/hadoop-common/dev-support/saveVersion.sh

@@ -1,67 +0,0 @@
-#!/bin/sh
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This file is used to generate the package-info.java class that
-# records the version, revision, branch, user, timestamp, and url
-unset LANG
-unset LC_CTYPE
-unset LC_TIME
-version=$1
-build_dir=$2
-user=`whoami | tr '\n\r' '\n'`
-date=`date`
-cwd=`pwd`
-if git rev-parse HEAD 2>/dev/null > /dev/null ; then
-  revision=`git log -1 --pretty=format:"%H"`
-  hostname=`hostname`
-  branch=`git branch | sed -n -e 's/^* //p'`
-  url="git://${hostname}${cwd}"
-elif [ -d .svn ]; then
-  revision=`svn info | sed -n -e 's/Last Changed Rev: \(.*\)/\1/p'`
-  url=`svn info | sed -n -e 's/^URL: \(.*\)/\1/p'`
-  # Get canonical branch (branches/X, tags/X, or trunk)
-  branch=`echo $url | sed -n -e 's,.*\(branches/.*\)$,\1,p' \
-                             -e 's,.*\(tags/.*\)$,\1,p' \
-                             -e 's,.*trunk$,trunk,p'`
-else
-  revision="Unknown"
-  branch="Unknown"
-  url="file://$cwd"
-fi
-
-which md5sum > /dev/null
-if [ "$?" = "0" ] ; then
-  srcChecksum=`find src/main/java -name '*.java' | LC_ALL=C sort | xargs md5sum | md5sum | cut -d ' ' -f 1`
-else
-  srcChecksum="Not Available"
-fi
-
-mkdir -p $build_dir/org/apache/hadoop
-cat << EOF | \
-  sed -e "s/VERSION/$version/" -e "s/USER/$user/" -e "s/DATE/$date/" \
-      -e "s|URL|$url|" -e "s/REV/$revision/" \
-      -e "s|BRANCH|$branch|" -e "s/SRCCHECKSUM/$srcChecksum/" \
-      > $build_dir/org/apache/hadoop/package-info.java
-/*
- * Generated by src/saveVersion.sh
- */
-@HadoopVersionAnnotation(version="VERSION", revision="REV", branch="BRANCH",
-                         user="USER", date="DATE", url="URL",
-                         srcChecksum="SRCCHECKSUM")
-package org.apache.hadoop;
-EOF

+ 59 - 16
hadoop-common-project/hadoop-common/pom.xml

@@ -244,7 +244,51 @@
   </dependencies>
 
   <build>
+    <!--
+    Include all files in src/main/resources.  By default, do not apply property
+    substitution (filtering=false), but do apply property substitution to
+    common-version-info.properties (filtering=true).  This will substitute the
+    version information correctly, but prevent Maven from altering other files
+    like core-default.xml.
+    -->
+    <resources>
+      <resource>
+        <directory>${basedir}/src/main/resources</directory>
+        <excludes>
+          <exclude>common-version-info.properties</exclude>
+        </excludes>
+        <filtering>false</filtering>
+      </resource>
+      <resource>
+        <directory>${basedir}/src/main/resources</directory>
+        <includes>
+          <include>common-version-info.properties</include>
+        </includes>
+        <filtering>true</filtering>
+      </resource>
+    </resources>
     <plugins>
+      <plugin>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-maven-plugins</artifactId>
+        <executions>
+          <execution>
+            <id>version-info</id>
+            <goals>
+              <goal>version-info</goal>
+            </goals>
+            <configuration>
+              <source>
+                <directory>${basedir}/src/main</directory>
+                <includes>
+                  <include>java/**/*.java</include>
+                  <include>proto/**/*.proto</include>
+                </includes>
+              </source>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-surefire-plugin</artifactId>
@@ -288,22 +332,6 @@
               </target>
             </configuration>
           </execution>
-          <execution>
-            <id>save-version</id>
-            <phase>generate-sources</phase>
-            <goals>
-              <goal>run</goal>
-            </goals>
-            <configuration>
-              <target>
-                <mkdir dir="${project.build.directory}/generated-sources/java"/>
-                <exec executable="sh">
-                  <arg
-                      line="${basedir}/dev-support/saveVersion.sh ${project.version} ${project.build.directory}/generated-sources/java"/>
-                </exec>
-              </target>
-            </configuration>
-          </execution>
           <execution>
             <id>generate-test-sources</id>
             <phase>generate-test-sources</phase>
@@ -453,6 +481,18 @@
           </excludes>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <properties>
+            <property>
+              <name>listener</name>
+              <value>org.apache.hadoop.test.TimedOutTestsListener</value>
+            </property>
+          </properties>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 
@@ -515,6 +555,9 @@
                     <exec executable="make" dir="${project.build.directory}/native" failonerror="true">
                       <arg line="VERBOSE=1"/>
                     </exec>
+                    <!-- The second make is a workaround for HADOOP-9215.  It can
+                         be removed when version 2.6 of cmake is no longer supported . -->
+                    <exec executable="make" dir="${project.build.directory}/native" failonerror="true"></exec>
                   </target>
                 </configuration>
               </execution>

+ 4 - 4
hadoop-common-project/hadoop-common/src/main/bin/hadoop

@@ -58,9 +58,9 @@ case $COMMAND in
     #try to locate hdfs and if present, delegate to it.  
     shift
     if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
-      exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups}  $*
+      exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups}  "$@"
     elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
-      exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} $*
+      exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
     else
       echo "HADOOP_HDFS_HOME not found!"
       exit 1
@@ -75,9 +75,9 @@ case $COMMAND in
     #try to locate mapred and if present, delegate to it.
     shift
     if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
-      exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} $*
+      exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
     elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
-      exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} $*
+      exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
     else
       echo "HADOOP_MAPRED_HOME not found!"
       exit 1

+ 219 - 0
hadoop-common-project/hadoop-common/src/main/conf/hadoop-policy.xml

@@ -0,0 +1,219 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+
+ Copyright 2011 The Apache Software Foundation
+ 
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>security.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientProtocol, which is used by user code
+    via the DistributedFileSystem.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.client.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol
+    for block recovery.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for DatanodeProtocol, which is used by datanodes to
+    communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterDatanodeProtocol, the inter-datanode protocol
+    for updating generation timestamp.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.namenode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for NamenodeProtocol, the protocol used by the secondary
+    namenode to communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+ <property>
+    <name>security.admin.operations.protocol.acl</name>
+    <value>${HADOOP_HDFS_USER}</value>
+    <description>ACL for AdminOperationsProtocol. Used for admin commands.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.usertogroups.mappings.protocol.acl</name>
+    <value>${HADOOP_HDFS_USER}</value>
+    <description>ACL for RefreshUserMappingsProtocol. Used to refresh
+    users mappings. The ACL is a comma-separated list of user and
+    group names. The user and group list is separated by a blank. For
+    e.g. "alice,bob users,wheel".  A special value of "*" means all
+    users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.policy.protocol.acl</name>
+    <value>${HADOOP_HDFS_USER}</value>
+    <description>ACL for RefreshAuthorizationPolicyProtocol, used by the
+    dfsadmin and mradmin commands to refresh the security policy in-effect.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.ha.service.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for HAService protocol used by HAAdmin to manage the
+      active and stand-by states of namenode.</description>
+  </property>
+
+  <property>
+    <name>security.zkfc.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for access to the ZK Failover Controller
+    </description>
+  </property>
+
+  <property>
+    <name>security.qjournal.service.protocol.acl</name>
+    <value>${HADOOP_HDFS_USER}</value>
+    <description>ACL for QJournalProtocol, used by the NN to communicate with
+    JNs when using the QuorumJournalManager for edit logs.</description>
+  </property>
+
+  <property>
+    <name>security.mrhs.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for HSClientProtocol, used by job clients to
+    communciate with the MR History Server job status etc. 
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <!-- YARN Protocols -->
+
+  <property>
+    <name>security.resourcetracker.protocol.acl</name>
+    <value>${HADOOP_YARN_USER}</value>
+    <description>ACL for ResourceTracker protocol, used by the
+    ResourceManager and NodeManager to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.admin.protocol.acl</name>
+    <value>${HADOOP_YARN_USER}</value>
+    <description>ACL for RMAdminProtocol, for admin commands. 
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.client.resourcemanager.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientRMProtocol, used by the ResourceManager 
+    and applications submission clients to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.applicationmaster.resourcemanager.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for AMRMProtocol, used by the ResourceManager 
+    and ApplicationMasters to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.containermanager.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ContainerManager protocol, used by the NodeManager 
+    and ApplicationMasters to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.resourcelocalizer.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ResourceLocalizer protocol, used by the NodeManager 
+    and ResourceLocalizer to communicate with each other.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.task.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for TaskUmbilicalProtocol, used by the map and reduce
+    tasks to communicate with the parent tasktracker.
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for MRClientProtocol, used by job clients to
+    communciate with the MR ApplicationMaster to query job status etc. 
+    The ACL is a comma-separated list of user and group names. The user and
+    group list is separated by a blank. For e.g. "alice,bob users,wheel".
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+</configuration>

+ 0 - 16
hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/service_level_auth.xml

@@ -116,22 +116,6 @@
             <td>ACL for NamenodeProtocol, the protocol used by the secondary
             namenode to communicate with the namenode.</td>
           </tr>
-          <tr>
-            <td><code>security.inter.tracker.protocol.acl</code></td>
-            <td>ACL for InterTrackerProtocol, used by the tasktrackers to 
-            communicate with the jobtracker.</td>
-          </tr>
-          <tr>
-            <td><code>security.job.submission.protocol.acl</code></td>
-            <td>ACL for JobSubmissionProtocol, used by job clients to 
-            communciate with the jobtracker for job submission, querying job status 
-            etc.</td>
-          </tr>
-          <tr>
-            <td><code>security.task.umbilical.protocol.acl</code></td>
-            <td>ACL for TaskUmbilicalProtocol, used by the map and reduce 
-            tasks to communicate with the parent tasktracker.</td>
-          </tr>
           <tr>
             <td><code>security.refresh.policy.protocol.acl</code></td>
             <td>ACL for RefreshAuthorizationPolicyProtocol, used by the 

+ 0 - 74
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/HadoopVersionAnnotation.java

@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop;
-
-import java.lang.annotation.*;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-
-/**
- * A package attribute that captures the version of Hadoop that was compiled.
- */
-@Retention(RetentionPolicy.RUNTIME)
-@Target(ElementType.PACKAGE)
-@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
-@InterfaceStability.Unstable
-public @interface HadoopVersionAnnotation {
- 
-  /**
-   * Get the Hadoop version
-   * @return the version string "0.6.3-dev"
-   */
-  String version();
-  
-  /**
-   * Get the username that compiled Hadoop.
-   */
-  String user();
-  
-  /**
-   * Get the date when Hadoop was compiled.
-   * @return the date in unix 'date' format
-   */
-  String date();
-    
-  /**
-   * Get the url for the subversion repository.
-   */
-  String url();
-  
-  /**
-   * Get the subversion revision.
-   * @return the revision number as a string (eg. "451451")
-   */
-  String revision();
-
-  /**
-   * Get the branch from which this was compiled.
-   * @return The branch name, e.g. "trunk" or "branches/branch-0.20"
-   */
-  String branch();
-
-  /**
-   * Get a checksum of the source files from which
-   * Hadoop was compiled.
-   * @return a string that uniquely identifies the source
-   **/
-  String srcChecksum();    
-}

+ 14 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.fs;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.http.lib.StaticUserWebFilter;
+import org.apache.hadoop.security.authorize.Service;
 
 /** 
  * This class contains constants for configuration keys used
@@ -114,7 +115,18 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
   SECURITY_HA_SERVICE_PROTOCOL_ACL = "security.ha.service.protocol.acl";
   public static final String 
   SECURITY_ZKFC_PROTOCOL_ACL = "security.zkfc.protocol.acl";
-  
+  public static final String
+  SECURITY_CLIENT_PROTOCOL_ACL = "security.client.protocol.acl";
+  public static final String SECURITY_CLIENT_DATANODE_PROTOCOL_ACL =
+      "security.client.datanode.protocol.acl";
+  public static final String
+  SECURITY_DATANODE_PROTOCOL_ACL = "security.datanode.protocol.acl";
+  public static final String
+  SECURITY_INTER_DATANODE_PROTOCOL_ACL = "security.inter.datanode.protocol.acl";
+  public static final String
+  SECURITY_NAMENODE_PROTOCOL_ACL = "security.namenode.protocol.acl";
+  public static final String SECURITY_QJOURNAL_SERVICE_PROTOCOL_ACL =
+      "security.qjournal.service.protocol.acl";
   public static final String HADOOP_SECURITY_TOKEN_SERVICE_USE_IP =
       "hadoop.security.token.service.use_ip";
   public static final boolean HADOOP_SECURITY_TOKEN_SERVICE_USE_IP_DEFAULT =
@@ -191,4 +203,4 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
   public static final long HADOOP_SECURITY_UID_NAME_CACHE_TIMEOUT_DEFAULT =
     4*60*60; // 4 hours
 
-}
+}

+ 5 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java

@@ -173,6 +173,11 @@ public class CommonConfigurationKeysPublic {
   /** Default value for IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY */
   public static final int     IPC_CLIENT_CONNECTION_MAXIDLETIME_DEFAULT = 10000; // 10s
   /** See <a href="{@docRoot}/../core-default.html">core-default.xml</a> */
+  public static final String  IPC_CLIENT_CONNECT_TIMEOUT_KEY =
+    "ipc.client.connect.timeout";
+  /** Default value for IPC_CLIENT_CONNECT_TIMEOUT_KEY */
+  public static final int     IPC_CLIENT_CONNECT_TIMEOUT_DEFAULT = 20000; // 20s
+  /** See <a href="{@docRoot}/../core-default.html">core-default.xml</a> */
   public static final String  IPC_CLIENT_CONNECT_MAX_RETRIES_KEY =
     "ipc.client.connect.max.retries";
   /** Default value for IPC_CLIENT_CONNECT_MAX_RETRIES_KEY */

+ 76 - 11
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java

@@ -87,33 +87,98 @@ public class FileUtil {
    * (4) If dir is a normal directory, then dir and all its contents recursively
    *     are deleted.
    */
-  public static boolean fullyDelete(File dir) {
-    if (dir.delete()) {
+  public static boolean fullyDelete(final File dir) {
+    return fullyDelete(dir, false);
+  }
+  
+  /**
+   * Delete a directory and all its contents.  If
+   * we return false, the directory may be partially-deleted.
+   * (1) If dir is symlink to a file, the symlink is deleted. The file pointed
+   *     to by the symlink is not deleted.
+   * (2) If dir is symlink to a directory, symlink is deleted. The directory
+   *     pointed to by symlink is not deleted.
+   * (3) If dir is a normal file, it is deleted.
+   * (4) If dir is a normal directory, then dir and all its contents recursively
+   *     are deleted.
+   * @param dir the file or directory to be deleted
+   * @param tryGrantPermissions true if permissions should be modified to delete a file.
+   * @return true on success false on failure.
+   */
+  public static boolean fullyDelete(final File dir, boolean tryGrantPermissions) {
+    if (tryGrantPermissions) {
+      // try to chmod +rwx the parent folder of the 'dir': 
+      File parent = dir.getParentFile();
+      grantPermissions(parent);
+    }
+    if (deleteImpl(dir, false)) {
       // dir is (a) normal file, (b) symlink to a file, (c) empty directory or
       // (d) symlink to a directory
       return true;
     }
-
     // handle nonempty directory deletion
-    if (!fullyDeleteContents(dir)) {
+    if (!fullyDeleteContents(dir, tryGrantPermissions)) {
       return false;
     }
-    return dir.delete();
+    return deleteImpl(dir, true);
+  }
+  
+  /*
+   * Pure-Java implementation of "chmod +rwx f".
+   */
+  private static void grantPermissions(final File f) {
+      f.setExecutable(true);
+      f.setReadable(true);
+      f.setWritable(true);
   }
 
+  private static boolean deleteImpl(final File f, final boolean doLog) {
+    if (f == null) {
+      LOG.warn("null file argument.");
+      return false;
+    }
+    final boolean wasDeleted = f.delete();
+    if (wasDeleted) {
+      return true;
+    }
+    final boolean ex = f.exists();
+    if (doLog && ex) {
+      LOG.warn("Failed to delete file or dir ["
+          + f.getAbsolutePath() + "]: it still exists.");
+    }
+    return !ex;
+  }
+  
   /**
    * Delete the contents of a directory, not the directory itself.  If
    * we return false, the directory may be partially-deleted.
    * If dir is a symlink to a directory, all the contents of the actual
    * directory pointed to by dir will be deleted.
    */
-  public static boolean fullyDeleteContents(File dir) {
+  public static boolean fullyDeleteContents(final File dir) {
+    return fullyDeleteContents(dir, false);
+  }
+  
+  /**
+   * Delete the contents of a directory, not the directory itself.  If
+   * we return false, the directory may be partially-deleted.
+   * If dir is a symlink to a directory, all the contents of the actual
+   * directory pointed to by dir will be deleted.
+   * @param tryGrantPermissions if 'true', try grant +rwx permissions to this 
+   * and all the underlying directories before trying to delete their contents.
+   */
+  public static boolean fullyDeleteContents(final File dir, final boolean tryGrantPermissions) {
+    if (tryGrantPermissions) {
+      // to be able to list the dir and delete files from it
+      // we must grant the dir rwx permissions: 
+      grantPermissions(dir);
+    }
     boolean deletionSucceeded = true;
-    File contents[] = dir.listFiles();
+    final File[] contents = dir.listFiles();
     if (contents != null) {
       for (int i = 0; i < contents.length; i++) {
         if (contents[i].isFile()) {
-          if (!contents[i].delete()) {// normal file or symlink to another file
+          if (!deleteImpl(contents[i], true)) {// normal file or symlink to another file
             deletionSucceeded = false;
             continue; // continue deletion of other files/dirs under dir
           }
@@ -121,16 +186,16 @@ public class FileUtil {
           // Either directory or symlink to another directory.
           // Try deleting the directory as this might be a symlink
           boolean b = false;
-          b = contents[i].delete();
+          b = deleteImpl(contents[i], false);
           if (b){
             //this was indeed a symlink or an empty directory
             continue;
           }
           // if not an empty directory or symlink let
           // fullydelete handle it.
-          if (!fullyDelete(contents[i])) {
+          if (!fullyDelete(contents[i], tryGrantPermissions)) {
             deletionSucceeded = false;
-            continue; // continue deletion of other files/dirs under dir
+            // continue deletion of other files/dirs under dir
           }
         }
       }

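The two new boolean overloads above are what the HADOOP-8849 entry in the 0.23.7 section of CHANGES.txt refers to: before deleting, they chmod +rwx the directories they have to traverse. A minimal usage sketch, assuming a typical non-root Unix run; the demo directory layout is invented for illustration and only the FileUtil API shown in this hunk is relied on:

    import java.io.File;
    import java.io.IOException;

    import org.apache.hadoop.fs.FileUtil;

    public class FullyDeleteDemo {
      public static void main(String[] args) throws IOException {
        // Build a small tree whose subdirectory is made non-writable and
        // non-executable; on a typical non-root Unix run the old
        // fullyDelete(File) cannot remove its contents.
        File root = new File(System.getProperty("java.io.tmpdir"), "fullyDeleteDemo");
        File locked = new File(root, "locked");
        locked.mkdirs();
        new File(locked, "data.txt").createNewFile();
        locked.setWritable(false);
        locked.setExecutable(false);

        // The boolean overload first grants +rwx on the directories it has to
        // traverse, then deletes the tree bottom-up.
        boolean deleted = FileUtil.fullyDelete(root, true);
        System.out.println("deleted=" + deleted + " stillExists=" + root.exists());
      }
    }

The no-argument-style callers keep the old behavior, since fullyDelete(dir) simply delegates with tryGrantPermissions set to false.
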
+ 1 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java

@@ -305,6 +305,7 @@ public class HttpServer implements FilterContainer {
     ret.setAcceptQueueSize(128);
     ret.setResolveNames(false);
     ret.setUseDirectBuffers(false);
+    ret.setHeaderBufferSize(1024*64);
     return ret;
   }
 

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CompressionCodecFactory.java

@@ -122,7 +122,7 @@ public class CompressionCodecFactory {
     if (codecsString != null) {
       StringTokenizer codecSplit = new StringTokenizer(codecsString, ",");
       while (codecSplit.hasMoreElements()) {
-        String codecSubstring = codecSplit.nextToken();
+        String codecSubstring = codecSplit.nextToken().trim();
         if (codecSubstring.length() != 0) {
           try {
             Class<?> cls = conf.getClassByName(codecSubstring);

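The one-line trim above (HADOOP-9216) makes getCodecClasses tolerant of whitespace around the comma-separated class names in io.compression.codecs. A small sketch of the configuration shape that previously failed, using only codecs bundled with hadoop-common:

    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionCodecFactory;

    public class CodecTrimDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Note the space after the comma: before this fix the second entry was
        // parsed as " org.apache.hadoop.io.compress.DefaultCodec" and the class
        // lookup failed.
        conf.set("io.compression.codecs",
            "org.apache.hadoop.io.compress.GzipCodec, "
            + "org.apache.hadoop.io.compress.DefaultCodec");
        List<Class<? extends CompressionCodec>> codecs =
            CompressionCodecFactory.getCodecClasses(conf);
        System.out.println(codecs);
      }
    }
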
+ 15 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java

@@ -106,6 +106,8 @@ public class Client {
 
   private SocketFactory socketFactory;           // how to create sockets
   private int refCount = 1;
+
+  private final int connectionTimeout;
   
   final static int PING_CALL_ID = -1;
   
@@ -159,7 +161,16 @@ public class Client {
     }
     return -1;
   }
-  
+  /**
+   * set the connection timeout value in configuration
+   * 
+   * @param conf Configuration
+   * @param timeout the socket connect timeout value
+   */
+  public static final void setConnectTimeout(Configuration conf, int timeout) {
+    conf.setInt(CommonConfigurationKeys.IPC_CLIENT_CONNECT_TIMEOUT_KEY, timeout);
+  }
+
   /**
    * Increment this client's reference count
    *
@@ -494,8 +505,7 @@ public class Client {
             }
           }
           
-          // connection time out is 20s
-          NetUtils.connect(this.socket, server, 20000);
+          NetUtils.connect(this.socket, server, connectionTimeout);
           if (rpcTimeout > 0) {
             pingInterval = rpcTimeout;  // rpcTimeout overwrites pingInterval
           }
@@ -1034,6 +1044,8 @@ public class Client {
     this.valueClass = valueClass;
     this.conf = conf;
     this.socketFactory = factory;
+    this.connectionTimeout = conf.getInt(CommonConfigurationKeys.IPC_CLIENT_CONNECT_TIMEOUT_KEY,
+        CommonConfigurationKeys.IPC_CLIENT_CONNECT_TIMEOUT_DEFAULT);
   }
 
   /**

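Together with the IPC_CLIENT_CONNECT_TIMEOUT_KEY constant added to CommonConfigurationKeysPublic earlier in this commit, the hard-coded 20-second connect timeout becomes configurable (HADOOP-9106). A short sketch of the two equivalent ways a caller can set it; the value is only read when a Client sets up a connection, so this is configuration plumbing rather than a live connection example:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
    import org.apache.hadoop.ipc.Client;

    public class ConnectTimeoutDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Either set the new key directly...
        conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY, 5000);

        // ...or use the static helper added to Client in this patch; both write
        // the same "ipc.client.connect.timeout" property, which the Client
        // constructor reads (default 20000 ms).
        Client.setConnectTimeout(conf, 5000);

        System.out.println("connect timeout = " + conf.getInt(
            CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_KEY,
            CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_TIMEOUT_DEFAULT) + " ms");
      }
    }
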
+ 27 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java

@@ -18,10 +18,13 @@
 
 package org.apache.hadoop.security;
 
+import java.io.BufferedInputStream;
 import java.io.DataInput;
 import java.io.DataInputStream;
 import java.io.DataOutput;
 import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
@@ -148,8 +151,32 @@ public class Credentials implements Writable {
       in.close();
       return credentials;
     } catch(IOException ioe) {
+      throw new IOException("Exception reading " + filename, ioe);
+    } finally {
       IOUtils.cleanup(LOG, in);
+    }
+  }
+
+  /**
+   * Convenience method for reading a token storage file, and loading the Tokens
+   * therein in the passed UGI
+   * @param filename
+   * @param conf
+   * @throws IOException
+   */
+  public static Credentials readTokenStorageFile(File filename, Configuration conf)
+      throws IOException {
+    DataInputStream in = null;
+    Credentials credentials = new Credentials();
+    try {
+      in = new DataInputStream(new BufferedInputStream(
+          new FileInputStream(filename)));
+      credentials.readTokenStorageStream(in);
+      return credentials;
+    } catch(IOException ioe) {
       throw new IOException("Exception reading " + filename, ioe);
+    } finally {
+      IOUtils.cleanup(LOG, in);
     }
   }
   

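The new File-based readTokenStorageFile overload reads the token file with plain java.io streams, so no FileSystem instance (and therefore no FileSystem.Cache lock) is involved; that is the point of HADOOP-9212. A sketch of how a caller might use it, mirroring what the UserGroupInformation hunk just below now does at login time; the environment-variable check is illustrative:

    import java.io.File;
    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.security.Credentials;
    import org.apache.hadoop.security.UserGroupInformation;

    public class TokenFileDemo {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Location of a token storage file, e.g. the one named by
        // HADOOP_TOKEN_FILE_LOCATION for tasks launched by YARN.
        String location = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
        if (location != null) {
          // Plain java.io is used under the hood, so no FileSystem instance
          // (and no FileSystem.Cache lock) is touched while reading the tokens.
          Credentials creds =
              Credentials.readTokenStorageFile(new File(location), conf);
          UserGroupInformation.getCurrentUser().addCredentials(creds);
          System.out.println("loaded " + creds.numberOfTokens() + " tokens");
        }
      }
    }
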
+ 5 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java

@@ -20,6 +20,7 @@ package org.apache.hadoop.security;
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN;
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT;
 
+import java.io.File;
 import java.io.IOException;
 import java.lang.reflect.UndeclaredThrowableException;
 import java.security.AccessControlContext;
@@ -656,10 +657,11 @@ public class UserGroupInformation {
 
         String fileLocation = System.getenv(HADOOP_TOKEN_FILE_LOCATION);
         if (fileLocation != null) {
-          // load the token storage file and put all of the tokens into the
-          // user.
+          // Load the token storage file and put all of the tokens into the
+          // user. Don't use the FileSystem API for reading since it has a lock
+          // cycle (HADOOP-9212).
           Credentials cred = Credentials.readTokenStorageFile(
-              new Path("file:///" + fileLocation), conf);
+              new File(fileLocation), conf);
           loginUser.addCredentials(cred);
         }
         loginUser.spawnAutoRenewalThreadForUserCreds();

+ 62 - 28
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/VersionInfo.java

@@ -20,41 +20,78 @@ package org.apache.hadoop.util;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.HadoopVersionAnnotation;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
 /**
- * This class finds the package info for Hadoop and the HadoopVersionAnnotation
- * information.
+ * This class returns build information about Hadoop components.
  */
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
 public class VersionInfo {
   private static final Log LOG = LogFactory.getLog(VersionInfo.class);
 
-  private static Package myPackage;
-  private static HadoopVersionAnnotation version;
-  
-  static {
-    myPackage = HadoopVersionAnnotation.class.getPackage();
-    version = myPackage.getAnnotation(HadoopVersionAnnotation.class);
+  private Properties info;
+
+  protected VersionInfo(String component) {
+    info = new Properties();
+    String versionInfoFile = component + "-version-info.properties";
+    try {
+      InputStream is = Thread.currentThread().getContextClassLoader()
+        .getResourceAsStream(versionInfoFile);
+      info.load(is);
+    } catch (IOException ex) {
+      LogFactory.getLog(getClass()).warn("Could not read '" + 
+        versionInfoFile + "', " + ex.toString(), ex);
+    }
   }
 
-  /**
-   * Get the meta-data for the Hadoop package.
-   * @return
-   */
-  static Package getPackage() {
-    return myPackage;
+  protected String _getVersion() {
+    return info.getProperty("version", "Unknown");
   }
-  
+
+  protected String _getRevision() {
+    return info.getProperty("revision", "Unknown");
+  }
+
+  protected String _getBranch() {
+    return info.getProperty("branch", "Unknown");
+  }
+
+  protected String _getDate() {
+    return info.getProperty("date", "Unknown");
+  }
+
+  protected String _getUser() {
+    return info.getProperty("user", "Unknown");
+  }
+
+  protected String _getUrl() {
+    return info.getProperty("url", "Unknown");
+  }
+
+  protected String _getSrcChecksum() {
+    return info.getProperty("srcChecksum", "Unknown");
+  }
+
+  protected String _getBuildVersion(){
+    return getVersion() +
+      " from " + _getRevision() +
+      " by " + _getUser() +
+      " source checksum " + _getSrcChecksum();
+  }
+
+  private static VersionInfo COMMON_VERSION_INFO = new VersionInfo("common");
   /**
    * Get the Hadoop version.
    * @return the Hadoop version string, eg. "0.6.3-dev"
    */
   public static String getVersion() {
-    return version != null ? version.version() : "Unknown";
+    return COMMON_VERSION_INFO._getVersion();
   }
   
   /**
@@ -62,7 +99,7 @@ public class VersionInfo {
    * @return the revision number, eg. "451451"
    */
   public static String getRevision() {
-    return version != null ? version.revision() : "Unknown";
+    return COMMON_VERSION_INFO._getRevision();
   }
 
   /**
@@ -70,7 +107,7 @@ public class VersionInfo {
    * @return The branch name, e.g. "trunk" or "branches/branch-0.20"
    */
   public static String getBranch() {
-    return version != null ? version.branch() : "Unknown";
+    return COMMON_VERSION_INFO._getBranch();
   }
 
   /**
@@ -78,7 +115,7 @@ public class VersionInfo {
    * @return the compilation date in unix date format
    */
   public static String getDate() {
-    return version != null ? version.date() : "Unknown";
+    return COMMON_VERSION_INFO._getDate();
   }
   
   /**
@@ -86,14 +123,14 @@ public class VersionInfo {
    * @return the username of the user
    */
   public static String getUser() {
-    return version != null ? version.user() : "Unknown";
+    return COMMON_VERSION_INFO._getUser();
   }
   
   /**
    * Get the subversion URL for the root Hadoop directory.
    */
   public static String getUrl() {
-    return version != null ? version.url() : "Unknown";
+    return COMMON_VERSION_INFO._getUrl();
   }
 
   /**
@@ -101,7 +138,7 @@ public class VersionInfo {
    * built.
    **/
   public static String getSrcChecksum() {
-    return version != null ? version.srcChecksum() : "Unknown";
+    return COMMON_VERSION_INFO._getSrcChecksum();
   }
 
   /**
@@ -109,14 +146,11 @@ public class VersionInfo {
    * revision, user and date. 
    */
   public static String getBuildVersion(){
-    return VersionInfo.getVersion() + 
-    " from " + VersionInfo.getRevision() +
-    " by " + VersionInfo.getUser() + 
-    " source checksum " + VersionInfo.getSrcChecksum();
+    return COMMON_VERSION_INFO._getBuildVersion();
   }
   
   public static void main(String[] args) {
-    LOG.debug("version: "+ version);
+    LOG.debug("version: "+ getVersion());
     System.out.println("Hadoop " + getVersion());
     System.out.println("Subversion " + getUrl() + " -r " + getRevision());
     System.out.println("Compiled by " + getUser() + " on " + getDate());

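Callers of VersionInfo are unaffected by the switch from the generated package-info annotation to a properties file: the static accessors keep their signatures and now delegate to a singleton that loads common-version-info.properties from the classpath. A small sketch using only the accessors visible in this hunk:

    import org.apache.hadoop.util.VersionInfo;

    public class PrintVersion {
      public static void main(String[] args) {
        // The static accessors now read common-version-info.properties from the
        // classpath instead of a generated package-info annotation.
        System.out.println("Hadoop " + VersionInfo.getVersion());
        System.out.println("Subversion " + VersionInfo.getUrl()
            + " -r " + VersionInfo.getRevision());
        System.out.println("Compiled by " + VersionInfo.getUser()
            + " on " + VersionInfo.getDate());
        System.out.println("From source with checksum " + VersionInfo.getSrcChecksum());
      }
    }

The protected constructor takes a component name, which is presumably how other modules can ship their own <component>-version-info.properties alongside the common one.
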
+ 25 - 0
hadoop-common-project/hadoop-common/src/main/resources/common-version-info.properties

@@ -0,0 +1,25 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+version=${pom.version}
+revision=${version-info.scm.commit}
+branch=${version-info.scm.branch}
+user=${user.name}
+date=${version-info.build.time}
+url=${version-info.scm.uri}
+srcChecksum=${version-info.source.md5}

+ 18 - 2
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -80,9 +80,17 @@
 
 <property>
   <name>hadoop.security.group.mapping</name>
-  <value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
+  <value>org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback</value>
   <description>
-    Class for user to group mapping (get groups for a given user) for ACL
+    Class for user to group mapping (get groups for a given user) for ACL. 
+    The default implementation,
+    org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback, 
+    will determine whether the Java Native Interface (JNI) is available. If JNI
+    is available, the implementation uses the JNI-based API within Hadoop to
+    resolve the list of groups for a user. If JNI is not available, the shell
+    implementation, ShellBasedUnixGroupsMapping, is used instead. That implementation
+    shells out to the Linux/Unix environment with the 
+    <code>bash -c groups</code> command to resolve a list of groups for a user.
   </description>
 </property>
 
@@ -565,6 +573,14 @@
   </description>
 </property>
 
+<property>
+  <name>ipc.client.connect.timeout</name>
+  <value>20000</value>
+  <description>Indicates the number of milliseconds a client will wait for the
+               socket connection to a server to be established.
+  </description>
+</property>
+
 <property>
   <name>ipc.client.connect.max.retries.on.timeouts</name>
   <value>45</value>
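
Both defaults shown above can be overridden from client code. A brief, hedged sketch (the property keys come from this file; Client.setConnectTimeout is the setter exercised in TestIPC further down; the 5000 ms value is only an example):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.Client;

public class ClientConfSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Fall back to the pure shell-based group mapping instead of the JNI-with-fallback default.
    conf.set("hadoop.security.group.mapping",
        "org.apache.hadoop.security.ShellBasedUnixGroupsMapping");
    // Lower the IPC connect timeout from its 20000 ms default.
    conf.setInt("ipc.client.connect.timeout", 5000);
    // Equivalent helper used by TestIPC#testIpcConnectTimeout below.
    Client.setConnectTimeout(conf, 5000);
  }
}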

+ 100 - 31
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFileUtil.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.fs;
 
+import org.junit.Before;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
@@ -173,12 +174,26 @@ public class TestFileUtil {
       //Expected an IOException
     }
   }
+
+  @Before
+  public void before() throws IOException {
+    cleanupImpl();
+  }
   
   @After
   public void tearDown() throws IOException {
-    FileUtil.fullyDelete(del);
-    FileUtil.fullyDelete(tmp);
-    FileUtil.fullyDelete(partitioned);
+    cleanupImpl();
+  }
+  
+  private void cleanupImpl() throws IOException  {
+    FileUtil.fullyDelete(del, true);
+    Assert.assertTrue(!del.exists());
+    
+    FileUtil.fullyDelete(tmp, true);
+    Assert.assertTrue(!tmp.exists());
+    
+    FileUtil.fullyDelete(partitioned, true);
+    Assert.assertTrue(!partitioned.exists());
   }
 
   @Test
@@ -269,12 +284,14 @@ public class TestFileUtil {
     Assert.assertTrue(new File(tmp, FILE).exists());
   }
 
-  private File xSubDir = new File(del, "xsubdir");
-  private File ySubDir = new File(del, "ysubdir");
-  static String file1Name = "file1";
-  private File file2 = new File(xSubDir, "file2");
-  private File file3 = new File(ySubDir, "file3");
-  private File zlink = new File(del, "zlink");
+  private final File xSubDir = new File(del, "xSubDir");
+  private final File xSubSubDir = new File(xSubDir, "xSubSubDir");
+  private final File ySubDir = new File(del, "ySubDir");
+  private static final String file1Name = "file1";
+  private final File file2 = new File(xSubDir, "file2");
+  private final File file22 = new File(xSubSubDir, "file22");
+  private final File file3 = new File(ySubDir, "file3");
+  private final File zlink = new File(del, "zlink");
   
   /**
    * Creates a directory which can not be deleted completely.
@@ -286,10 +303,14 @@ public class TestFileUtil {
    *                       |
    *    .---------------------------------------,
    *    |            |              |           |
-   *  file1(!w)   xsubdir(-w)   ysubdir(+w)   zlink
-   *                 |              |
-   *               file2          file3
-   *
+   *  file1(!w)   xSubDir(-rwx)   ySubDir(+w)   zlink
+   *              |  |              |
+   *              | file2(-rwx)   file3
+   *              |
+   *            xSubSubDir(-rwx) 
+   *              |
+   *             file22(-rwx)
+   *             
    * @throws IOException
    */
   private void setupDirsAndNonWritablePermissions() throws IOException {
@@ -302,7 +323,16 @@ public class TestFileUtil {
 
     xSubDir.mkdirs();
     file2.createNewFile();
-    xSubDir.setWritable(false);
+    
+    xSubSubDir.mkdirs();
+    file22.createNewFile();
+    
+    revokePermissions(file22);
+    revokePermissions(xSubSubDir);
+    
+    revokePermissions(file2);
+    revokePermissions(xSubDir);
+    
     ySubDir.mkdirs();
     file3.createNewFile();
 
@@ -314,23 +344,43 @@ public class TestFileUtil {
     FileUtil.symLink(tmpFile.toString(), zlink.toString());
   }
   
+  private static void grantPermissions(final File f) {
+    f.setReadable(true);
+    f.setWritable(true);
+    f.setExecutable(true);
+  }
+  
+  private static void revokePermissions(final File f) {
+    f.setWritable(false);
+    f.setExecutable(false);
+    f.setReadable(false);
+  }
+  
   // Validates the return value.
-  // Validates the existence of directory "xsubdir" and the file "file1"
-  // Sets writable permissions for the non-deleted dir "xsubdir" so that it can
-  // be deleted in tearDown().
-  private void validateAndSetWritablePermissions(boolean ret) {
-    xSubDir.setWritable(true);
-    Assert.assertFalse("The return value should have been false!", ret);
-    Assert.assertTrue("The file file1 should not have been deleted!",
+  // Validates the existence of the file "file1"
+  private void validateAndSetWritablePermissions(
+      final boolean expectedRevokedPermissionDirsExist, final boolean ret) {
+    grantPermissions(xSubDir);
+    grantPermissions(xSubSubDir);
+    
+    Assert.assertFalse("The return value should have been false.", ret);
+    Assert.assertTrue("The file file1 should not have been deleted.",
         new File(del, file1Name).exists());
-    Assert.assertTrue(
-        "The directory xsubdir should not have been deleted!",
-        xSubDir.exists());
-    Assert.assertTrue("The file file2 should not have been deleted!",
-        file2.exists());
-    Assert.assertFalse("The directory ysubdir should have been deleted!",
+    
+    Assert.assertEquals(
+        "The directory xSubDir *should* not have been deleted.",
+        expectedRevokedPermissionDirsExist, xSubDir.exists());
+    Assert.assertEquals("The file file2 *should* not have been deleted.",
+        expectedRevokedPermissionDirsExist, file2.exists());
+    Assert.assertEquals(
+        "The directory xSubSubDir *should* not have been deleted.",
+        expectedRevokedPermissionDirsExist, xSubSubDir.exists());
+    Assert.assertEquals("The file file22 *should* not have been deleted.",
+        expectedRevokedPermissionDirsExist, file22.exists());
+    
+    Assert.assertFalse("The directory ySubDir should have been deleted.",
         ySubDir.exists());
-    Assert.assertFalse("The link zlink should have been deleted!",
+    Assert.assertFalse("The link zlink should have been deleted.",
         zlink.exists());
   }
 
@@ -339,7 +389,15 @@ public class TestFileUtil {
     LOG.info("Running test to verify failure of fullyDelete()");
     setupDirsAndNonWritablePermissions();
     boolean ret = FileUtil.fullyDelete(new MyFile(del));
-    validateAndSetWritablePermissions(ret);
+    validateAndSetWritablePermissions(true, ret);
+  }
+  
+  @Test
+  public void testFailFullyDeleteGrantPermissions() throws IOException {
+    setupDirsAndNonWritablePermissions();
+    boolean ret = FileUtil.fullyDelete(new MyFile(del), true);
+    // this time the directories with revoked permissions *should* be deleted:
+    validateAndSetWritablePermissions(false, ret);
   }
 
   /**
@@ -388,7 +446,10 @@ public class TestFileUtil {
      */
     @Override
     public File[] listFiles() {
-      File[] files = super.listFiles();
+      final File[] files = super.listFiles();
+      if (files == null) {
+        return null;
+      }
       List<File> filesList = Arrays.asList(files);
       Collections.sort(filesList);
       File[] myFiles = new MyFile[files.length];
@@ -405,9 +466,17 @@ public class TestFileUtil {
     LOG.info("Running test to verify failure of fullyDeleteContents()");
     setupDirsAndNonWritablePermissions();
     boolean ret = FileUtil.fullyDeleteContents(new MyFile(del));
-    validateAndSetWritablePermissions(ret);
+    validateAndSetWritablePermissions(true, ret);
   }
 
+  @Test
+  public void testFailFullyDeleteContentsGrantPermissions() throws IOException {
+    setupDirsAndNonWritablePermissions();
+    boolean ret = FileUtil.fullyDeleteContents(new MyFile(del), true);
+    // this time the directories with revoked permissions *should* be deleted:
+    validateAndSetWritablePermissions(false, ret);
+  }
+  
   @Test
   public void testCopyMergeSingleDirectory() throws IOException {
     setupDirs();
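
The delete tests above exercise a two-argument FileUtil.fullyDelete overload whose boolean argument grants missing permissions before deleting. A hedged usage sketch (the directory path is illustrative):

import java.io.File;
import org.apache.hadoop.fs.FileUtil;

public class FullyDeleteSketch {
  public static void main(String[] args) {
    File dir = new File("/tmp/example-dir"); // illustrative path
    // Plain delete: returns false if a non-writable subtree blocks deletion.
    boolean deleted = FileUtil.fullyDelete(dir);
    if (!deleted) {
      // Retry, letting FileUtil grant read/write/execute permissions on the way down.
      deleted = FileUtil.fullyDelete(dir, true);
    }
    System.out.println("deleted = " + deleted);
  }
}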

+ 29 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServer.java

@@ -119,6 +119,18 @@ public class TestHttpServer extends HttpServerFunctionalTest {
     }    
   }
 
+  @SuppressWarnings("serial")
+  public static class LongHeaderServlet extends HttpServlet {
+    @SuppressWarnings("unchecked")
+    @Override
+    public void doGet(HttpServletRequest request,
+                      HttpServletResponse response
+    ) throws ServletException, IOException {
+      Assert.assertEquals(63 * 1024, request.getHeader("longheader").length());
+      response.setStatus(HttpServletResponse.SC_OK);
+    }
+  }
+
   @SuppressWarnings("serial")
   public static class HtmlContentServlet extends HttpServlet {
     @Override
@@ -139,6 +151,7 @@ public class TestHttpServer extends HttpServerFunctionalTest {
     server.addServlet("echo", "/echo", EchoServlet.class);
     server.addServlet("echomap", "/echomap", EchoMapServlet.class);
     server.addServlet("htmlcontent", "/htmlcontent", HtmlContentServlet.class);
+    server.addServlet("longheader", "/longheader", LongHeaderServlet.class);
     server.addJerseyResourcePackage(
         JerseyResource.class.getPackage().getName(), "/jersey/*");
     server.start();
@@ -197,6 +210,22 @@ public class TestHttpServer extends HttpServerFunctionalTest {
                  readOutput(new URL(baseUrl, "/echomap?a=b&c<=d&a=>")));
   }
 
+  /** 
+   *  Test that verifies headers can be up to 64K long. 
+   *  The test adds a 63K header leaving 1K for other headers.
+   *  This is because the header buffer setting is for ALL headers,
+   *  names and values included. */
+  @Test public void testLongHeader() throws Exception {
+    URL url = new URL(baseUrl, "/longheader");
+    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0 ; i < 63 * 1024; i++) {
+      sb.append("a");
+    }
+    conn.setRequestProperty("longheader", sb.toString());
+    assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
+  }
+
   @Test public void testContentTypes() throws Exception {
     // Static CSS files should have text/css
     URL cssUrl = new URL(baseUrl, "/static/test.css");

+ 12 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecFactory.java

@@ -256,5 +256,17 @@ public class TestCodecFactory extends TestCase {
     checkCodec("overridden factory for .gz", NewGzipCodec.class, codec);
     codec = factory.getCodecByClassName(NewGzipCodec.class.getCanonicalName());
     checkCodec("overridden factory for gzip codec", NewGzipCodec.class, codec);
+    
+    Configuration conf = new Configuration();
+    conf.set("io.compression.codecs", 
+        "   org.apache.hadoop.io.compress.GzipCodec   , " +
+        "    org.apache.hadoop.io.compress.DefaultCodec  , " +
+        " org.apache.hadoop.io.compress.BZip2Codec   ");
+    try {
+      CompressionCodecFactory.getCodecClasses(conf);
+    } catch (IllegalArgumentException e) {
+      fail("IllegalArgumentException is unexpected");
+    }
+
   }
 }

+ 20 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java

@@ -62,7 +62,6 @@ public class TestIPC {
   final private static Configuration conf = new Configuration();
   final static private int PING_INTERVAL = 1000;
   final static private int MIN_SLEEP_TIME = 1000;
-
   /**
    * Flag used to turn off the fault injection behavior
    * of the various writables.
@@ -499,6 +498,26 @@ public class TestIPC {
     client.call(new LongWritable(RANDOM.nextLong()),
         addr, null, null, 3*PING_INTERVAL+MIN_SLEEP_TIME, conf);
   }
+
+  @Test
+  public void testIpcConnectTimeout() throws Exception {
+    // create the server but intentionally do not start it,
+    // so that the client's connection attempt times out
+    Server server = new TestServer(1, true);
+    InetSocketAddress addr = NetUtils.getConnectAddress(server);
+
+    // start client
+    Client.setConnectTimeout(conf, 100);
+    Client client = new Client(LongWritable.class, conf);
+    // set the rpc timeout to twice the MIN_SLEEP_TIME
+    try {
+      client.call(new LongWritable(RANDOM.nextLong()),
+              addr, null, null, MIN_SLEEP_TIME*2, conf);
+      fail("Expected an exception to have been thrown");
+    } catch (SocketTimeoutException e) {
+      LOG.info("Get a SocketTimeoutException ", e);
+    }
+  }
   
   /**
    * Check that file descriptors aren't leaked by starting

+ 17 - 1
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/servlet/HostnameFilter.java

@@ -29,6 +29,9 @@ import javax.servlet.ServletRequest;
 import javax.servlet.ServletResponse;
 import java.io.IOException;
 import java.net.InetAddress;
+import java.net.UnknownHostException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Filter that resolves the requester hostname.
@@ -36,6 +39,7 @@ import java.net.InetAddress;
 @InterfaceAudience.Private
 public class HostnameFilter implements Filter {
   static final ThreadLocal<String> HOSTNAME_TL = new ThreadLocal<String>();
+  private static final Logger log = LoggerFactory.getLogger(HostnameFilter.class);
 
   /**
    * Initializes the filter.
@@ -66,7 +70,19 @@ public class HostnameFilter implements Filter {
   public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain)
     throws IOException, ServletException {
     try {
-      String hostname = InetAddress.getByName(request.getRemoteAddr()).getCanonicalHostName();
+      String hostname;
+      try {
+        String address = request.getRemoteAddr();
+        if (address != null) {
+          hostname = InetAddress.getByName(address).getCanonicalHostName();
+        } else {
+          log.warn("Request remote address is NULL");
+          hostname = "???";
+        }
+      } catch (UnknownHostException ex) {
+        log.warn("Request remote address could not be resolved, {0}", ex.toString(), ex);
+        hostname = "???";
+      }
       HOSTNAME_TL.set(hostname);
       chain.doFilter(request, response);
     } finally {

+ 26 - 0
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/servlet/TestHostnameFilter.java

@@ -64,4 +64,30 @@ public class TestHostnameFilter extends HTestCase {
     filter.destroy();
   }
 
+  @Test
+  public void testMissingHostname() throws Exception {
+    ServletRequest request = Mockito.mock(ServletRequest.class);
+    Mockito.when(request.getRemoteAddr()).thenReturn(null);
+
+    ServletResponse response = Mockito.mock(ServletResponse.class);
+
+    final AtomicBoolean invoked = new AtomicBoolean();
+
+    FilterChain chain = new FilterChain() {
+      @Override
+      public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse)
+        throws IOException, ServletException {
+        assertTrue(HostnameFilter.get().contains("???"));
+        invoked.set(true);
+      }
+    };
+
+    Filter filter = new HostnameFilter();
+    filter.init(null);
+    assertNull(HostnameFilter.get());
+    filter.doFilter(request, response, chain);
+    assertTrue(invoked.get());
+    assertNull(HostnameFilter.get());
+    filter.destroy();
+  }
 }

+ 32 - 2
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -488,6 +488,15 @@ Release 2.0.3-alpha - Unreleased
     HDFS-4375. Use token request messages defined in hadoop common.
     (suresh)
 
+    HDFS-4392. Use NetUtils#getFreeSocketPort in MiniDFSCluster.
+    (Andrew Purtell via suresh)
+
+    HDFS-4393. Make empty request and response objects in protocol translators
+    static final members. (Brandon Li via suresh)
+
+    HDFS-4403. DFSClient can infer checksum type when not provided by reading
+    first byte (todd)
+
   OPTIMIZATIONS
 
     HDFS-3429. DataNode reads checksums even if client does not need them (todd)
@@ -713,6 +722,12 @@ Release 2.0.3-alpha - Unreleased
 
     HDFS-1245. Pluggable block id generation. (shv)
 
+    HDFS-4415. HostnameFilter should handle hostname resolution failures and
+    continue processing. (Robert Kanter via atm)
+
+    HDFS-4359. Slow RPC responses from NN can prevent metrics collection on
+    DNs. (liang xie via atm)
+
   BREAKDOWN OF HDFS-3077 SUBTASKS
 
     HDFS-3077. Quorum-based protocol for reading and writing edit logs.
@@ -815,9 +830,12 @@ Release 2.0.3-alpha - Unreleased
     HDFS-4017. Unclosed FileInputStream in GetJournalEditServlet
     (Chao Shi via todd)
 
-    HDFS-4351.  In BlockPlacementPolicyDefault.chooseTarget(..), numOfReplicas
+    HDFS-4351. In BlockPlacementPolicyDefault.chooseTarget(..), numOfReplicas
     needs to be updated when avoiding stale nodes.  (Andrew Wang via szetszwo)
 
+    HDFS-4399. Fix RAT warnings by excluding images sub-dir in docs. (Thomas
+    Graves via acmurthy) 
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES
@@ -2195,6 +2213,18 @@ Release 2.0.0-alpha - 05-23-2012
     
     HDFS-3039. Address findbugs and javadoc warnings on branch. (todd via atm)
 
+Release 0.23.7 - UNRELEASED
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
 Release 0.23.6 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -2214,7 +2244,7 @@ Release 0.23.6 - UNRELEASED
 
     HDFS-4385. Maven RAT plugin is not checking all source files (tgraves)
 
-Release 0.23.5 - UNRELEASED
+Release 0.23.5 - 2012-11-28
 
   INCOMPATIBLE CHANGES
 

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/pom.xml

@@ -522,6 +522,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
             <exclude>src/main/webapps/datanode/robots.txt</exclude>
             <exclude>src/main/docs/releasenotes.html</exclude>
             <exclude>src/contrib/**</exclude>
+            <exclude>src/site/resources/images/*</exclude>
           </excludes>
         </configuration>
       </plugin>
@@ -566,6 +567,9 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
                     <exec executable="make" dir="${project.build.directory}/native" failonerror="true">
                       <arg line="VERBOSE=1"/>
                     </exec>
+                    <!-- The second make is a workaround for HADOOP-9215.  It can
+                         be removed when version 2.6 of cmake is no longer supported. -->
+                    <exec executable="make" dir="${project.build.directory}/native" failonerror="true"></exec>
                   </target>
                 </configuration>
               </execution>

+ 5 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_permissions_guide.xml

@@ -92,10 +92,11 @@ There is no provision within HDFS for creating user identities, establishing gro
 
 <section><title>Group Mapping</title>
 <p>
-Once a username has been determined as described above, the list of groups is determined by a <em>group mapping
-service</em>, configured by the <code>hadoop.security.group.mapping</code> property.
-The default implementation, <code>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</code>, will shell out
-to the Unix <code>bash -c groups</code> command to resolve a list of groups for a user.
+Once a username has been determined as described above, the list of groups is 
+determined by a <em>group mapping service</em>, configured by the 
+<code>hadoop.security.group.mapping</code> property. Refer to
+core-default.xml for details of the default
+<code>hadoop.security.group.mapping</code> implementation.
 </p>
 <p>
 An alternate implementation, which connects directly to an LDAP server to resolve the list of groups, is available

+ 126 - 38
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -149,6 +149,7 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenRenewer;
 import org.apache.hadoop.util.DataChecksum;
+import org.apache.hadoop.util.DataChecksum.Type;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Time;
 
@@ -1559,7 +1560,7 @@ public class DFSClient implements java.io.Closeable {
    */
   public MD5MD5CRC32FileChecksum getFileChecksum(String src) throws IOException {
     checkOpen();
-    return getFileChecksum(src, namenode, socketFactory,
+    return getFileChecksum(src, clientName, namenode, socketFactory,
         dfsClientConf.socketTimeout, getDataEncryptionKey(),
         dfsClientConf.connectToDnViaHostname);
   }
@@ -1602,9 +1603,16 @@ public class DFSClient implements java.io.Closeable {
   /**
    * Get the checksum of a file.
    * @param src The file path
+   * @param clientName the name of the client requesting the checksum.
+   * @param namenode the RPC proxy for the namenode
+   * @param socketFactory to create sockets to connect to DNs
+   * @param socketTimeout timeout to use when connecting and waiting for a response
+   * @param encryptionKey the key needed to communicate with DNs in this cluster
+   * @param connectToDnViaHostname {@see #connectToDnViaHostname()}
    * @return The checksum 
    */
-  public static MD5MD5CRC32FileChecksum getFileChecksum(String src,
+  static MD5MD5CRC32FileChecksum getFileChecksum(String src,
+      String clientName,
       ClientProtocol namenode, SocketFactory socketFactory, int socketTimeout,
       DataEncryptionKey encryptionKey, boolean connectToDnViaHostname)
       throws IOException {
@@ -1639,32 +1647,16 @@ public class DFSClient implements java.io.Closeable {
       final int timeout = 3000 * datanodes.length + socketTimeout;
       boolean done = false;
       for(int j = 0; !done && j < datanodes.length; j++) {
-        Socket sock = null;
         DataOutputStream out = null;
         DataInputStream in = null;
         
         try {
           //connect to a datanode
-          sock = socketFactory.createSocket();
-          String dnAddr = datanodes[j].getXferAddr(connectToDnViaHostname);
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("Connecting to datanode " + dnAddr);
-          }
-          NetUtils.connect(sock, NetUtils.createSocketAddr(dnAddr), timeout);
-          sock.setSoTimeout(timeout);
-
-          OutputStream unbufOut = NetUtils.getOutputStream(sock);
-          InputStream unbufIn = NetUtils.getInputStream(sock);
-          if (encryptionKey != null) {
-            IOStreamPair encryptedStreams =
-                DataTransferEncryptor.getEncryptedStreams(
-                    unbufOut, unbufIn, encryptionKey);
-            unbufOut = encryptedStreams.out;
-            unbufIn = encryptedStreams.in;
-          }
-          out = new DataOutputStream(new BufferedOutputStream(unbufOut,
+          IOStreamPair pair = connectToDN(socketFactory, connectToDnViaHostname,
+              encryptionKey, datanodes[j], timeout);
+          out = new DataOutputStream(new BufferedOutputStream(pair.out,
               HdfsConstants.SMALL_BUFFER_SIZE));
-          in = new DataInputStream(unbufIn);
+          in = new DataInputStream(pair.in);
 
           if (LOG.isDebugEnabled()) {
             LOG.debug("write to " + datanodes[j] + ": "
@@ -1677,19 +1669,8 @@ public class DFSClient implements java.io.Closeable {
             BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));
 
           if (reply.getStatus() != Status.SUCCESS) {
-            if (reply.getStatus() == Status.ERROR_ACCESS_TOKEN
-                && i > lastRetriedIndex) {
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM "
-                    + "for file " + src + " for block " + block
-                    + " from datanode " + datanodes[j]
-                    + ". Will retry the block once.");
-              }
-              lastRetriedIndex = i;
-              done = true; // actually it's not done; but we'll retry
-              i--; // repeat at i-th block
-              refetchBlocks = true;
-              break;
+            if (reply.getStatus() == Status.ERROR_ACCESS_TOKEN) {
+              throw new InvalidBlockTokenException();
             } else {
               throw new IOException("Bad response " + reply + " for block "
                   + block + " from datanode " + datanodes[j]);
@@ -1721,8 +1702,18 @@ public class DFSClient implements java.io.Closeable {
           md5.write(md5out);
           
           // read crc-type
-          final DataChecksum.Type ct = PBHelper.convert(checksumData
-              .getCrcType());
+          final DataChecksum.Type ct;
+          if (checksumData.hasCrcType()) {
+            ct = PBHelper.convert(checksumData
+                .getCrcType());
+          } else {
+            LOG.debug("Retrieving checksum from an earlier-version DataNode: " +
+                      "inferring checksum by reading first byte");
+            ct = inferChecksumTypeByReading(
+                clientName, socketFactory, socketTimeout, lb, datanodes[j],
+                encryptionKey, connectToDnViaHostname);
+          }
+
           if (i == 0) { // first block
             crcType = ct;
           } else if (crcType != DataChecksum.Type.MIXED
@@ -1740,12 +1731,25 @@ public class DFSClient implements java.io.Closeable {
             }
             LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
           }
+        } catch (InvalidBlockTokenException ibte) {
+          if (i > lastRetriedIndex) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM "
+                  + "for file " + src + " for block " + block
+                  + " from datanode " + datanodes[j]
+                  + ". Will retry the block once.");
+            }
+            lastRetriedIndex = i;
+            done = true; // actually it's not done; but we'll retry
+            i--; // repeat at i-th block
+            refetchBlocks = true;
+            break;
+          }
         } catch (IOException ie) {
           LOG.warn("src=" + src + ", datanodes["+j+"]=" + datanodes[j], ie);
         } finally {
           IOUtils.closeStream(in);
           IOUtils.closeStream(out);
-          IOUtils.closeSocket(sock);        
         }
       }
 
@@ -1777,6 +1781,90 @@ public class DFSClient implements java.io.Closeable {
     }
   }
 
+  /**
+   * Connect to the given datanode's data transfer port, and return
+   * the resulting IOStreamPair. This includes encryption wrapping, etc.
+   */
+  private static IOStreamPair connectToDN(
+      SocketFactory socketFactory, boolean connectToDnViaHostname,
+      DataEncryptionKey encryptionKey, DatanodeInfo dn, int timeout)
+      throws IOException
+  {
+    boolean success = false;
+    Socket sock = null;
+    try {
+      sock = socketFactory.createSocket();
+      String dnAddr = dn.getXferAddr(connectToDnViaHostname);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Connecting to datanode " + dnAddr);
+      }
+      NetUtils.connect(sock, NetUtils.createSocketAddr(dnAddr), timeout);
+      sock.setSoTimeout(timeout);
+  
+      OutputStream unbufOut = NetUtils.getOutputStream(sock);
+      InputStream unbufIn = NetUtils.getInputStream(sock);
+      IOStreamPair ret;
+      if (encryptionKey != null) {
+        ret = DataTransferEncryptor.getEncryptedStreams(
+                unbufOut, unbufIn, encryptionKey);
+      } else {
+        ret = new IOStreamPair(unbufIn, unbufOut);        
+      }
+      success = true;
+      return ret;
+    } finally {
+      if (!success) {
+        IOUtils.closeSocket(sock);
+      }
+    }
+  }
+  
+  /**
+   * Infer the checksum type for a replica by sending an OP_READ_BLOCK
+   * for the first byte of that replica. This is used for compatibility
+   * with older HDFS versions which did not include the checksum type in
+   * OpBlockChecksumResponseProto.
+   *
+   * @param clientName the name of the DFSClient requesting the checksum
+   * @param socketFactory factory used to create the socket to the datanode
+   * @param socketTimeout timeout to use when connecting and waiting for a response
+   * @param lb the located block whose replica is probed
+   * @param dn the connected datanode
+   * @param encryptionKey the key needed to communicate with DNs in this cluster
+   * @param connectToDnViaHostname whether to connect to the datanode via its hostname
+   * @return the inferred checksum type
+   * @throws IOException if an error occurs
+   */
+  private static Type inferChecksumTypeByReading(
+      String clientName, SocketFactory socketFactory, int socketTimeout,
+      LocatedBlock lb, DatanodeInfo dn,
+      DataEncryptionKey encryptionKey, boolean connectToDnViaHostname)
+      throws IOException {
+    IOStreamPair pair = connectToDN(socketFactory, connectToDnViaHostname,
+        encryptionKey, dn, socketTimeout);
+
+    try {
+      DataOutputStream out = new DataOutputStream(new BufferedOutputStream(pair.out,
+          HdfsConstants.SMALL_BUFFER_SIZE));
+      DataInputStream in = new DataInputStream(pair.in);
+  
+      new Sender(out).readBlock(lb.getBlock(), lb.getBlockToken(), clientName, 0, 1, true);
+      final BlockOpResponseProto reply =
+          BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));
+      
+      if (reply.getStatus() != Status.SUCCESS) {
+        if (reply.getStatus() == Status.ERROR_ACCESS_TOKEN) {
+          throw new InvalidBlockTokenException();
+        } else {
+          throw new IOException("Bad response " + reply + " trying to read "
+              + lb.getBlock() + " from datanode " + dn);
+        }
+      }
+      
+      return PBHelper.convert(reply.getReadOpChecksumInfo().getChecksum().getType());
+    } finally {
+      IOUtils.cleanup(null, pair.in, pair.out);
+    }
+  }
+
   /**
    * Set permissions to a file or directory.
    * @param src path name.
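
The checksum changes above are internal to DFSClient; applications normally reach them through FileSystem#getFileChecksum. A hedged sketch of that entry point (the file path is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChecksumSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // On HDFS this call reaches DFSClient#getFileChecksum, which now infers the
    // checksum type from pre-upgrade DataNodes by reading the first byte of a block.
    FileChecksum checksum = fs.getFileChecksum(new Path("/tmp/example.txt")); // illustrative path
    System.out.println(checksum);
  }
}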

+ 12 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java

@@ -40,14 +40,18 @@ import org.apache.hadoop.tools.GetUserMappingsProtocol;
 public class HDFSPolicyProvider extends PolicyProvider {
   private static final Service[] hdfsServices =
     new Service[] {
-    new Service("security.client.protocol.acl", ClientProtocol.class),
-    new Service("security.client.datanode.protocol.acl", 
-                ClientDatanodeProtocol.class),
-    new Service("security.datanode.protocol.acl", DatanodeProtocol.class),
-    new Service("security.inter.datanode.protocol.acl", 
-                InterDatanodeProtocol.class),
-    new Service("security.namenode.protocol.acl", NamenodeProtocol.class),
-    new Service("security.qjournal.service.protocol.acl", QJournalProtocol.class),
+    new Service(CommonConfigurationKeys.SECURITY_CLIENT_PROTOCOL_ACL,
+        ClientProtocol.class),
+    new Service(CommonConfigurationKeys.SECURITY_CLIENT_DATANODE_PROTOCOL_ACL,
+        ClientDatanodeProtocol.class),
+    new Service(CommonConfigurationKeys.SECURITY_DATANODE_PROTOCOL_ACL,
+        DatanodeProtocol.class),
+    new Service(CommonConfigurationKeys.SECURITY_INTER_DATANODE_PROTOCOL_ACL, 
+        InterDatanodeProtocol.class),
+    new Service(CommonConfigurationKeys.SECURITY_NAMENODE_PROTOCOL_ACL,
+        NamenodeProtocol.class),
+    new Service(CommonConfigurationKeys.SECURITY_QJOURNAL_SERVICE_PROTOCOL_ACL,
+        QJournalProtocol.class),
     new Service(CommonConfigurationKeys.SECURITY_HA_SERVICE_PROTOCOL_ACL,
         HAServiceProtocol.class),
     new Service(CommonConfigurationKeys.SECURITY_ZKFC_PROTOCOL_ACL,

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java

@@ -77,7 +77,7 @@ public class ClientDatanodeProtocolTranslatorPB implements
   /** RpcController is not used and hence is set to null */
   private final static RpcController NULL_CONTROLLER = null;
   private final ClientDatanodeProtocolPB rpcProxy;
-  private final static RefreshNamenodesRequestProto REFRESH_NAMENODES = 
+  private final static RefreshNamenodesRequestProto VOID_REFRESH_NAMENODES = 
       RefreshNamenodesRequestProto.newBuilder().build();
 
   public ClientDatanodeProtocolTranslatorPB(DatanodeID datanodeid,
@@ -170,7 +170,7 @@ public class ClientDatanodeProtocolTranslatorPB implements
   @Override
   public void refreshNamenodes() throws IOException {
     try {
-      rpcProxy.refreshNamenodes(NULL_CONTROLLER, REFRESH_NAMENODES);
+      rpcProxy.refreshNamenodes(NULL_CONTROLLER, VOID_REFRESH_NAMENODES);
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }

+ 76 - 72
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java

@@ -148,6 +148,78 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     ClientNamenodeProtocolPB {
   final private ClientProtocol server;
 
+  private static final CreateResponseProto VOID_CREATE_RESPONSE =
+      CreateResponseProto.newBuilder().build();
+
+  private static final AppendResponseProto VOID_APPEND_RESPONSE =
+      AppendResponseProto.newBuilder().build();
+
+  private static final SetPermissionResponseProto VOID_SET_PERM_RESPONSE =
+      SetPermissionResponseProto.newBuilder().build();
+
+  private static final SetOwnerResponseProto VOID_SET_OWNER_RESPONSE =
+      SetOwnerResponseProto.newBuilder().build();
+
+  private static final AbandonBlockResponseProto VOID_ADD_BLOCK_RESPONSE =
+      AbandonBlockResponseProto.newBuilder().build();
+
+  private static final ReportBadBlocksResponseProto VOID_REP_BAD_BLOCK_RESPONSE =
+      ReportBadBlocksResponseProto.newBuilder().build();
+
+  private static final ConcatResponseProto VOID_CONCAT_RESPONSE =
+      ConcatResponseProto.newBuilder().build();
+
+  private static final Rename2ResponseProto VOID_RENAME2_RESPONSE =
+      Rename2ResponseProto.newBuilder().build();
+
+  private static final GetListingResponseProto VOID_GETLISTING_RESPONSE =
+      GetListingResponseProto.newBuilder().build();
+
+  private static final RenewLeaseResponseProto VOID_RENEWLEASE_RESPONSE =
+      RenewLeaseResponseProto.newBuilder().build();
+
+  private static final SaveNamespaceResponseProto VOID_SAVENAMESPACE_RESPONSE =
+      SaveNamespaceResponseProto.newBuilder().build();
+
+  private static final RefreshNodesResponseProto VOID_REFRESHNODES_RESPONSE =
+      RefreshNodesResponseProto.newBuilder().build();
+
+  private static final FinalizeUpgradeResponseProto VOID_FINALIZEUPGRADE_RESPONSE =
+      FinalizeUpgradeResponseProto.newBuilder().build();
+
+  private static final MetaSaveResponseProto VOID_METASAVE_RESPONSE =
+      MetaSaveResponseProto.newBuilder().build();
+
+  private static final GetFileInfoResponseProto VOID_GETFILEINFO_RESPONSE =
+      GetFileInfoResponseProto.newBuilder().build();
+
+  private static final GetFileLinkInfoResponseProto VOID_GETFILELINKINFO_RESPONSE =
+      GetFileLinkInfoResponseProto.newBuilder().build();
+
+  private static final SetQuotaResponseProto VOID_SETQUOTA_RESPONSE =
+      SetQuotaResponseProto.newBuilder().build();
+
+  private static final FsyncResponseProto VOID_FSYNC_RESPONSE =
+      FsyncResponseProto.newBuilder().build();
+
+  private static final SetTimesResponseProto VOID_SETTIMES_RESPONSE =
+      SetTimesResponseProto.newBuilder().build();
+
+  private static final CreateSymlinkResponseProto VOID_CREATESYMLINK_RESPONSE =
+      CreateSymlinkResponseProto.newBuilder().build();
+
+  private static final UpdatePipelineResponseProto
+      VOID_UPDATEPIPELINE_RESPONSE =
+          UpdatePipelineResponseProto.newBuilder().build();
+
+  private static final CancelDelegationTokenResponseProto
+      VOID_CANCELDELEGATIONTOKEN_RESPONSE =
+          CancelDelegationTokenResponseProto.newBuilder().build();
+
+  private static final SetBalancerBandwidthResponseProto
+      VOID_SETBALANCERBANDWIDTH_RESPONSE =
+          SetBalancerBandwidthResponseProto.newBuilder().build();
+
   /**
    * Constructor
    * 
@@ -192,9 +264,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
   }
 
   
-  static final CreateResponseProto VOID_CREATE_RESPONSE = 
-      CreateResponseProto.newBuilder().build();
-  
   @Override
   public CreateResponseProto create(RpcController controller,
       CreateRequestProto req) throws ServiceException {
@@ -209,9 +278,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     return VOID_CREATE_RESPONSE;
   }
   
-  static final AppendResponseProto NULL_APPEND_RESPONSE = 
-      AppendResponseProto.newBuilder().build();
-  
   @Override
   public AppendResponseProto append(RpcController controller,
       AppendRequestProto req) throws ServiceException {
@@ -221,7 +287,7 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
         return AppendResponseProto.newBuilder()
             .setBlock(PBHelper.convert(result)).build();
       }
-      return NULL_APPEND_RESPONSE;
+      return VOID_APPEND_RESPONSE;
     } catch (IOException e) {
       throw new ServiceException(e);
     }
@@ -240,9 +306,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
   }
 
 
-  static final SetPermissionResponseProto VOID_SET_PERM_RESPONSE = 
-      SetPermissionResponseProto.newBuilder().build();
-
   @Override
   public SetPermissionResponseProto setPermission(RpcController controller,
       SetPermissionRequestProto req) throws ServiceException {
@@ -254,9 +317,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     return VOID_SET_PERM_RESPONSE;
   }
 
-  static final SetOwnerResponseProto VOID_SET_OWNER_RESPONSE = 
-      SetOwnerResponseProto.newBuilder().build();
-
   @Override
   public SetOwnerResponseProto setOwner(RpcController controller,
       SetOwnerRequestProto req) throws ServiceException {
@@ -270,9 +330,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     return VOID_SET_OWNER_RESPONSE;
   }
 
-  static final AbandonBlockResponseProto VOID_ADD_BLOCK_RESPONSE = 
-      AbandonBlockResponseProto.newBuilder().build();
-
   @Override
   public AbandonBlockResponseProto abandonBlock(RpcController controller,
       AbandonBlockRequestProto req) throws ServiceException {
@@ -338,9 +395,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
   
-  static final ReportBadBlocksResponseProto VOID_REP_BAD_BLOCK_RESPONSE = 
-      ReportBadBlocksResponseProto.newBuilder().build();
-  
   @Override
   public ReportBadBlocksResponseProto reportBadBlocks(RpcController controller,
       ReportBadBlocksRequestProto req) throws ServiceException {
@@ -354,9 +408,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     return VOID_REP_BAD_BLOCK_RESPONSE;
   }
 
-  static final ConcatResponseProto VOID_CONCAT_RESPONSE = 
-      ConcatResponseProto.newBuilder().build();
-  
   @Override
   public ConcatResponseProto concat(RpcController controller,
       ConcatRequestProto req) throws ServiceException {
@@ -380,9 +431,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
 
-  static final Rename2ResponseProto VOID_RENAME2_RESPONSE = 
-      Rename2ResponseProto.newBuilder().build();
-  
   @Override
   public Rename2ResponseProto rename2(RpcController controller,
       Rename2RequestProto req) throws ServiceException {
@@ -419,8 +467,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
 
-  static final GetListingResponseProto NULL_GETLISTING_RESPONSE = 
-      GetListingResponseProto.newBuilder().build();
   @Override
   public GetListingResponseProto getListing(RpcController controller,
       GetListingRequestProto req) throws ServiceException {
@@ -432,16 +478,13 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
         return GetListingResponseProto.newBuilder().setDirList(
           PBHelper.convert(result)).build();
       } else {
-        return NULL_GETLISTING_RESPONSE;
+        return VOID_GETLISTING_RESPONSE;
       }
     } catch (IOException e) {
       throw new ServiceException(e);
     }
   }
   
-  static final RenewLeaseResponseProto VOID_RENEWLEASE_RESPONSE = 
-      RenewLeaseResponseProto.newBuilder().build();
-  
   @Override
   public RenewLeaseResponseProto renewLease(RpcController controller,
       RenewLeaseRequestProto req) throws ServiceException {
@@ -526,9 +569,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
   
-  static final SaveNamespaceResponseProto VOID_SAVENAMESPACE_RESPONSE = 
-      SaveNamespaceResponseProto.newBuilder().build();
-
   @Override
   public SaveNamespaceResponseProto saveNamespace(RpcController controller,
       SaveNamespaceRequestProto req) throws ServiceException {
@@ -555,9 +595,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
   }
 
 
-  static final RefreshNodesResponseProto VOID_REFRESHNODES_RESPONSE = 
-      RefreshNodesResponseProto.newBuilder().build();
-
   @Override
   public RefreshNodesResponseProto refreshNodes(RpcController controller,
       RefreshNodesRequestProto req) throws ServiceException {
@@ -570,9 +607,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
 
   }
 
-  static final FinalizeUpgradeResponseProto VOID_FINALIZEUPGRADE_RESPONSE = 
-      FinalizeUpgradeResponseProto.newBuilder().build();
-
   @Override
   public FinalizeUpgradeResponseProto finalizeUpgrade(RpcController controller,
       FinalizeUpgradeRequestProto req) throws ServiceException {
@@ -599,9 +633,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
 
-  static final MetaSaveResponseProto VOID_METASAVE_RESPONSE = 
-      MetaSaveResponseProto.newBuilder().build();
-  
   @Override
   public MetaSaveResponseProto metaSave(RpcController controller,
       MetaSaveRequestProto req) throws ServiceException {
@@ -614,8 +645,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
 
   }
 
-  static final GetFileInfoResponseProto NULL_GETFILEINFO_RESPONSE = 
-      GetFileInfoResponseProto.newBuilder().build();
   @Override
   public GetFileInfoResponseProto getFileInfo(RpcController controller,
       GetFileInfoRequestProto req) throws ServiceException {
@@ -626,14 +655,12 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
         return GetFileInfoResponseProto.newBuilder().setFs(
             PBHelper.convert(result)).build();
       }
-      return NULL_GETFILEINFO_RESPONSE;      
+      return VOID_GETFILEINFO_RESPONSE;      
     } catch (IOException e) {
       throw new ServiceException(e);
     }
   }
 
-  static final GetFileLinkInfoResponseProto NULL_GETFILELINKINFO_RESPONSE = 
-      GetFileLinkInfoResponseProto.newBuilder().build();
   @Override
   public GetFileLinkInfoResponseProto getFileLinkInfo(RpcController controller,
       GetFileLinkInfoRequestProto req) throws ServiceException {
@@ -645,7 +672,7 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
             PBHelper.convert(result)).build();
       } else {
         System.out.println("got  null result for getFileLinkInfo for " + req.getSrc());
-        return NULL_GETFILELINKINFO_RESPONSE;      
+        return VOID_GETFILELINKINFO_RESPONSE;      
       }
 
     } catch (IOException e) {
@@ -666,9 +693,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
   
-  static final SetQuotaResponseProto VOID_SETQUOTA_RESPONSE = 
-      SetQuotaResponseProto.newBuilder().build();
-  
   @Override
   public SetQuotaResponseProto setQuota(RpcController controller,
       SetQuotaRequestProto req) throws ServiceException {
@@ -681,9 +705,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
   
-  static final FsyncResponseProto VOID_FSYNC_RESPONSE = 
-      FsyncResponseProto.newBuilder().build();
-
   @Override
   public FsyncResponseProto fsync(RpcController controller,
       FsyncRequestProto req) throws ServiceException {
@@ -695,9 +716,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
 
-  static final SetTimesResponseProto VOID_SETTIMES_RESPONSE = 
-      SetTimesResponseProto.newBuilder().build();
-
   @Override
   public SetTimesResponseProto setTimes(RpcController controller,
       SetTimesRequestProto req) throws ServiceException {
@@ -709,9 +727,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
 
-  static final CreateSymlinkResponseProto VOID_CREATESYMLINK_RESPONSE = 
-      CreateSymlinkResponseProto.newBuilder().build();
-
   @Override
   public CreateSymlinkResponseProto createSymlink(RpcController controller,
       CreateSymlinkRequestProto req) throws ServiceException {
@@ -755,9 +770,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
 
-  static final UpdatePipelineResponseProto VOID_UPDATEPIPELINE_RESPONSE = 
-      UpdatePipelineResponseProto.newBuilder().build();
-
   @Override
   public UpdatePipelineResponseProto updatePipeline(RpcController controller,
       UpdatePipelineRequestProto req) throws ServiceException {
@@ -805,10 +817,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
 
-  static final CancelDelegationTokenResponseProto 
-      VOID_CANCELDELEGATIONTOKEN_RESPONSE = 
-      CancelDelegationTokenResponseProto.newBuilder().build();
-  
   @Override
   public CancelDelegationTokenResponseProto cancelDelegationToken(
       RpcController controller, CancelDelegationTokenRequestProto req)
@@ -822,10 +830,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
     }
   }
 
-  static final SetBalancerBandwidthResponseProto 
-    VOID_SETBALANCERBANDWIDTH_RESPONSE = 
-      SetBalancerBandwidthResponseProto.newBuilder().build();
-
   @Override
   public SetBalancerBandwidthResponseProto setBalancerBandwidth(
       RpcController controller, SetBalancerBandwidthRequestProto req)

+ 34 - 17
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java

@@ -129,6 +129,29 @@ public class ClientNamenodeProtocolTranslatorPB implements
     ProtocolMetaInterface, ClientProtocol, Closeable, ProtocolTranslator {
   final private ClientNamenodeProtocolPB rpcProxy;
 
+  static final GetServerDefaultsRequestProto VOID_GET_SERVER_DEFAULT_REQUEST =
+      GetServerDefaultsRequestProto.newBuilder().build();
+
+  private final static GetFsStatusRequestProto VOID_GET_FSSTATUS_REQUEST =
+      GetFsStatusRequestProto.newBuilder().build();
+
+  private final static SaveNamespaceRequestProto VOID_SAVE_NAMESPACE_REQUEST =
+      SaveNamespaceRequestProto.newBuilder().build();
+
+  private final static RollEditsRequestProto VOID_ROLLEDITS_REQUEST =
+      RollEditsRequestProto.getDefaultInstance();
+
+  private final static RefreshNodesRequestProto VOID_REFRESH_NODES_REQUEST =
+      RefreshNodesRequestProto.newBuilder().build();
+
+  private final static FinalizeUpgradeRequestProto
+      VOID_FINALIZE_UPGRADE_REQUEST =
+          FinalizeUpgradeRequestProto.newBuilder().build();
+
+  private final static GetDataEncryptionKeyRequestProto
+      VOID_GET_DATA_ENCRYPTIONKEY_REQUEST =
+          GetDataEncryptionKeyRequestProto.newBuilder().build();
+
   public ClientNamenodeProtocolTranslatorPB(ClientNamenodeProtocolPB proxy) {
     rpcProxy = proxy;
   }
@@ -160,7 +183,7 @@ public class ClientNamenodeProtocolTranslatorPB implements
 
   @Override
   public FsServerDefaults getServerDefaults() throws IOException {
-    GetServerDefaultsRequestProto req = GetServerDefaultsRequestProto.newBuilder().build();
+    GetServerDefaultsRequestProto req = VOID_GET_SERVER_DEFAULT_REQUEST;
     try {
       return PBHelper
           .convert(rpcProxy.getServerDefaults(null, req).getServerDefaults());
@@ -473,9 +496,9 @@ public class ClientNamenodeProtocolTranslatorPB implements
 
   @Override
   public long[] getStats() throws IOException {
-    GetFsStatusRequestProto req = GetFsStatusRequestProto.newBuilder().build();
     try {
-      return PBHelper.convert(rpcProxy.getFsStats(null, req));
+      return PBHelper.convert(rpcProxy.getFsStats(null,
+          VOID_GET_FSSTATUS_REQUEST));
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }
@@ -522,10 +545,8 @@ public class ClientNamenodeProtocolTranslatorPB implements
 
   @Override
   public void saveNamespace() throws AccessControlException, IOException {
-    SaveNamespaceRequestProto req = SaveNamespaceRequestProto.newBuilder()
-        .build();
     try {
-      rpcProxy.saveNamespace(null, req);
+      rpcProxy.saveNamespace(null, VOID_SAVE_NAMESPACE_REQUEST);
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }
@@ -533,9 +554,9 @@ public class ClientNamenodeProtocolTranslatorPB implements
   
   @Override
   public long rollEdits() throws AccessControlException, IOException {
-    RollEditsRequestProto req = RollEditsRequestProto.getDefaultInstance();
     try {
-      RollEditsResponseProto resp = rpcProxy.rollEdits(null, req);
+      RollEditsResponseProto resp = rpcProxy.rollEdits(null,
+          VOID_ROLLEDITS_REQUEST);
       return resp.getNewSegmentTxId();
     } catch (ServiceException se) {
       throw ProtobufHelper.getRemoteException(se);
@@ -557,9 +578,8 @@ public class ClientNamenodeProtocolTranslatorPB implements
 
   @Override
   public void refreshNodes() throws IOException {
-    RefreshNodesRequestProto req = RefreshNodesRequestProto.newBuilder().build();
     try {
-      rpcProxy.refreshNodes(null, req);
+      rpcProxy.refreshNodes(null, VOID_REFRESH_NODES_REQUEST);
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }
@@ -567,9 +587,8 @@ public class ClientNamenodeProtocolTranslatorPB implements
 
   @Override
   public void finalizeUpgrade() throws IOException {
-    FinalizeUpgradeRequestProto req = FinalizeUpgradeRequestProto.newBuilder().build();
     try {
-      rpcProxy.finalizeUpgrade(null, req);
+      rpcProxy.finalizeUpgrade(null, VOID_FINALIZE_UPGRADE_REQUEST);
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }
@@ -818,12 +837,10 @@ public class ClientNamenodeProtocolTranslatorPB implements
   
   @Override
   public DataEncryptionKey getDataEncryptionKey() throws IOException {
-    GetDataEncryptionKeyRequestProto req = GetDataEncryptionKeyRequestProto
-        .newBuilder().build();
     try {
-      GetDataEncryptionKeyResponseProto rsp = 
-          rpcProxy.getDataEncryptionKey(null, req);
-      return rsp.hasDataEncryptionKey() ? 
+      GetDataEncryptionKeyResponseProto rsp = rpcProxy.getDataEncryptionKey(
+          null, VOID_GET_DATA_ENCRYPTIONKEY_REQUEST);
+      return rsp.hasDataEncryptionKey() ?
           PBHelper.convert(rsp.getDataEncryptionKey()) : null;
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java

@@ -84,7 +84,7 @@ public class DatanodeProtocolClientSideTranslatorPB implements
   
   /** RpcController is not used and hence is set to null */
   private final DatanodeProtocolPB rpcProxy;
-  private static final VersionRequestProto VERSION_REQUEST = 
+  private static final VersionRequestProto VOID_VERSION_REQUEST = 
       VersionRequestProto.newBuilder().build();
   private final static RpcController NULL_CONTROLLER = null;
   
@@ -243,7 +243,7 @@ public class DatanodeProtocolClientSideTranslatorPB implements
   public NamespaceInfo versionRequest() throws IOException {
     try {
       return PBHelper.convert(rpcProxy.versionRequest(NULL_CONTROLLER,
-          VERSION_REQUEST).getInfo());
+          VOID_VERSION_REQUEST).getInfo());
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }

+ 12 - 10
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java

@@ -62,15 +62,17 @@ public class DatanodeProtocolServerSideTranslatorPB implements
     DatanodeProtocolPB {
 
   private final DatanodeProtocol impl;
-  private static final ErrorReportResponseProto ERROR_REPORT_RESPONSE_PROTO = 
-      ErrorReportResponseProto.newBuilder().build();
+  private static final ErrorReportResponseProto
+      VOID_ERROR_REPORT_RESPONSE_PROTO = 
+          ErrorReportResponseProto.newBuilder().build();
   private static final BlockReceivedAndDeletedResponseProto 
-      BLOCK_RECEIVED_AND_DELETE_RESPONSE = 
+      VOID_BLOCK_RECEIVED_AND_DELETE_RESPONSE = 
           BlockReceivedAndDeletedResponseProto.newBuilder().build();
-  private static final ReportBadBlocksResponseProto REPORT_BAD_BLOCK_RESPONSE = 
-      ReportBadBlocksResponseProto.newBuilder().build();
+  private static final ReportBadBlocksResponseProto
+      VOID_REPORT_BAD_BLOCK_RESPONSE = 
+          ReportBadBlocksResponseProto.newBuilder().build();
   private static final CommitBlockSynchronizationResponseProto 
-      COMMIT_BLOCK_SYNCHRONIZATION_RESPONSE_PROTO =
+      VOID_COMMIT_BLOCK_SYNCHRONIZATION_RESPONSE_PROTO =
           CommitBlockSynchronizationResponseProto.newBuilder().build();
 
   public DatanodeProtocolServerSideTranslatorPB(DatanodeProtocol impl) {
@@ -180,7 +182,7 @@ public class DatanodeProtocolServerSideTranslatorPB implements
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return BLOCK_RECEIVED_AND_DELETE_RESPONSE;
+    return VOID_BLOCK_RECEIVED_AND_DELETE_RESPONSE;
   }
 
   @Override
@@ -192,7 +194,7 @@ public class DatanodeProtocolServerSideTranslatorPB implements
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return ERROR_REPORT_RESPONSE_PROTO;
+    return VOID_ERROR_REPORT_RESPONSE_PROTO;
   }
 
   @Override
@@ -221,7 +223,7 @@ public class DatanodeProtocolServerSideTranslatorPB implements
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return REPORT_BAD_BLOCK_RESPONSE;
+    return VOID_REPORT_BAD_BLOCK_RESPONSE;
   }
 
   @Override
@@ -242,6 +244,6 @@ public class DatanodeProtocolServerSideTranslatorPB implements
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return COMMIT_BLOCK_SYNCHRONIZATION_RESPONSE_PROTO;
+    return VOID_COMMIT_BLOCK_SYNCHRONIZATION_RESPONSE_PROTO;
   }
 }

+ 9 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolServerSideTranslatorPB.java

@@ -42,6 +42,13 @@ public class JournalProtocolServerSideTranslatorPB implements JournalProtocolPB
   /** Server side implementation to delegate the requests to */
   private final JournalProtocol impl;
 
+  private final static JournalResponseProto VOID_JOURNAL_RESPONSE =
+      JournalResponseProto.newBuilder().build();
+
+  private final static StartLogSegmentResponseProto
+      VOID_START_LOG_SEGMENT_RESPONSE =
+          StartLogSegmentResponseProto.newBuilder().build();
+
   public JournalProtocolServerSideTranslatorPB(JournalProtocol impl) {
     this.impl = impl;
   }
@@ -56,7 +63,7 @@ public class JournalProtocolServerSideTranslatorPB implements JournalProtocolPB
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return JournalResponseProto.newBuilder().build();
+    return VOID_JOURNAL_RESPONSE;
   }
 
   /** @see JournalProtocol#startLogSegment */
@@ -69,7 +76,7 @@ public class JournalProtocolServerSideTranslatorPB implements JournalProtocolPB
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return StartLogSegmentResponseProto.newBuilder().build();
+    return VOID_START_LOG_SEGMENT_RESPONSE;
   }
 
   @Override

+ 8 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java

@@ -63,6 +63,12 @@ public class NamenodeProtocolServerSideTranslatorPB implements
     NamenodeProtocolPB {
   private final NamenodeProtocol impl;
 
+  private final static ErrorReportResponseProto VOID_ERROR_REPORT_RESPONSE =
+      ErrorReportResponseProto.newBuilder().build();
+
+  private final static EndCheckpointResponseProto VOID_END_CHECKPOINT_RESPONSE =
+      EndCheckpointResponseProto.newBuilder().build();
+
   public NamenodeProtocolServerSideTranslatorPB(NamenodeProtocol impl) {
     this.impl = impl;
   }
@@ -147,7 +153,7 @@ public class NamenodeProtocolServerSideTranslatorPB implements
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return ErrorReportResponseProto.newBuilder().build();
+    return VOID_ERROR_REPORT_RESPONSE;
   }
 
   @Override
@@ -185,7 +191,7 @@ public class NamenodeProtocolServerSideTranslatorPB implements
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return EndCheckpointResponseProto.newBuilder().build();
+    return VOID_END_CHECKPOINT_RESPONSE;
   }
 
   @Override

+ 9 - 9
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java

@@ -68,13 +68,13 @@ public class NamenodeProtocolTranslatorPB implements NamenodeProtocol,
   /*
    * Protobuf requests with no parameters instantiated only once
    */
-  private static final GetBlockKeysRequestProto GET_BLOCKKEYS = 
+  private static final GetBlockKeysRequestProto VOID_GET_BLOCKKEYS_REQUEST = 
       GetBlockKeysRequestProto.newBuilder().build();
-  private static final GetTransactionIdRequestProto GET_TRANSACTIONID = 
+  private static final GetTransactionIdRequestProto VOID_GET_TRANSACTIONID_REQUEST = 
       GetTransactionIdRequestProto.newBuilder().build();
-  private static final RollEditLogRequestProto ROLL_EDITLOG = 
+  private static final RollEditLogRequestProto VOID_ROLL_EDITLOG_REQUEST = 
       RollEditLogRequestProto.newBuilder().build();
-  private static final VersionRequestProto VERSION_REQUEST = 
+  private static final VersionRequestProto VOID_VERSION_REQUEST = 
       VersionRequestProto.newBuilder().build();
 
   final private NamenodeProtocolPB rpcProxy;
@@ -106,7 +106,7 @@ public class NamenodeProtocolTranslatorPB implements NamenodeProtocol,
   public ExportedBlockKeys getBlockKeys() throws IOException {
     try {
       GetBlockKeysResponseProto rsp = rpcProxy.getBlockKeys(NULL_CONTROLLER,
-          GET_BLOCKKEYS);
+          VOID_GET_BLOCKKEYS_REQUEST);
       return rsp.hasKeys() ? PBHelper.convert(rsp.getKeys()) : null;
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
@@ -116,8 +116,8 @@ public class NamenodeProtocolTranslatorPB implements NamenodeProtocol,
   @Override
   public long getTransactionID() throws IOException {
     try {
-      return rpcProxy.getTransactionId(NULL_CONTROLLER, GET_TRANSACTIONID)
-          .getTxId();
+      return rpcProxy.getTransactionId(NULL_CONTROLLER,
+          VOID_GET_TRANSACTIONID_REQUEST).getTxId();
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }
@@ -137,7 +137,7 @@ public class NamenodeProtocolTranslatorPB implements NamenodeProtocol,
   public CheckpointSignature rollEditLog() throws IOException {
     try {
       return PBHelper.convert(rpcProxy.rollEditLog(NULL_CONTROLLER,
-          ROLL_EDITLOG).getSignature());
+          VOID_ROLL_EDITLOG_REQUEST).getSignature());
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }
@@ -147,7 +147,7 @@ public class NamenodeProtocolTranslatorPB implements NamenodeProtocol,
   public NamespaceInfo versionRequest() throws IOException {
     try {
       return PBHelper.convert(rpcProxy.versionRequest(NULL_CONTROLLER,
-          VERSION_REQUEST).getInfo());
+          VOID_VERSION_REQUEST).getInfo());
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
     }
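
The hunks above all apply the same micro-optimization: a protobuf message with no fields is immutable, so a single shared instance can stand in for every void request or response instead of building a fresh one per RPC. A minimal sketch of the pattern follows; ParameterlessRequest is a hypothetical stand-in for a generated message such as VersionRequestProto, not a real Hadoop class.

// Sketch of the "void proto singleton" pattern used throughout these
// translators. ParameterlessRequest stands in for a protobuf-generated
// message; real code would use e.g. VersionRequestProto.newBuilder().build().
public class VoidProtoSingletonSketch {

  /** Minimal stand-in for an immutable, parameterless generated message. */
  static final class ParameterlessRequest {
    static final class Builder {
      ParameterlessRequest build() { return new ParameterlessRequest(); }
    }
    static Builder newBuilder() { return new Builder(); }
    private ParameterlessRequest() { }
  }

  // Built once; protobuf messages are immutable, so one instance can be
  // shared by every RPC invocation instead of allocating per call.
  private static final ParameterlessRequest VOID_REQUEST =
      ParameterlessRequest.newBuilder().build();

  // Every call that previously did newBuilder().build() now just returns
  // the shared constant.
  ParameterlessRequest voidRequest() {
    return VOID_REQUEST;
  }
}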

+ 6 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java

@@ -38,6 +38,10 @@ public class RefreshAuthorizationPolicyProtocolClientSideTranslatorPB implements
   private final static RpcController NULL_CONTROLLER = null;
   private final RefreshAuthorizationPolicyProtocolPB rpcProxy;
   
+  private final static RefreshServiceAclRequestProto
+  VOID_REFRESH_SERVICE_ACL_REQUEST =
+      RefreshServiceAclRequestProto.newBuilder().build();
+
   public RefreshAuthorizationPolicyProtocolClientSideTranslatorPB(
       RefreshAuthorizationPolicyProtocolPB rpcProxy) {
     this.rpcProxy = rpcProxy;
@@ -50,10 +54,9 @@ public class RefreshAuthorizationPolicyProtocolClientSideTranslatorPB implements
 
   @Override
   public void refreshServiceAcl() throws IOException {
-    RefreshServiceAclRequestProto request = RefreshServiceAclRequestProto
-        .newBuilder().build();
     try {
-      rpcProxy.refreshServiceAcl(NULL_CONTROLLER, request);
+      rpcProxy.refreshServiceAcl(NULL_CONTROLLER,
+          VOID_REFRESH_SERVICE_ACL_REQUEST);
     } catch (ServiceException se) {
       throw ProtobufHelper.getRemoteException(se);
     }

+ 5 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolServerSideTranslatorPB.java

@@ -32,6 +32,10 @@ public class RefreshAuthorizationPolicyProtocolServerSideTranslatorPB implements
 
   private final RefreshAuthorizationPolicyProtocol impl;
 
+  private final static RefreshServiceAclResponseProto
+  VOID_REFRESH_SERVICE_ACL_RESPONSE = RefreshServiceAclResponseProto
+      .newBuilder().build();
+
   public RefreshAuthorizationPolicyProtocolServerSideTranslatorPB(
       RefreshAuthorizationPolicyProtocol impl) {
     this.impl = impl;
@@ -46,6 +50,6 @@ public class RefreshAuthorizationPolicyProtocolServerSideTranslatorPB implements
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return RefreshServiceAclResponseProto.newBuilder().build();
+    return VOID_REFRESH_SERVICE_ACL_RESPONSE;
   }
 }

+ 12 - 6
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java

@@ -39,6 +39,14 @@ public class RefreshUserMappingsProtocolClientSideTranslatorPB implements
   private final static RpcController NULL_CONTROLLER = null;
   private final RefreshUserMappingsProtocolPB rpcProxy;
   
+  private final static RefreshUserToGroupsMappingsRequestProto 
+  VOID_REFRESH_USER_TO_GROUPS_MAPPING_REQUEST = 
+      RefreshUserToGroupsMappingsRequestProto.newBuilder().build();
+
+  private final static RefreshSuperUserGroupsConfigurationRequestProto
+  VOID_REFRESH_SUPERUSER_GROUPS_CONFIGURATION_REQUEST = 
+      RefreshSuperUserGroupsConfigurationRequestProto.newBuilder().build();
+
   public RefreshUserMappingsProtocolClientSideTranslatorPB(
       RefreshUserMappingsProtocolPB rpcProxy) {
     this.rpcProxy = rpcProxy;
@@ -51,10 +59,9 @@ public class RefreshUserMappingsProtocolClientSideTranslatorPB implements
 
   @Override
   public void refreshUserToGroupsMappings() throws IOException {
-    RefreshUserToGroupsMappingsRequestProto request = 
-        RefreshUserToGroupsMappingsRequestProto.newBuilder().build();
     try {
-      rpcProxy.refreshUserToGroupsMappings(NULL_CONTROLLER, request);
+      rpcProxy.refreshUserToGroupsMappings(NULL_CONTROLLER,
+          VOID_REFRESH_USER_TO_GROUPS_MAPPING_REQUEST);
     } catch (ServiceException se) {
       throw ProtobufHelper.getRemoteException(se);
     }
@@ -62,10 +69,9 @@ public class RefreshUserMappingsProtocolClientSideTranslatorPB implements
 
   @Override
   public void refreshSuperUserGroupsConfiguration() throws IOException {
-    RefreshSuperUserGroupsConfigurationRequestProto request = 
-        RefreshSuperUserGroupsConfigurationRequestProto.newBuilder().build();
     try {
-      rpcProxy.refreshSuperUserGroupsConfiguration(NULL_CONTROLLER, request);
+      rpcProxy.refreshSuperUserGroupsConfiguration(NULL_CONTROLLER,
+          VOID_REFRESH_SUPERUSER_GROUPS_CONFIGURATION_REQUEST);
     } catch (ServiceException se) {
       throw ProtobufHelper.getRemoteException(se);
     }

+ 11 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolServerSideTranslatorPB.java

@@ -33,6 +33,15 @@ public class RefreshUserMappingsProtocolServerSideTranslatorPB implements Refres
 
   private final RefreshUserMappingsProtocol impl;
   
+  private final static RefreshUserToGroupsMappingsResponseProto 
+  VOID_REFRESH_USER_GROUPS_MAPPING_RESPONSE =
+      RefreshUserToGroupsMappingsResponseProto.newBuilder().build();
+
+  private final static RefreshSuperUserGroupsConfigurationResponseProto
+  VOID_REFRESH_SUPERUSER_GROUPS_CONFIGURATION_RESPONSE = 
+      RefreshSuperUserGroupsConfigurationResponseProto.newBuilder()
+      .build();
+
   public RefreshUserMappingsProtocolServerSideTranslatorPB(RefreshUserMappingsProtocol impl) {
     this.impl = impl;
   }
@@ -47,7 +56,7 @@ public class RefreshUserMappingsProtocolServerSideTranslatorPB implements Refres
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return RefreshUserToGroupsMappingsResponseProto.newBuilder().build();
+    return VOID_REFRESH_USER_GROUPS_MAPPING_RESPONSE;
   }
 
   @Override
@@ -60,7 +69,6 @@ public class RefreshUserMappingsProtocolServerSideTranslatorPB implements Refres
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return RefreshSuperUserGroupsConfigurationResponseProto.newBuilder()
-        .build();
+    return VOID_REFRESH_SUPERUSER_GROUPS_CONFIGURATION_RESPONSE;
   }
 }

+ 9 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/QJournalProtocolServerSideTranslatorPB.java

@@ -65,6 +65,13 @@ public class QJournalProtocolServerSideTranslatorPB implements QJournalProtocolP
   /** Server side implementation to delegate the requests to */
   private final QJournalProtocol impl;
 
+  private final static JournalResponseProto VOID_JOURNAL_RESPONSE =
+  JournalResponseProto.newBuilder().build();
+
+  private final static StartLogSegmentResponseProto
+  VOID_START_LOG_SEGMENT_RESPONSE =
+      StartLogSegmentResponseProto.newBuilder().build();
+
   public QJournalProtocolServerSideTranslatorPB(QJournalProtocol impl) {
     this.impl = impl;
   }
@@ -135,7 +142,7 @@ public class QJournalProtocolServerSideTranslatorPB implements QJournalProtocolP
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return JournalResponseProto.newBuilder().build();
+    return VOID_JOURNAL_RESPONSE;
   }
 
   /** @see JournalProtocol#heartbeat */
@@ -160,7 +167,7 @@ public class QJournalProtocolServerSideTranslatorPB implements QJournalProtocolP
     } catch (IOException e) {
       throw new ServiceException(e);
     }
-    return StartLogSegmentResponseProto.newBuilder().build();
+    return VOID_START_LOG_SEGMENT_RESPONSE;
   }
   
   @Override

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java

@@ -395,7 +395,7 @@ class BPOfferService {
   }
 
   @VisibleForTesting
-  synchronized List<BPServiceActor> getBPServiceActors() {
+  List<BPServiceActor> getBPServiceActors() {
     return Lists.newArrayList(bpServices);
   }
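
Dropping synchronized here is reasonable because the accessor hands back a copy of the collection rather than the live list, so callers never iterate shared mutable state. A minimal sketch of that copy-on-read idiom is below, assuming Guava on the classpath and a thread-safe backing list; the CopyOnWriteArrayList field and the ServiceActor type are assumptions made for illustration, not necessarily the actual members of BPOfferService.

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;

import com.google.common.collect.Lists;

// Sketch of the copy-on-read accessor idiom. ServiceActor and the
// CopyOnWriteArrayList backing field are illustrative assumptions.
class ActorRegistrySketch {
  static class ServiceActor { }

  // Iteration over a CopyOnWriteArrayList never throws
  // ConcurrentModificationException, so readers need no lock.
  private final List<ServiceActor> actors =
      new CopyOnWriteArrayList<ServiceActor>();

  // Returns a snapshot; callers cannot mutate the registry through it,
  // so the accessor itself does not need to be synchronized.
  List<ServiceActor> getActors() {
    return Lists.newArrayList(actors);
  }
}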
   

+ 1 - 12
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileChecksumServlets.java

@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.io.PrintWriter;
 import java.net.URL;
 
-import javax.net.SocketFactory;
 import javax.servlet.ServletContext;
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServletRequest;
@@ -33,14 +32,11 @@ import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
-import org.apache.hadoop.hdfs.protocol.ClientProtocol;
 import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
-import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.common.JspHelper;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DatanodeJspHelper;
-import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.ServletUtil;
 import org.znerd.xmlenc.XMLOutputter;
@@ -116,18 +112,11 @@ public class FileChecksumServlets {
       final DataNode datanode = (DataNode) context.getAttribute("datanode");
       final Configuration conf = 
         new HdfsConfiguration(datanode.getConf());
-      final int socketTimeout = conf.getInt(
-          DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY,
-          HdfsServerConstants.READ_TIMEOUT);
-      final SocketFactory socketFactory = NetUtils.getSocketFactory(conf,
-          ClientProtocol.class);
       
       try {
         final DFSClient dfs = DatanodeJspHelper.getDFSClient(request, 
             datanode, conf, getUGI(request, conf));
-        final ClientProtocol nnproxy = dfs.getNamenode();
-        final MD5MD5CRC32FileChecksum checksum = DFSClient.getFileChecksum(
-            path, nnproxy, socketFactory, socketTimeout, dfs.getDataEncryptionKey(), false);
+        final MD5MD5CRC32FileChecksum checksum = dfs.getFileChecksum(path);
         MD5MD5CRC32FileChecksum.write(xml, checksum);
       } catch(IOException ioe) {
         writeXml(ioe, path, xml);
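
The servlet now asks the already-constructed client for the checksum instead of rebuilding the namenode proxy, socket factory and timeout by hand. For callers outside the servlet, the equivalent public entry point is FileSystem.getFileChecksum(Path); the sketch below shows that call with a placeholder path and default configuration, and is only an illustration of the API shape.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch: fetch a file checksum through the public FileSystem API.
// The path and the configured default filesystem are placeholders.
public class ChecksumSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // MD5-of-MD5-of-CRC32 for HDFS files; null if the scheme has no checksum.
    FileChecksum checksum = fs.getFileChecksum(new Path("/tmp/example.dat"));
    if (checksum != null) {
      System.out.println(checksum.getAlgorithmName() + ": " + checksum);
    }
  }
}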

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto

@@ -168,7 +168,7 @@ message RenameRequestProto {
   required string dst = 2;
 }
 
-message RenameResponseProto { // void response
+message RenameResponseProto {
   required bool result = 1;
 }
 

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto

@@ -204,5 +204,5 @@ message OpBlockChecksumResponseProto {
   required uint32 bytesPerCrc = 1;
   required uint64 crcPerBlock = 2;
   required bytes md5 = 3;
-  optional ChecksumTypeProto crcType = 4 [default = CHECKSUM_CRC32];
+  optional ChecksumTypeProto crcType = 4;
 }
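
Removing the [default = CHECKSUM_CRC32] annotation means a reader can no longer count on getCrcType() quietly reporting CRC32 when an old writer omitted the field; the receiver has to test presence and choose its own fallback. The sketch below shows that check using the has*/get* accessors protobuf generates for optional fields; OpBlockChecksumResponse and ChecksumType are simplified stand-ins for the generated classes, and treating absence as legacy CRC32 is an illustrative assumption.

// Sketch: handling an optional enum field once the proto-level default is gone.
public class CrcTypeFallbackSketch {
  enum ChecksumType { CHECKSUM_NULL, CHECKSUM_CRC32, CHECKSUM_CRC32C }

  /** Stand-in for the generated OpBlockChecksumResponseProto. */
  static final class OpBlockChecksumResponse {
    private final ChecksumType crcType;          // null means "not set"
    OpBlockChecksumResponse(ChecksumType crcType) { this.crcType = crcType; }
    boolean hasCrcType() { return crcType != null; }
    ChecksumType getCrcType() { return crcType; }
  }

  // Old writers may omit crcType; this caller treats absence as legacy CRC32.
  static ChecksumType effectiveCrcType(OpBlockChecksumResponse resp) {
    return resp.hasCrcType() ? resp.getCrcType() : ChecksumType.CHECKSUM_CRC32;
  }

  public static void main(String[] args) {
    System.out.println(effectiveCrcType(new OpBlockChecksumResponse(null)));
  }
}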

+ 1 - 15
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java

@@ -48,7 +48,6 @@ import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.RandomAccessFile;
 import java.net.InetSocketAddress;
-import java.net.ServerSocket;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.nio.channels.FileChannel;
@@ -2321,19 +2320,6 @@ public class MiniDFSCluster {
     return nameNodes[nnIndex].nameNode;
   }
   
-  private int getFreeSocketPort() {
-    int port = 0;
-    try {
-      ServerSocket s = new ServerSocket(0);
-      port = s.getLocalPort();
-      s.close();
-      return port;
-    } catch (IOException e) {
-      // Could not get a free port. Return default port 0.
-    }
-    return port;
-  }
-  
   protected void setupDatanodeAddress(Configuration conf, boolean setupHostsFile,
                            boolean checkDataNodeAddrConfig) throws IOException {
     if (setupHostsFile) {
@@ -2342,7 +2328,7 @@ public class MiniDFSCluster {
         throw new IOException("Parameter dfs.hosts is not setup in conf");
       }
       // Setup datanode in the include file, if it is defined in the conf
-      String address = "127.0.0.1:" + getFreeSocketPort();
+      String address = "127.0.0.1:" + NetUtils.getFreeSocketPort();
       if (checkDataNodeAddrConfig) {
         conf.setIfUnset(DFS_DATANODE_ADDRESS_KEY, address);
       } else {
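
The cluster helper now reuses NetUtils.getFreeSocketPort() instead of carrying a private copy of the probe. The probe itself is just the bind-to-port-zero idiom; the standalone sketch below mirrors the deleted helper (including its fall-back to port 0 on failure) and is not the NetUtils implementation.

import java.io.IOException;
import java.net.ServerSocket;

// Sketch of the "bind to port 0" idiom for finding a free local port.
public class FreePortSketch {
  static int getFreeSocketPort() {
    int port = 0;
    try {
      ServerSocket s = new ServerSocket(0); // OS assigns an unused ephemeral port
      port = s.getLocalPort();
      s.close();                            // free it again; a later bind may still race
    } catch (IOException e) {
      // could not probe a port; fall back to 0 like the deleted helper
    }
    return port;
  }

  public static void main(String[] args) {
    System.out.println("free port: " + getFreeSocketPort());
  }
}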

+ 40 - 4
hadoop-mapreduce-project/CHANGES.txt

@@ -19,6 +19,8 @@ Trunk (Unreleased)
     MAPREDUCE-4887. Add RehashPartitioner, to smooth distributions
     with poor implementations of Object#hashCode().  (Radim Kolar via cutting)
 
+    MAPREDUCE-4808. Refactor MapOutput and MergeManager to facilitate reuse by Shuffle implementations. (masokan via tucu)
+
   IMPROVEMENTS
 
     MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for
@@ -151,9 +153,6 @@ Trunk (Unreleased)
 
     MAPREDUCE-3223. Remove MR1 configs from mapred-default.xml (tlipcon via harsh)
 
-    MAPREDUCE-4678. Running the Pentomino example with defaults throws
-    java.lang.NegativeArraySizeException (Chris McConnell via harsh)
-
     MAPREDUCE-4695. Fix LocalRunner on trunk after MAPREDUCE-3223 broke it
     (harsh)
 
@@ -210,6 +209,8 @@ Release 2.0.3-alpha - Unreleased
     MAPREDUCE-4907. TrackerDistributedCacheManager issues too many getFileStatus
     calls. (sandyr via tucu)
 
+    MAPREDUCE-4949. Enable multiple pi jobs to run in parallel. (sandyr via tucu)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -256,6 +257,17 @@ Release 2.0.3-alpha - Unreleased
     MAPREDUCE-1700. User supplied dependencies may conflict with MapReduce
     system JARs. (tomwhite)
 
+    MAPREDUCE-4936. JobImpl uber checks for cpu are wrong (Arun C Murthy via
+    jlowe)
+
+    MAPREDUCE-4924. flakey test: org.apache.hadoop.mapred.TestClusterMRNotification.testMR. 
+    (rkanter via tucu)
+
+    MAPREDUCE-4923. Add toString method to TaggedInputSplit. (sandyr via tucu)
+
+    MAPREDUCE-4948. Fix a failing unit test TestYARNRunner.testHistoryServerToken.
+    (Junping Du via sseth)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES
@@ -632,6 +644,24 @@ Release 2.0.0-alpha - 05-23-2012
     MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is
     bad (Jason Lowe via bobby)
 
+Release 0.23.7 - UNRELEASED
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+    MAPREDUCE-4946. Fix a performance problem for large jobs by reducing the
+    number of map completion event type conversions. (Jason Lowe via sseth)
+
+  BUG FIXES
+
+    MAPREDUCE-4458. Warn if java.library.path is used for AM or Task
+    (Robert Parker via jeagles)
+
 Release 0.23.6 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -699,7 +729,13 @@ Release 0.23.6 - UNRELEASED
 
     MAPREDUCE-4934. Maven RAT plugin is not checking all source files (tgraves)
 
-Release 0.23.5 - UNRELEASED
+    MAPREDUCE-4678. Running the Pentomino example with defaults throws
+    java.lang.NegativeArraySizeException (Chris McConnell via harsh)
+
+    MAPREDUCE-4925. The pentomino option parser may be buggy.
+    (Karthik Kambatla via harsh)
+
+Release 0.23.5 - 2012-11-28
 
   INCOMPATIBLE CHANGES
 

+ 1 - 1
hadoop-mapreduce-project/dev-support/findbugs-exclude.xml

@@ -268,7 +268,7 @@
      This class is unlikely to get subclassed, so ignore
     -->
      <Match>
-       <Class name="org.apache.hadoop.mapreduce.task.reduce.MergeManager" />
+       <Class name="org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl" />
        <Bug pattern="SC_START_IN_CTOR" />
      </Match>
 

+ 2 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java

@@ -275,14 +275,13 @@ public class TaskAttemptListenerImpl extends CompositeService
     boolean shouldReset = false;
     org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
       TypeConverter.toYarn(taskAttemptID);
-    org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent[] events =
+    TaskCompletionEvent[] events =
         context.getJob(attemptID.getTaskId().getJobId()).getMapAttemptCompletionEvents(
             startIndex, maxEvents);
 
     taskHeartbeatHandler.progressing(attemptID);
     
-    return new MapTaskCompletionEventsUpdate(
-        TypeConverter.fromYarn(events), shouldReset);
+    return new MapTaskCompletionEventsUpdate(events, shouldReset);
   }
 
   @Override

+ 2 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java

@@ -125,8 +125,8 @@ public class MRClientService extends AbstractService
               .getenv(ApplicationConstants.APPLICATION_CLIENT_SECRET_ENV_NAME);
       byte[] bytes = Base64.decodeBase64(secretKeyStr);
       secretManager =
-          new ClientToAMTokenSecretManager(this.appContext.getApplicationID(),
-            bytes);
+          new ClientToAMTokenSecretManager(
+            this.appContext.getApplicationAttemptId(), bytes);
     }
     server =
         rpc.getServer(MRClientProtocol.class, protocolHandler, address,

+ 2 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/Job.java

@@ -24,6 +24,7 @@ import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapreduce.Counters;
 import org.apache.hadoop.mapreduce.JobACL;
 import org.apache.hadoop.mapreduce.v2.api.records.AMInfo;
@@ -88,7 +89,7 @@ public interface Job {
   TaskAttemptCompletionEvent[]
       getTaskAttemptCompletionEvents(int fromEventId, int maxEvents);
 
-  TaskAttemptCompletionEvent[]
+  TaskCompletionEvent[]
       getMapAttemptCompletionEvents(int startIndex, int maxEvents);
 
   /**

+ 56 - 23
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java

@@ -43,6 +43,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobACLsManager;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapreduce.Counters;
 import org.apache.hadoop.mapreduce.JobACL;
 import org.apache.hadoop.mapreduce.JobContext;
@@ -130,6 +131,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
   private static final TaskAttemptCompletionEvent[]
     EMPTY_TASK_ATTEMPT_COMPLETION_EVENTS = new TaskAttemptCompletionEvent[0];
 
+  private static final TaskCompletionEvent[]
+    EMPTY_TASK_COMPLETION_EVENTS = new TaskCompletionEvent[0];
+
   private static final Log LOG = LogFactory.getLog(JobImpl.class);
 
   //The maximum fraction of fetch failures allowed for a map
@@ -196,7 +200,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
   private int allowedMapFailuresPercent = 0;
   private int allowedReduceFailuresPercent = 0;
   private List<TaskAttemptCompletionEvent> taskAttemptCompletionEvents;
-  private List<TaskAttemptCompletionEvent> mapAttemptCompletionEvents;
+  private List<TaskCompletionEvent> mapAttemptCompletionEvents;
+  private List<Integer> taskCompletionIdxToMapCompletionIdx;
   private final List<String> diagnostics = new ArrayList<String>();
   
   //task/attempt related datastructures
@@ -684,27 +689,31 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
   @Override
   public TaskAttemptCompletionEvent[] getTaskAttemptCompletionEvents(
       int fromEventId, int maxEvents) {
-    return getAttemptCompletionEvents(taskAttemptCompletionEvents,
-        fromEventId, maxEvents);
+    TaskAttemptCompletionEvent[] events = EMPTY_TASK_ATTEMPT_COMPLETION_EVENTS;
+    readLock.lock();
+    try {
+      if (taskAttemptCompletionEvents.size() > fromEventId) {
+        int actualMax = Math.min(maxEvents,
+            (taskAttemptCompletionEvents.size() - fromEventId));
+        events = taskAttemptCompletionEvents.subList(fromEventId,
+            actualMax + fromEventId).toArray(events);
+      }
+      return events;
+    } finally {
+      readLock.unlock();
+    }
   }
 
   @Override
-  public TaskAttemptCompletionEvent[] getMapAttemptCompletionEvents(
+  public TaskCompletionEvent[] getMapAttemptCompletionEvents(
       int startIndex, int maxEvents) {
-    return getAttemptCompletionEvents(mapAttemptCompletionEvents,
-        startIndex, maxEvents);
-  }
-
-  private TaskAttemptCompletionEvent[] getAttemptCompletionEvents(
-      List<TaskAttemptCompletionEvent> eventList,
-      int startIndex, int maxEvents) {
-    TaskAttemptCompletionEvent[] events = EMPTY_TASK_ATTEMPT_COMPLETION_EVENTS;
+    TaskCompletionEvent[] events = EMPTY_TASK_COMPLETION_EVENTS;
     readLock.lock();
     try {
-      if (eventList.size() > startIndex) {
+      if (mapAttemptCompletionEvents.size() > startIndex) {
         int actualMax = Math.min(maxEvents,
-            (eventList.size() - startIndex));
-        events = eventList.subList(startIndex,
+            (mapAttemptCompletionEvents.size() - startIndex));
+        events = mapAttemptCompletionEvents.subList(startIndex,
             actualMax + startIndex).toArray(events);
       }
       return events;
@@ -1068,9 +1077,13 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
     boolean smallCpu =
         (
             Math.max(
-                conf.getInt(MRJobConfig.MAP_CPU_VCORES, 1), 
-                conf.getInt(MRJobConfig.REDUCE_CPU_VCORES, 1)) < 
-             sysCPUSizeForUberSlot
+                conf.getInt(
+                    MRJobConfig.MAP_CPU_VCORES, 
+                    MRJobConfig.DEFAULT_MAP_CPU_VCORES), 
+                conf.getInt(
+                    MRJobConfig.REDUCE_CPU_VCORES, 
+                    MRJobConfig.DEFAULT_REDUCE_CPU_VCORES)) 
+             <= sysCPUSizeForUberSlot
         );
     boolean notChainJob = !isChainJob(conf);
 
@@ -1243,7 +1256,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
             new ArrayList<TaskAttemptCompletionEvent>(
                 job.numMapTasks + job.numReduceTasks + 10);
         job.mapAttemptCompletionEvents =
-            new ArrayList<TaskAttemptCompletionEvent>(job.numMapTasks + 10);
+            new ArrayList<TaskCompletionEvent>(job.numMapTasks + 10);
+        job.taskCompletionIdxToMapCompletionIdx = new ArrayList<Integer>(
+            job.numMapTasks + job.numReduceTasks + 10);
 
         job.allowedMapFailuresPercent =
             job.conf.getInt(MRJobConfig.MAP_FAILURES_MAX_PERCENT, 0);
@@ -1558,19 +1573,37 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
       //eventId is equal to index in the arraylist
       tce.setEventId(job.taskAttemptCompletionEvents.size());
       job.taskAttemptCompletionEvents.add(tce);
+      int mapEventIdx = -1;
       if (TaskType.MAP.equals(tce.getAttemptId().getTaskId().getTaskType())) {
-        job.mapAttemptCompletionEvents.add(tce);
+        // we track map completions separately from task completions because
+        // - getMapAttemptCompletionEvents uses index ranges specific to maps
+        // - type converting the same events over and over is expensive
+        mapEventIdx = job.mapAttemptCompletionEvents.size();
+        job.mapAttemptCompletionEvents.add(TypeConverter.fromYarn(tce));
       }
+      job.taskCompletionIdxToMapCompletionIdx.add(mapEventIdx);
       
       TaskAttemptId attemptId = tce.getAttemptId();
       TaskId taskId = attemptId.getTaskId();
       //make the previous completion event as obsolete if it exists
-      Object successEventNo = 
-        job.successAttemptCompletionEventNoMap.remove(taskId);
+      Integer successEventNo =
+          job.successAttemptCompletionEventNoMap.remove(taskId);
       if (successEventNo != null) {
         TaskAttemptCompletionEvent successEvent = 
-          job.taskAttemptCompletionEvents.get((Integer) successEventNo);
+          job.taskAttemptCompletionEvents.get(successEventNo);
         successEvent.setStatus(TaskAttemptCompletionEventStatus.OBSOLETE);
+        int mapCompletionIdx =
+            job.taskCompletionIdxToMapCompletionIdx.get(successEventNo);
+        if (mapCompletionIdx >= 0) {
+          // update the corresponding TaskCompletionEvent for the map
+          TaskCompletionEvent mapEvent =
+              job.mapAttemptCompletionEvents.get(mapCompletionIdx);
+          job.mapAttemptCompletionEvents.set(mapCompletionIdx,
+              new TaskCompletionEvent(mapEvent.getEventId(),
+                  mapEvent.getTaskAttemptId(), mapEvent.idWithinJob(),
+                  mapEvent.isMapTask(), TaskCompletionEvent.Status.OBSOLETE,
+                  mapEvent.getTaskTrackerHttp()));
+        }
       }
       
       // if this attempt is not successful then why is the previous successful 
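
The in-line comment above states the motivation: map completion events are cached already converted to the old TaskCompletionEvent type, and a parallel index list records where each task-completion slot landed in the map-only list, so marking a successful attempt OBSOLETE can patch both views without re-converting anything. A stripped-down sketch of that bookkeeping follows; Event and MapEvent are simplified stand-ins, not the MapReduce record types.

import java.util.ArrayList;
import java.util.List;

// Sketch of keeping a converted "map only" view alongside the full event
// list, plus an index translation so later status updates hit both lists.
public class CompletionEventCacheSketch {
  static class Event { final int id; final boolean map; boolean obsolete;
    Event(int id, boolean map) { this.id = id; this.map = map; } }
  static class MapEvent { final int id; boolean obsolete;
    MapEvent(int id) { this.id = id; } }

  private final List<Event> allEvents = new ArrayList<Event>();
  private final List<MapEvent> mapEvents = new ArrayList<MapEvent>();
  // For event i in allEvents, index of its converted copy in mapEvents,
  // or -1 if the event did not belong to a map task.
  private final List<Integer> allIdxToMapIdx = new ArrayList<Integer>();

  void addEvent(Event e) {
    allEvents.add(e);
    int mapIdx = -1;
    if (e.map) {                      // convert once, at insertion time
      mapIdx = mapEvents.size();
      mapEvents.add(new MapEvent(e.id));
    }
    allIdxToMapIdx.add(mapIdx);
  }

  void markObsolete(int eventIdx) {
    allEvents.get(eventIdx).obsolete = true;
    int mapIdx = allIdxToMapIdx.get(eventIdx);
    if (mapIdx >= 0) {                // keep the converted view in sync
      mapEvents.get(mapIdx).obsolete = true;
    }
  }
}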

+ 7 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java

@@ -34,6 +34,7 @@ import java.util.Arrays;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent;
@@ -153,9 +154,12 @@ public class TestTaskAttemptListenerImpl {
       .thenReturn(Arrays.copyOfRange(taskEvents, 0, 2));
     when(mockJob.getTaskAttemptCompletionEvents(2, 100))
       .thenReturn(Arrays.copyOfRange(taskEvents, 2, 4));
-    when(mockJob.getMapAttemptCompletionEvents(0, 100)).thenReturn(mapEvents);
-    when(mockJob.getMapAttemptCompletionEvents(0, 2)).thenReturn(mapEvents);
-    when(mockJob.getMapAttemptCompletionEvents(2, 100)).thenReturn(empty);
+    when(mockJob.getMapAttemptCompletionEvents(0, 100)).thenReturn(
+        TypeConverter.fromYarn(mapEvents));
+    when(mockJob.getMapAttemptCompletionEvents(0, 2)).thenReturn(
+        TypeConverter.fromYarn(mapEvents));
+    when(mockJob.getMapAttemptCompletionEvents(2, 100)).thenReturn(
+        TypeConverter.fromYarn(empty));
 
     AppContext appCtx = mock(AppContext.class);
     when(appCtx.getJob(any(JobId.class))).thenReturn(mockJob);

+ 2 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MockJobs.java

@@ -33,6 +33,7 @@ import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobACLsManager;
 import org.apache.hadoop.mapred.ShuffleHandler;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapreduce.Counters;
 import org.apache.hadoop.mapreduce.FileSystemCounter;
 import org.apache.hadoop.mapreduce.JobACL;
@@ -556,7 +557,7 @@ public class MockJobs extends MockApps {
       }
 
       @Override
-      public TaskAttemptCompletionEvent[] getMapAttemptCompletionEvents(
+      public TaskCompletionEvent[] getMapAttemptCompletionEvents(
           int startIndex, int maxEvents) {
         return null;
       }

+ 12 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFetchFailure.java

@@ -25,8 +25,10 @@ import java.util.Arrays;
 import java.util.Iterator;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapreduce.Counters;
 import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.TypeConverter;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent;
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler;
 import org.apache.hadoop.mapreduce.v2.api.records.JobState;
@@ -150,14 +152,16 @@ public class TestFetchFailure {
     Assert.assertEquals("Event status not correct for reduce attempt1",
         TaskAttemptCompletionEventStatus.SUCCEEDED, events[3].getStatus());
 
-    TaskAttemptCompletionEvent mapEvents[] =
+    TaskCompletionEvent mapEvents[] =
         job.getMapAttemptCompletionEvents(0, 2);
+    TaskCompletionEvent convertedEvents[] = TypeConverter.fromYarn(events);
     Assert.assertEquals("Incorrect number of map events", 2, mapEvents.length);
     Assert.assertArrayEquals("Unexpected map events",
-        Arrays.copyOfRange(events, 0, 2), mapEvents);
+        Arrays.copyOfRange(convertedEvents, 0, 2), mapEvents);
     mapEvents = job.getMapAttemptCompletionEvents(2, 200);
     Assert.assertEquals("Incorrect number of map events", 1, mapEvents.length);
-    Assert.assertEquals("Unexpected map event", events[2], mapEvents[0]);
+    Assert.assertEquals("Unexpected map event", convertedEvents[2],
+        mapEvents[0]);
   }
   
   /**
@@ -395,14 +399,16 @@ public class TestFetchFailure {
     Assert.assertEquals("Event status not correct for reduce attempt1",
         TaskAttemptCompletionEventStatus.SUCCEEDED, events[3].getStatus());
 
-    TaskAttemptCompletionEvent mapEvents[] =
+    TaskCompletionEvent mapEvents[] =
         job.getMapAttemptCompletionEvents(0, 2);
+    TaskCompletionEvent convertedEvents[] = TypeConverter.fromYarn(events);
     Assert.assertEquals("Incorrect number of map events", 2, mapEvents.length);
     Assert.assertArrayEquals("Unexpected map events",
-        Arrays.copyOfRange(events, 0, 2), mapEvents);
+        Arrays.copyOfRange(convertedEvents, 0, 2), mapEvents);
     mapEvents = job.getMapAttemptCompletionEvents(2, 200);
     Assert.assertEquals("Incorrect number of map events", 1, mapEvents.length);
-    Assert.assertEquals("Unexpected map event", events[2], mapEvents[0]);
+    Assert.assertEquals("Unexpected map event", convertedEvents[2],
+        mapEvents[0]);
   }
   
 

+ 2 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRuntimeEstimators.java

@@ -32,6 +32,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapreduce.Counters;
 import org.apache.hadoop.mapreduce.JobACL;
 import org.apache.hadoop.mapreduce.v2.api.records.AMInfo;
@@ -441,7 +442,7 @@ public class TestRuntimeEstimators {
     }
 
     @Override
-    public TaskAttemptCompletionEvent[]
+    public TaskCompletionEvent[]
             getMapAttemptCompletionEvents(int startIndex, int maxEvents) {
       throw new UnsupportedOperationException("Not supported yet.");
     }

+ 0 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/protocolrecords/impl/pb/GetDelegationTokenResponsePBImpl.java

@@ -25,7 +25,6 @@ import org.apache.hadoop.yarn.api.records.DelegationToken;
 import org.apache.hadoop.yarn.api.records.ProtoBase;
 import org.apache.hadoop.yarn.api.records.impl.pb.DelegationTokenPBImpl;
 
-
 public class GetDelegationTokenResponsePBImpl extends
       ProtoBase<GetDelegationTokenResponseProto> implements GetDelegationTokenResponse {
   
@@ -97,7 +96,6 @@ public class GetDelegationTokenResponsePBImpl extends
     }
     viaProto = false;
   }
-   
 
   private DelegationTokenPBImpl convertFromProtoFormat(TokenProto p) {
     return new DelegationTokenPBImpl(p);

+ 5 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TaggedInputSplit.java

@@ -138,4 +138,9 @@ class TaggedInputSplit implements Configurable, InputSplit {
     this.conf = conf;
   }
 
+  @Override
+  public String toString() {
+    return inputSplit.toString();
+  }
+
 }

+ 3 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java

@@ -530,6 +530,9 @@ public interface MRJobConfig {
   public static final String MR_AM_ENV =
       MR_AM_PREFIX + "env";
   
+  public static final String MR_AM_ADMIN_USER_ENV =
+      MR_AM_PREFIX + "admin.user.env";
+  
   public static final String MAPRED_MAP_ADMIN_JAVA_OPTS =
       "mapreduce.admin.map.child.java.opts";
 

+ 5 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/TaggedInputSplit.java

@@ -157,4 +157,9 @@ class TaggedInputSplit extends InputSplit implements Configurable, Writable {
     this.conf = conf;
   }
 
+  @Override
+  public String toString() {
+    return inputSplit.toString();
+  }
+
 }

+ 7 - 117
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java

@@ -19,8 +19,6 @@ package org.apache.hadoop.mapreduce.task.reduce;
 
 import java.io.DataInputStream;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
 import java.net.ConnectException;
 import java.net.HttpURLConnection;
 import java.net.MalformedURLException;
@@ -38,12 +36,7 @@ import javax.net.ssl.HttpsURLConnection;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.io.compress.CodecPool;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.io.compress.Decompressor;
-import org.apache.hadoop.io.compress.DefaultCodec;
 import org.apache.hadoop.mapred.Counters;
-import org.apache.hadoop.mapred.IFileInputStream;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapreduce.MRConfig;
@@ -51,9 +44,6 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.security.SecureShuffleUtils;
 import org.apache.hadoop.security.ssl.SSLFactory;
-import org.apache.hadoop.mapreduce.task.reduce.MapOutput.Type;
-import org.apache.hadoop.util.Progressable;
-import org.apache.hadoop.util.ReflectionUtils;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -70,7 +60,7 @@ class Fetcher<K,V> extends Thread {
   /* Default read timeout (in milliseconds) */
   private final static int DEFAULT_READ_TIMEOUT = 3 * 60 * 1000;
 
-  private final Progressable reporter;
+  private final Reporter reporter;
   private static enum ShuffleErrors{IO_ERROR, WRONG_LENGTH, BAD_ID, WRONG_MAP,
                                     CONNECTION, WRONG_REDUCE}
   
@@ -92,15 +82,10 @@ class Fetcher<K,V> extends Thread {
   private final int connectionTimeout;
   private final int readTimeout;
   
-  // Decompression of map-outputs
-  private final CompressionCodec codec;
-  private final Decompressor decompressor;
   private final SecretKey jobTokenSecret;
 
   private volatile boolean stopped = false;
 
-  private JobConf job;
-
   private static boolean sslShuffle;
   private static SSLFactory sslFactory;
 
@@ -108,7 +93,6 @@ class Fetcher<K,V> extends Thread {
                  ShuffleScheduler<K,V> scheduler, MergeManager<K,V> merger,
                  Reporter reporter, ShuffleClientMetrics metrics,
                  ExceptionReporter exceptionReporter, SecretKey jobTokenSecret) {
-    this.job = job;
     this.reporter = reporter;
     this.scheduler = scheduler;
     this.merger = merger;
@@ -130,16 +114,6 @@ class Fetcher<K,V> extends Thread {
     wrongReduceErrs = reporter.getCounter(SHUFFLE_ERR_GRP_NAME,
         ShuffleErrors.WRONG_REDUCE.toString());
     
-    if (job.getCompressMapOutput()) {
-      Class<? extends CompressionCodec> codecClass =
-        job.getMapOutputCompressorClass(DefaultCodec.class);
-      codec = ReflectionUtils.newInstance(codecClass, job);
-      decompressor = CodecPool.getDecompressor(codec);
-    } else {
-      codec = null;
-      decompressor = null;
-    }
-
     this.connectionTimeout = 
       job.getInt(MRJobConfig.SHUFFLE_CONNECT_TIMEOUT,
                  DEFAULT_STALLED_COPY_TIMEOUT);
@@ -170,7 +144,7 @@ class Fetcher<K,V> extends Thread {
         MapHost host = null;
         try {
           // If merge is on, block
-          merger.waitForInMemoryMerge();
+          merger.waitForResource();
 
           // Get a host to shuffle from
           host = scheduler.getHost();
@@ -386,8 +360,8 @@ class Fetcher<K,V> extends Thread {
       mapOutput = merger.reserve(mapId, decompressedLength, id);
       
       // Check if we can shuffle *now* ...
-      if (mapOutput.getType() == Type.WAIT) {
-        LOG.info("fetcher#" + id + " - MergerManager returned Status.WAIT ...");
+      if (mapOutput == null) {
+        LOG.info("fetcher#" + id + " - MergeManager returned status WAIT ...");
         //Not an error but wait to process data.
         return EMPTY_ATTEMPT_ID_ARRAY;
       } 
@@ -396,13 +370,9 @@ class Fetcher<K,V> extends Thread {
       LOG.info("fetcher#" + id + " about to shuffle output of map " + 
                mapOutput.getMapId() + " decomp: " +
                decompressedLength + " len: " + compressedLength + " to " +
-               mapOutput.getType());
-      if (mapOutput.getType() == Type.MEMORY) {
-        shuffleToMemory(host, mapOutput, input, 
-                        (int) decompressedLength, (int) compressedLength);
-      } else {
-        shuffleToDisk(host, mapOutput, input, compressedLength);
-      }
+               mapOutput.getDescription());
+      mapOutput.shuffle(host, input, compressedLength, decompressedLength,
+                        metrics, reporter);
       
       // Inform the shuffle scheduler
       long endTime = System.currentTimeMillis();
@@ -538,84 +508,4 @@ class Fetcher<K,V> extends Thread {
       }
     }
   }
-
-  private void shuffleToMemory(MapHost host, MapOutput<K,V> mapOutput, 
-                               InputStream input, 
-                               int decompressedLength, 
-                               int compressedLength) throws IOException {    
-    IFileInputStream checksumIn = 
-      new IFileInputStream(input, compressedLength, job);
-
-    input = checksumIn;       
-  
-    // Are map-outputs compressed?
-    if (codec != null) {
-      decompressor.reset();
-      input = codec.createInputStream(input, decompressor);
-    }
-  
-    // Copy map-output into an in-memory buffer
-    byte[] shuffleData = mapOutput.getMemory();
-    
-    try {
-      IOUtils.readFully(input, shuffleData, 0, shuffleData.length);
-      metrics.inputBytes(shuffleData.length);
-      reporter.progress();
-      LOG.info("Read " + shuffleData.length + " bytes from map-output for " +
-               mapOutput.getMapId());
-    } catch (IOException ioe) {      
-      // Close the streams
-      IOUtils.cleanup(LOG, input);
-
-      // Re-throw
-      throw ioe;
-    }
-
-  }
-  
-  private void shuffleToDisk(MapHost host, MapOutput<K,V> mapOutput, 
-                             InputStream input, 
-                             long compressedLength) 
-  throws IOException {
-    // Copy data to local-disk
-    OutputStream output = mapOutput.getDisk();
-    long bytesLeft = compressedLength;
-    try {
-      final int BYTES_TO_READ = 64 * 1024;
-      byte[] buf = new byte[BYTES_TO_READ];
-      while (bytesLeft > 0) {
-        int n = input.read(buf, 0, (int) Math.min(bytesLeft, BYTES_TO_READ));
-        if (n < 0) {
-          throw new IOException("read past end of stream reading " + 
-                                mapOutput.getMapId());
-        }
-        output.write(buf, 0, n);
-        bytesLeft -= n;
-        metrics.inputBytes(n);
-        reporter.progress();
-      }
-
-      LOG.info("Read " + (compressedLength - bytesLeft) + 
-               " bytes from map-output for " +
-               mapOutput.getMapId());
-
-      output.close();
-    } catch (IOException ioe) {
-      // Close the streams
-      IOUtils.cleanup(LOG, input, output);
-
-      // Re-throw
-      throw ioe;
-    }
-
-    // Sanity check
-    if (bytesLeft != 0) {
-      throw new IOException("Incomplete map output received for " +
-                            mapOutput.getMapId() + " from " +
-                            host.getHostName() + " (" + 
-                            bytesLeft + " bytes missing of " + 
-                            compressedLength + ")"
-      );
-    }
-  }
 }
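
After this refactor the fetcher no longer knows whether a map output lands in memory or on disk: it waits on the merger, reserves a MapOutput, treats a null reservation as WAIT-and-retry, and lets the returned object perform the copy via shuffle() before commit() or abort(). The sketch below traces that control flow against simplified stand-in interfaces; it illustrates the shape of the new API, not the real Fetcher code.

import java.io.IOException;
import java.io.InputStream;

// Control-flow sketch of a fetcher against the refactored merge API.
// Merger/Output are simplified stand-ins for MergeManager/MapOutput.
public class FetchLoopSketch {
  interface Output {
    void shuffle(InputStream in, long compressedLen, long rawLen) throws IOException;
    void commit() throws IOException;
    void abort();
  }
  interface Merger {
    void waitForResource() throws InterruptedException;     // block while a merge must drain first
    Output reserve(String mapId, long rawLen) throws IOException; // null => WAIT, retry later
  }

  static boolean fetchOne(Merger merger, String mapId, InputStream in,
                          long compressedLen, long rawLen)
      throws IOException, InterruptedException {
    merger.waitForResource();                 // before opening the connection
    Output out = merger.reserve(mapId, rawLen);
    if (out == null) {
      return false;                           // merger said WAIT; caller retries
    }
    try {
      out.shuffle(in, compressedLen, rawLen); // memory vs. disk decided by the Output
      out.commit();
      return true;
    } catch (IOException e) {
      out.abort();                            // release reserved space on failure
      throw e;
    }
  }
}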

+ 127 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryMapOutput.java

@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapreduce.task.reduce;
+
+import java.io.InputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.conf.Configuration;
+
+import org.apache.hadoop.io.BoundedByteArrayOutputStream;
+import org.apache.hadoop.io.IOUtils;
+
+import org.apache.hadoop.io.compress.CodecPool;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.Decompressor;
+
+import org.apache.hadoop.mapred.IFileInputStream;
+import org.apache.hadoop.mapred.Reporter;
+
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+class InMemoryMapOutput<K, V> extends MapOutput<K, V> {
+  private static final Log LOG = LogFactory.getLog(InMemoryMapOutput.class);
+  private Configuration conf;
+  private final MergeManagerImpl<K, V> merger;
+  private final byte[] memory;
+  private BoundedByteArrayOutputStream byteStream;
+  // Decompression of map-outputs
+  private final CompressionCodec codec;
+  private final Decompressor decompressor;
+
+  public InMemoryMapOutput(Configuration conf, TaskAttemptID mapId,
+                           MergeManagerImpl<K, V> merger,
+                           int size, CompressionCodec codec,
+                           boolean primaryMapOutput) {
+    super(mapId, (long)size, primaryMapOutput);
+    this.conf = conf;
+    this.merger = merger;
+    this.codec = codec;
+    byteStream = new BoundedByteArrayOutputStream(size);
+    memory = byteStream.getBuffer();
+    if (codec != null) {
+      decompressor = CodecPool.getDecompressor(codec);
+    } else {
+      decompressor = null;
+    }
+  }
+
+  public byte[] getMemory() {
+    return memory;
+  }
+
+  public BoundedByteArrayOutputStream getArrayStream() {
+    return byteStream;
+  }
+
+  @Override
+  public void shuffle(MapHost host, InputStream input,
+                      long compressedLength, long decompressedLength,
+                      ShuffleClientMetrics metrics,
+                      Reporter reporter) throws IOException {
+    IFileInputStream checksumIn = 
+      new IFileInputStream(input, compressedLength, conf);
+
+    input = checksumIn;       
+  
+    // Are map-outputs compressed?
+    if (codec != null) {
+      decompressor.reset();
+      input = codec.createInputStream(input, decompressor);
+    }
+  
+    try {
+      IOUtils.readFully(input, memory, 0, memory.length);
+      metrics.inputBytes(memory.length);
+      reporter.progress();
+      LOG.info("Read " + memory.length + " bytes from map-output for " +
+                getMapId());
+    } catch (IOException ioe) {      
+      // Close the streams
+      IOUtils.cleanup(LOG, input);
+
+      // Re-throw
+      throw ioe;
+    } finally {
+      CodecPool.returnDecompressor(decompressor);
+    }
+  }
+
+  @Override
+  public void commit() throws IOException {
+    merger.closeInMemoryFile(this);
+  }
+  
+  @Override
+  public void abort() {
+    merger.unreserve(memory.length);
+  }
+
+  @Override
+  public String getDescription() {
+    return "MEMORY";
+  }
+}

+ 2 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/InMemoryReader.java

@@ -35,12 +35,12 @@ import org.apache.hadoop.mapreduce.TaskAttemptID;
 @InterfaceStability.Unstable
 public class InMemoryReader<K, V> extends Reader<K, V> {
   private final TaskAttemptID taskAttemptId;
-  private final MergeManager<K,V> merger;
+  private final MergeManagerImpl<K,V> merger;
   DataInputBuffer memDataIn = new DataInputBuffer();
   private int start;
   private int length;
 
-  public InMemoryReader(MergeManager<K,V> merger, TaskAttemptID taskAttemptId,
+  public InMemoryReader(MergeManagerImpl<K,V> merger, TaskAttemptID taskAttemptId,
                         byte[] data, int start, int length)
   throws IOException {
     super(null, null, length - start, null, null);

+ 19 - 136
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MapOutput.java

@@ -17,119 +17,36 @@
  */
 package org.apache.hadoop.mapreduce.task.reduce;
 
+import java.io.InputStream;
 import java.io.IOException;
-import java.io.OutputStream;
+
 import java.util.Comparator;
 import java.util.concurrent.atomic.AtomicInteger;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalDirAllocator;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.BoundedByteArrayOutputStream;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapOutputFile;
+
+import org.apache.hadoop.mapred.Reporter;
+
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 
 @InterfaceAudience.LimitedPrivate({"MapReduce"})
 @InterfaceStability.Unstable
-public class MapOutput<K,V> {
-  private static final Log LOG = LogFactory.getLog(MapOutput.class);
+public abstract class MapOutput<K, V> {
   private static AtomicInteger ID = new AtomicInteger(0);
   
-  public static enum Type {
-    WAIT,
-    MEMORY,
-    DISK
-  }
-  
   private final int id;
-  
-  private final MergeManager<K,V> merger;
   private final TaskAttemptID mapId;
-  
   private final long size;
-  
-  private final byte[] memory;
-  private BoundedByteArrayOutputStream byteStream;
-  
-  private final FileSystem localFS;
-  private final Path tmpOutputPath;
-  private final Path outputPath;
-  private final OutputStream disk; 
-  
-  private final Type type;
-  
   private final boolean primaryMapOutput;
   
-  public MapOutput(TaskAttemptID mapId, MergeManager<K,V> merger, long size, 
-            JobConf conf, LocalDirAllocator localDirAllocator,
-            int fetcher, boolean primaryMapOutput, MapOutputFile mapOutputFile)
-         throws IOException {
+  public MapOutput(TaskAttemptID mapId, long size, boolean primaryMapOutput) {
     this.id = ID.incrementAndGet();
     this.mapId = mapId;
-    this.merger = merger;
-
-    type = Type.DISK;
-
-    memory = null;
-    byteStream = null;
-
     this.size = size;
-    
-    this.localFS = FileSystem.getLocal(conf);
-    outputPath =
-      mapOutputFile.getInputFileForWrite(mapId.getTaskID(),size);
-    tmpOutputPath = outputPath.suffix(String.valueOf(fetcher));
-
-    disk = localFS.create(tmpOutputPath);
-    
     this.primaryMapOutput = primaryMapOutput;
   }
   
-  public MapOutput(TaskAttemptID mapId, MergeManager<K,V> merger, int size, 
-            boolean primaryMapOutput) {
-    this.id = ID.incrementAndGet();
-    this.mapId = mapId;
-    this.merger = merger;
-
-    type = Type.MEMORY;
-    byteStream = new BoundedByteArrayOutputStream(size);
-    memory = byteStream.getBuffer();
-
-    this.size = size;
-    
-    localFS = null;
-    disk = null;
-    outputPath = null;
-    tmpOutputPath = null;
-    
-    this.primaryMapOutput = primaryMapOutput;
-  }
-
-  public MapOutput(TaskAttemptID mapId) {
-    this.id = ID.incrementAndGet();
-    this.mapId = mapId;
-    
-    type = Type.WAIT;
-    merger = null;
-    memory = null;
-    byteStream = null;
-    
-    size = -1;
-    
-    localFS = null;
-    disk = null;
-    outputPath = null;
-    tmpOutputPath = null;
-
-    this.primaryMapOutput = false;
-}
-  
   public boolean isPrimaryMapOutput() {
     return primaryMapOutput;
   }
@@ -147,62 +64,28 @@ public class MapOutput<K,V> {
     return id;
   }
 
-  public Path getOutputPath() {
-    return outputPath;
-  }
-
-  public byte[] getMemory() {
-    return memory;
-  }
-
-  public BoundedByteArrayOutputStream getArrayStream() {
-    return byteStream;
-  }
-  
-  public OutputStream getDisk() {
-    return disk;
-  }
-
   public TaskAttemptID getMapId() {
     return mapId;
   }
 
-  public Type getType() {
-    return type;
-  }
-
   public long getSize() {
     return size;
   }
 
-  public void commit() throws IOException {
-    if (type == Type.MEMORY) {
-      merger.closeInMemoryFile(this);
-    } else if (type == Type.DISK) {
-      localFS.rename(tmpOutputPath, outputPath);
-      merger.closeOnDiskFile(outputPath);
-    } else {
-      throw new IOException("Cannot commit MapOutput of type WAIT!");
-    }
-  }
-  
-  public void abort() {
-    if (type == Type.MEMORY) {
-      merger.unreserve(memory.length);
-    } else if (type == Type.DISK) {
-      try {
-        localFS.delete(tmpOutputPath, false);
-      } catch (IOException ie) {
-        LOG.info("failure to clean up " + tmpOutputPath, ie);
-      }
-    } else {
-      throw new IllegalArgumentException
-                   ("Cannot commit MapOutput with of type WAIT!");
-    }
-  }
+  public abstract void shuffle(MapHost host, InputStream input,
+                               long compressedLength,
+                               long decompressedLength,
+                               ShuffleClientMetrics metrics,
+                               Reporter reporter) throws IOException;
+
+  public abstract void commit() throws IOException;
   
+  public abstract void abort();
+
+  public abstract String getDescription();
+
   public String toString() {
-    return "MapOutput(" + mapId + ", " + type + ")";
+    return "MapOutput(" + mapId + ", " + getDescription() + ")";
   }
   
   public static class MapOutputComparator<K, V> 

+ 24 - 751
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java

@@ -15,783 +15,56 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.mapreduce.task.reduce;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.Set;
-import java.util.TreeSet;
+package org.apache.hadoop.mapreduce.task.reduce;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.ChecksumFileSystem;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalDirAllocator;
-import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.DataInputBuffer;
-import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.mapred.Counters;
-import org.apache.hadoop.mapred.IFile;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapOutputFile;
-import org.apache.hadoop.mapred.Merger;
 import org.apache.hadoop.mapred.RawKeyValueIterator;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.Task;
-import org.apache.hadoop.mapred.IFile.Reader;
-import org.apache.hadoop.mapred.IFile.Writer;
-import org.apache.hadoop.mapred.Merger.Segment;
 import org.apache.hadoop.mapred.Task.CombineOutputCollector;
-import org.apache.hadoop.mapred.Task.CombineValuesIterator;
-import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.hadoop.mapreduce.TaskID;
-import org.apache.hadoop.mapreduce.task.reduce.MapOutput.MapOutputComparator;
 import org.apache.hadoop.util.Progress;
-import org.apache.hadoop.util.ReflectionUtils;
 
-import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
 
-@SuppressWarnings(value={"unchecked"})
-@InterfaceAudience.LimitedPrivate({"MapReduce"})
+/**
+ * An interface for a reduce side merge that works with the default Shuffle
+ * implementation.
+ */
+@InterfaceAudience.Private
 @InterfaceStability.Unstable
-public class MergeManager<K, V> {
-  
-  private static final Log LOG = LogFactory.getLog(MergeManager.class);
-  
-  /* Maximum percentage of the in-memory limit that a single shuffle can 
-   * consume*/ 
-  private static final float DEFAULT_SHUFFLE_MEMORY_LIMIT_PERCENT
-    = 0.25f;
-
-  private final TaskAttemptID reduceId;
-  
-  private final JobConf jobConf;
-  private final FileSystem localFS;
-  private final FileSystem rfs;
-  private final LocalDirAllocator localDirAllocator;
-  
-  protected MapOutputFile mapOutputFile;
-  
-  Set<MapOutput<K, V>> inMemoryMergedMapOutputs = 
-    new TreeSet<MapOutput<K,V>>(new MapOutputComparator<K, V>());
-  private final IntermediateMemoryToMemoryMerger memToMemMerger;
-
-  Set<MapOutput<K, V>> inMemoryMapOutputs = 
-    new TreeSet<MapOutput<K,V>>(new MapOutputComparator<K, V>());
-  private final MergeThread<MapOutput<K,V>, K,V> inMemoryMerger;
-  
-  Set<Path> onDiskMapOutputs = new TreeSet<Path>();
-  private final OnDiskMerger onDiskMerger;
-  
-  private final long memoryLimit;
-  private long usedMemory;
-  private long commitMemory;
-  private final long maxSingleShuffleLimit;
-  
-  private final int memToMemMergeOutputsThreshold; 
-  private final long mergeThreshold;
-  
-  private final int ioSortFactor;
-
-  private final Reporter reporter;
-  private final ExceptionReporter exceptionReporter;
-  
+public interface MergeManager<K, V> {
   /**
-   * Combiner class to run during in-memory merge, if defined.
+   * Wait until the merge has freed enough resources to accept shuffled data.
+   * This will be called before a network connection is established to fetch
+   * the map output.
    */
-  private final Class<? extends Reducer> combinerClass;
+  public void waitForResource() throws InterruptedException;
 
   /**
-   * Resettable collector used for combine.
+   * Reserve resources for data that is about to be shuffled.  This will be
+   * called after a network connection has been made to fetch the data.
+   * @param mapId mapper from which data will be shuffled.
+   * @param requestedSize size in bytes of the data that will be shuffled.
+   * @param fetcher id of the map output fetcher that will shuffle the data.
+   * @return a MapOutput object that the shuffle can copy data into.  If the
+   * required resources cannot be reserved immediately, null may be returned.
    */
-  private final CombineOutputCollector<K,V> combineCollector;
-
-  private final Counters.Counter spilledRecordsCounter;
-
-  private final Counters.Counter reduceCombineInputCounter;
-
-  private final Counters.Counter mergedMapOutputsCounter;
-  
-  private final CompressionCodec codec;
-  
-  private final Progress mergePhase;
-
-  public MergeManager(TaskAttemptID reduceId, JobConf jobConf, 
-                      FileSystem localFS,
-                      LocalDirAllocator localDirAllocator,  
-                      Reporter reporter,
-                      CompressionCodec codec,
-                      Class<? extends Reducer> combinerClass,
-                      CombineOutputCollector<K,V> combineCollector,
-                      Counters.Counter spilledRecordsCounter,
-                      Counters.Counter reduceCombineInputCounter,
-                      Counters.Counter mergedMapOutputsCounter,
-                      ExceptionReporter exceptionReporter,
-                      Progress mergePhase, MapOutputFile mapOutputFile) {
-    this.reduceId = reduceId;
-    this.jobConf = jobConf;
-    this.localDirAllocator = localDirAllocator;
-    this.exceptionReporter = exceptionReporter;
-    
-    this.reporter = reporter;
-    this.codec = codec;
-    this.combinerClass = combinerClass;
-    this.combineCollector = combineCollector;
-    this.reduceCombineInputCounter = reduceCombineInputCounter;
-    this.spilledRecordsCounter = spilledRecordsCounter;
-    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
-    this.mapOutputFile = mapOutputFile;
-    this.mapOutputFile.setConf(jobConf);
-    
-    this.localFS = localFS;
-    this.rfs = ((LocalFileSystem)localFS).getRaw();
-    
-    final float maxInMemCopyUse =
-      jobConf.getFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.90f);
-    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
-      throw new IllegalArgumentException("Invalid value for " +
-          MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT + ": " +
-          maxInMemCopyUse);
-    }
-
-    // Allow unit tests to fix Runtime memory
-    this.memoryLimit = 
-      (long)(jobConf.getLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES,
-          Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE))
-        * maxInMemCopyUse);
- 
-    this.ioSortFactor = jobConf.getInt(MRJobConfig.IO_SORT_FACTOR, 100);
+  public MapOutput<K, V> reserve(TaskAttemptID mapId, long requestedSize,
+                                 int fetcher) throws IOException;
 
-    final float singleShuffleMemoryLimitPercent =
-        jobConf.getFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT,
-            DEFAULT_SHUFFLE_MEMORY_LIMIT_PERCENT);
-    if (singleShuffleMemoryLimitPercent <= 0.0f
-        || singleShuffleMemoryLimitPercent > 1.0f) {
-      throw new IllegalArgumentException("Invalid value for "
-          + MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
-          + singleShuffleMemoryLimitPercent);
-    }
-
-    usedMemory = 0L;
-    commitMemory = 0L;
-    this.maxSingleShuffleLimit = 
-      (long)(memoryLimit * singleShuffleMemoryLimitPercent);
-    this.memToMemMergeOutputsThreshold = 
-            jobConf.getInt(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, ioSortFactor);
-    this.mergeThreshold = (long)(this.memoryLimit * 
-                          jobConf.getFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 
-                                           0.90f));
-    LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", " +
-             "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", " +
-             "mergeThreshold=" + mergeThreshold + ", " + 
-             "ioSortFactor=" + ioSortFactor + ", " +
-             "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);
-
-    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
-      throw new RuntimeException("Invlaid configuration: "
-          + "maxSingleShuffleLimit should be less than mergeThreshold"
-          + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
-          + "mergeThreshold: " + this.mergeThreshold);
-    }
-
-    boolean allowMemToMemMerge = 
-      jobConf.getBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);
-    if (allowMemToMemMerge) {
-      this.memToMemMerger = 
-        new IntermediateMemoryToMemoryMerger(this,
-                                             memToMemMergeOutputsThreshold);
-      this.memToMemMerger.start();
-    } else {
-      this.memToMemMerger = null;
-    }
-    
-    this.inMemoryMerger = createInMemoryMerger();
-    this.inMemoryMerger.start();
-    
-    this.onDiskMerger = new OnDiskMerger(this);
-    this.onDiskMerger.start();
-    
-    this.mergePhase = mergePhase;
-  }
-  
-  protected MergeThread<MapOutput<K,V>, K,V> createInMemoryMerger() {
-    return new InMemoryMerger(this);
-  }
-
-  TaskAttemptID getReduceId() {
-    return reduceId;
-  }
-
-  @VisibleForTesting
-  ExceptionReporter getExceptionReporter() {
-    return exceptionReporter;
-  }
-
-  public void waitForInMemoryMerge() throws InterruptedException {
-    inMemoryMerger.waitForMerge();
-  }
-  
-  private boolean canShuffleToMemory(long requestedSize) {
-    return (requestedSize < maxSingleShuffleLimit); 
-  }
-  
-  final private MapOutput<K,V> stallShuffle = new MapOutput<K,V>(null);
-
-  public synchronized MapOutput<K,V> reserve(TaskAttemptID mapId, 
-                                             long requestedSize,
-                                             int fetcher
-                                             ) throws IOException {
-    if (!canShuffleToMemory(requestedSize)) {
-      LOG.info(mapId + ": Shuffling to disk since " + requestedSize + 
-               " is greater than maxSingleShuffleLimit (" + 
-               maxSingleShuffleLimit + ")");
-      return new MapOutput<K,V>(mapId, this, requestedSize, jobConf, 
-                                localDirAllocator, fetcher, true,
-                                mapOutputFile);
-    }
-    
-    // Stall shuffle if we are above the memory limit
-
-    // It is possible that all threads could just be stalling and not make
-    // progress at all. This could happen when:
-    //
-    // requested size is causing the used memory to go above limit &&
-    // requested size < singleShuffleLimit &&
-    // current used size < mergeThreshold (merge will not get triggered)
-    //
-    // To avoid this from happening, we allow exactly one thread to go past
-    // the memory limit. We check (usedMemory > memoryLimit) and not
-    // (usedMemory + requestedSize > memoryLimit). When this thread is done
-    // fetching, this will automatically trigger a merge thereby unlocking
-    // all the stalled threads
-    
-    if (usedMemory > memoryLimit) {
-      LOG.debug(mapId + ": Stalling shuffle since usedMemory (" + usedMemory
-          + ") is greater than memoryLimit (" + memoryLimit + ")." + 
-          " CommitMemory is (" + commitMemory + ")"); 
-      return stallShuffle;
-    }
-    
-    // Allow the in-memory shuffle to progress
-    LOG.debug(mapId + ": Proceeding with shuffle since usedMemory ("
-        + usedMemory + ") is lesser than memoryLimit (" + memoryLimit + ")."
-        + "CommitMemory is (" + commitMemory + ")"); 
-    return unconditionalReserve(mapId, requestedSize, true);
-  }
-  
   /**
-   * Unconditional Reserve is used by the Memory-to-Memory thread
-   * @return
+   * Called at the end of the shuffle.
+   * @return a key/value iterator over the merged map outputs.
    */
-  private synchronized MapOutput<K, V> unconditionalReserve(
-      TaskAttemptID mapId, long requestedSize, boolean primaryMapOutput) {
-    usedMemory += requestedSize;
-    return new MapOutput<K,V>(mapId, this, (int)requestedSize, 
-        primaryMapOutput);
-  }
-  
-  synchronized void unreserve(long size) {
-    usedMemory -= size;
-  }
-
-  public synchronized void closeInMemoryFile(MapOutput<K,V> mapOutput) { 
-    inMemoryMapOutputs.add(mapOutput);
-    LOG.info("closeInMemoryFile -> map-output of size: " + mapOutput.getSize()
-        + ", inMemoryMapOutputs.size() -> " + inMemoryMapOutputs.size()
-        + ", commitMemory -> " + commitMemory + ", usedMemory ->" + usedMemory);
-
-    commitMemory+= mapOutput.getSize();
-
-    // Can hang if mergeThreshold is really low.
-    if (commitMemory >= mergeThreshold) {
-      LOG.info("Starting inMemoryMerger's merge since commitMemory=" +
-          commitMemory + " > mergeThreshold=" + mergeThreshold + 
-          ". Current usedMemory=" + usedMemory);
-      inMemoryMapOutputs.addAll(inMemoryMergedMapOutputs);
-      inMemoryMergedMapOutputs.clear();
-      inMemoryMerger.startMerge(inMemoryMapOutputs);
-      commitMemory = 0L;  // Reset commitMemory.
-    }
-    
-    if (memToMemMerger != null) {
-      if (inMemoryMapOutputs.size() >= memToMemMergeOutputsThreshold) { 
-        memToMemMerger.startMerge(inMemoryMapOutputs);
-      }
-    }
-  }
-  
-  
-  public synchronized void closeInMemoryMergedFile(MapOutput<K,V> mapOutput) {
-    inMemoryMergedMapOutputs.add(mapOutput);
-    LOG.info("closeInMemoryMergedFile -> size: " + mapOutput.getSize() + 
-             ", inMemoryMergedMapOutputs.size() -> " + 
-             inMemoryMergedMapOutputs.size());
-  }
-  
-  public synchronized void closeOnDiskFile(Path file) {
-    onDiskMapOutputs.add(file);
-    
-    if (onDiskMapOutputs.size() >= (2 * ioSortFactor - 1)) {
-      onDiskMerger.startMerge(onDiskMapOutputs);
-    }
-  }
-  
-  public RawKeyValueIterator close() throws Throwable {
-    // Wait for on-going merges to complete
-    if (memToMemMerger != null) { 
-      memToMemMerger.close();
-    }
-    inMemoryMerger.close();
-    onDiskMerger.close();
-    
-    List<MapOutput<K, V>> memory = 
-      new ArrayList<MapOutput<K, V>>(inMemoryMergedMapOutputs);
-    memory.addAll(inMemoryMapOutputs);
-    List<Path> disk = new ArrayList<Path>(onDiskMapOutputs);
-    return finalMerge(jobConf, rfs, memory, disk);
-  }
-   
-  private class IntermediateMemoryToMemoryMerger 
-  extends MergeThread<MapOutput<K, V>, K, V> {
-    
-    public IntermediateMemoryToMemoryMerger(MergeManager<K, V> manager, 
-                                            int mergeFactor) {
-      super(manager, mergeFactor, exceptionReporter);
-      setName("InMemoryMerger - Thread to do in-memory merge of in-memory " +
-      		    "shuffled map-outputs");
-      setDaemon(true);
-    }
-
-    @Override
-    public void merge(List<MapOutput<K, V>> inputs) throws IOException {
-      if (inputs == null || inputs.size() == 0) {
-        return;
-      }
-
-      TaskAttemptID dummyMapId = inputs.get(0).getMapId(); 
-      List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
-      long mergeOutputSize = 
-        createInMemorySegments(inputs, inMemorySegments, 0);
-      int noInMemorySegments = inMemorySegments.size();
-      
-      MapOutput<K, V> mergedMapOutputs = 
-        unconditionalReserve(dummyMapId, mergeOutputSize, false);
-      
-      Writer<K, V> writer = 
-        new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());
-      
-      LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
-               " segments of total-size: " + mergeOutputSize);
-
-      RawKeyValueIterator rIter = 
-        Merger.merge(jobConf, rfs,
-                     (Class<K>)jobConf.getMapOutputKeyClass(),
-                     (Class<V>)jobConf.getMapOutputValueClass(),
-                     inMemorySegments, inMemorySegments.size(),
-                     new Path(reduceId.toString()),
-                     (RawComparator<K>)jobConf.getOutputKeyComparator(),
-                     reporter, null, null, null);
-      Merger.writeFile(rIter, writer, reporter, jobConf);
-      writer.close();
-
-      LOG.info(reduceId +  
-               " Memory-to-Memory merge of the " + noInMemorySegments +
-               " files in-memory complete.");
-
-      // Note the output of the merge
-      closeInMemoryMergedFile(mergedMapOutputs);
-    }
-  }
-  
-  private class InMemoryMerger extends MergeThread<MapOutput<K,V>, K,V> {
-    
-    public InMemoryMerger(MergeManager<K, V> manager) {
-      super(manager, Integer.MAX_VALUE, exceptionReporter);
-      setName
-      ("InMemoryMerger - Thread to merge in-memory shuffled map-outputs");
-      setDaemon(true);
-    }
-    
-    @Override
-    public void merge(List<MapOutput<K,V>> inputs) throws IOException {
-      if (inputs == null || inputs.size() == 0) {
-        return;
-      }
-      
-      //name this output file same as the name of the first file that is 
-      //there in the current list of inmem files (this is guaranteed to
-      //be absent on the disk currently. So we don't overwrite a prev. 
-      //created spill). Also we need to create the output file now since
-      //it is not guaranteed that this file will be present after merge
-      //is called (we delete empty files as soon as we see them
-      //in the merge method)
-
-      //figure out the mapId 
-      TaskAttemptID mapId = inputs.get(0).getMapId();
-      TaskID mapTaskId = mapId.getTaskID();
-
-      List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
-      long mergeOutputSize = 
-        createInMemorySegments(inputs, inMemorySegments,0);
-      int noInMemorySegments = inMemorySegments.size();
-
-      Path outputPath = 
-        mapOutputFile.getInputFileForWrite(mapTaskId,
-                                           mergeOutputSize).suffix(
-                                               Task.MERGED_OUTPUT_PREFIX);
-
-      Writer<K,V> writer = 
-        new Writer<K,V>(jobConf, rfs, outputPath,
-                        (Class<K>) jobConf.getMapOutputKeyClass(),
-                        (Class<V>) jobConf.getMapOutputValueClass(),
-                        codec, null);
-
-      RawKeyValueIterator rIter = null;
-      try {
-        LOG.info("Initiating in-memory merge with " + noInMemorySegments + 
-                 " segments...");
-        
-        rIter = Merger.merge(jobConf, rfs,
-                             (Class<K>)jobConf.getMapOutputKeyClass(),
-                             (Class<V>)jobConf.getMapOutputValueClass(),
-                             inMemorySegments, inMemorySegments.size(),
-                             new Path(reduceId.toString()),
-                             (RawComparator<K>)jobConf.getOutputKeyComparator(),
-                             reporter, spilledRecordsCounter, null, null);
-        
-        if (null == combinerClass) {
-          Merger.writeFile(rIter, writer, reporter, jobConf);
-        } else {
-          combineCollector.setWriter(writer);
-          combineAndSpill(rIter, reduceCombineInputCounter);
-        }
-        writer.close();
-
-        LOG.info(reduceId +  
-            " Merge of the " + noInMemorySegments +
-            " files in-memory complete." +
-            " Local file is " + outputPath + " of size " + 
-            localFS.getFileStatus(outputPath).getLen());
-      } catch (IOException e) { 
-        //make sure that we delete the ondisk file that we created 
-        //earlier when we invoked cloneFileAttributes
-        localFS.delete(outputPath, true);
-        throw e;
-      }
-
-      // Note the output of the merge
-      closeOnDiskFile(outputPath);
-    }
-
-  }
-  
-  private class OnDiskMerger extends MergeThread<Path,K,V> {
-    
-    public OnDiskMerger(MergeManager<K, V> manager) {
-      super(manager, Integer.MAX_VALUE, exceptionReporter);
-      setName("OnDiskMerger - Thread to merge on-disk map-outputs");
-      setDaemon(true);
-    }
-    
-    @Override
-    public void merge(List<Path> inputs) throws IOException {
-      // sanity check
-      if (inputs == null || inputs.isEmpty()) {
-        LOG.info("No ondisk files to merge...");
-        return;
-      }
-      
-      long approxOutputSize = 0;
-      int bytesPerSum = 
-        jobConf.getInt("io.bytes.per.checksum", 512);
-      
-      LOG.info("OnDiskMerger: We have  " + inputs.size() + 
-               " map outputs on disk. Triggering merge...");
-      
-      // 1. Prepare the list of files to be merged. 
-      for (Path file : inputs) {
-        approxOutputSize += localFS.getFileStatus(file).getLen();
-      }
-
-      // add the checksum length
-      approxOutputSize += 
-        ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);
-
-      // 2. Start the on-disk merge process
-      Path outputPath = 
-        localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(), 
-            approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);
-      Writer<K,V> writer = 
-        new Writer<K,V>(jobConf, rfs, outputPath, 
-                        (Class<K>) jobConf.getMapOutputKeyClass(), 
-                        (Class<V>) jobConf.getMapOutputValueClass(),
-                        codec, null);
-      RawKeyValueIterator iter  = null;
-      Path tmpDir = new Path(reduceId.toString());
-      try {
-        iter = Merger.merge(jobConf, rfs,
-                            (Class<K>) jobConf.getMapOutputKeyClass(),
-                            (Class<V>) jobConf.getMapOutputValueClass(),
-                            codec, inputs.toArray(new Path[inputs.size()]), 
-                            true, ioSortFactor, tmpDir, 
-                            (RawComparator<K>) jobConf.getOutputKeyComparator(), 
-                            reporter, spilledRecordsCounter, null, 
-                            mergedMapOutputsCounter, null);
-
-        Merger.writeFile(iter, writer, reporter, jobConf);
-        writer.close();
-      } catch (IOException e) {
-        localFS.delete(outputPath, true);
-        throw e;
-      }
-
-      closeOnDiskFile(outputPath);
-
-      LOG.info(reduceId +
-          " Finished merging " + inputs.size() + 
-          " map output files on disk of total-size " + 
-          approxOutputSize + "." + 
-          " Local output file is " + outputPath + " of size " +
-          localFS.getFileStatus(outputPath).getLen());
-    }
-  }
-  
-  private void combineAndSpill(
-      RawKeyValueIterator kvIter,
-      Counters.Counter inCounter) throws IOException {
-    JobConf job = jobConf;
-    Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
-    Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
-    Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
-    RawComparator<K> comparator = 
-      (RawComparator<K>)job.getOutputKeyComparator();
-    try {
-      CombineValuesIterator values = new CombineValuesIterator(
-          kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
-          inCounter);
-      while (values.more()) {
-        combiner.reduce(values.getKey(), values, combineCollector,
-                        Reporter.NULL);
-        values.nextKey();
-      }
-    } finally {
-      combiner.close();
-    }
-  }
-
-  private long createInMemorySegments(List<MapOutput<K,V>> inMemoryMapOutputs,
-                                      List<Segment<K, V>> inMemorySegments, 
-                                      long leaveBytes
-                                      ) throws IOException {
-    long totalSize = 0L;
-    // We could use fullSize could come from the RamManager, but files can be
-    // closed but not yet present in inMemoryMapOutputs
-    long fullSize = 0L;
-    for (MapOutput<K,V> mo : inMemoryMapOutputs) {
-      fullSize += mo.getMemory().length;
-    }
-    while(fullSize > leaveBytes) {
-      MapOutput<K,V> mo = inMemoryMapOutputs.remove(0);
-      byte[] data = mo.getMemory();
-      long size = data.length;
-      totalSize += size;
-      fullSize -= size;
-      Reader<K,V> reader = new InMemoryReader<K,V>(MergeManager.this, 
-                                                   mo.getMapId(),
-                                                   data, 0, (int)size);
-      inMemorySegments.add(new Segment<K,V>(reader, true, 
-                                            (mo.isPrimaryMapOutput() ? 
-                                            mergedMapOutputsCounter : null)));
-    }
-    return totalSize;
-  }
-
-  class RawKVIteratorReader extends IFile.Reader<K,V> {
-
-    private final RawKeyValueIterator kvIter;
-
-    public RawKVIteratorReader(RawKeyValueIterator kvIter, long size)
-        throws IOException {
-      super(null, null, size, null, spilledRecordsCounter);
-      this.kvIter = kvIter;
-    }
-    public boolean nextRawKey(DataInputBuffer key) throws IOException {
-      if (kvIter.next()) {
-        final DataInputBuffer kb = kvIter.getKey();
-        final int kp = kb.getPosition();
-        final int klen = kb.getLength() - kp;
-        key.reset(kb.getData(), kp, klen);
-        bytesRead += klen;
-        return true;
-      }
-      return false;
-    }
-    public void nextRawValue(DataInputBuffer value) throws IOException {
-      final DataInputBuffer vb = kvIter.getValue();
-      final int vp = vb.getPosition();
-      final int vlen = vb.getLength() - vp;
-      value.reset(vb.getData(), vp, vlen);
-      bytesRead += vlen;
-    }
-    public long getPosition() throws IOException {
-      return bytesRead;
-    }
-
-    public void close() throws IOException {
-      kvIter.close();
-    }
-  }
-
-  private RawKeyValueIterator finalMerge(JobConf job, FileSystem fs,
-                                       List<MapOutput<K,V>> inMemoryMapOutputs,
-                                       List<Path> onDiskMapOutputs
-                                       ) throws IOException {
-    LOG.info("finalMerge called with " + 
-             inMemoryMapOutputs.size() + " in-memory map-outputs and " + 
-             onDiskMapOutputs.size() + " on-disk map-outputs");
-    
-    final float maxRedPer =
-      job.getFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0f);
-    if (maxRedPer > 1.0 || maxRedPer < 0.0) {
-      throw new IOException(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT +
-                            maxRedPer);
-    }
-    int maxInMemReduce = (int)Math.min(
-        Runtime.getRuntime().maxMemory() * maxRedPer, Integer.MAX_VALUE);
-    
-
-    // merge config params
-    Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
-    Class<V> valueClass = (Class<V>)job.getMapOutputValueClass();
-    boolean keepInputs = job.getKeepFailedTaskFiles();
-    final Path tmpDir = new Path(reduceId.toString());
-    final RawComparator<K> comparator =
-      (RawComparator<K>)job.getOutputKeyComparator();
-
-    // segments required to vacate memory
-    List<Segment<K,V>> memDiskSegments = new ArrayList<Segment<K,V>>();
-    long inMemToDiskBytes = 0;
-    boolean mergePhaseFinished = false;
-    if (inMemoryMapOutputs.size() > 0) {
-      TaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID();
-      inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs, 
-                                                memDiskSegments,
-                                                maxInMemReduce);
-      final int numMemDiskSegments = memDiskSegments.size();
-      if (numMemDiskSegments > 0 &&
-            ioSortFactor > onDiskMapOutputs.size()) {
-        
-        // If we reach here, it implies that we have less than io.sort.factor
-        // disk segments and this will be incremented by 1 (result of the 
-        // memory segments merge). Since this total would still be 
-        // <= io.sort.factor, we will not do any more intermediate merges,
-        // the merge of all these disk segments would be directly fed to the
-        // reduce method
-        
-        mergePhaseFinished = true;
-        // must spill to disk, but can't retain in-mem for intermediate merge
-        final Path outputPath = 
-          mapOutputFile.getInputFileForWrite(mapId,
-                                             inMemToDiskBytes).suffix(
-                                                 Task.MERGED_OUTPUT_PREFIX);
-        final RawKeyValueIterator rIter = Merger.merge(job, fs,
-            keyClass, valueClass, memDiskSegments, numMemDiskSegments,
-            tmpDir, comparator, reporter, spilledRecordsCounter, null, 
-            mergePhase);
-        final Writer<K,V> writer = new Writer<K,V>(job, fs, outputPath,
-            keyClass, valueClass, codec, null);
-        try {
-          Merger.writeFile(rIter, writer, reporter, job);
-          // add to list of final disk outputs.
-          onDiskMapOutputs.add(outputPath);
-        } catch (IOException e) {
-          if (null != outputPath) {
-            try {
-              fs.delete(outputPath, true);
-            } catch (IOException ie) {
-              // NOTHING
-            }
-          }
-          throw e;
-        } finally {
-          if (null != writer) {
-            writer.close();
-          }
-        }
-        LOG.info("Merged " + numMemDiskSegments + " segments, " +
-                 inMemToDiskBytes + " bytes to disk to satisfy " +
-                 "reduce memory limit");
-        inMemToDiskBytes = 0;
-        memDiskSegments.clear();
-      } else if (inMemToDiskBytes != 0) {
-        LOG.info("Keeping " + numMemDiskSegments + " segments, " +
-                 inMemToDiskBytes + " bytes in memory for " +
-                 "intermediate, on-disk merge");
-      }
-    }
-
-    // segments on disk
-    List<Segment<K,V>> diskSegments = new ArrayList<Segment<K,V>>();
-    long onDiskBytes = inMemToDiskBytes;
-    Path[] onDisk = onDiskMapOutputs.toArray(new Path[onDiskMapOutputs.size()]);
-    for (Path file : onDisk) {
-      onDiskBytes += fs.getFileStatus(file).getLen();
-      LOG.debug("Disk file: " + file + " Length is " + 
-          fs.getFileStatus(file).getLen());
-      diskSegments.add(new Segment<K, V>(job, fs, file, codec, keepInputs,
-                                         (file.toString().endsWith(
-                                             Task.MERGED_OUTPUT_PREFIX) ?
-                                          null : mergedMapOutputsCounter)
-                                        ));
-    }
-    LOG.info("Merging " + onDisk.length + " files, " +
-             onDiskBytes + " bytes from disk");
-    Collections.sort(diskSegments, new Comparator<Segment<K,V>>() {
-      public int compare(Segment<K, V> o1, Segment<K, V> o2) {
-        if (o1.getLength() == o2.getLength()) {
-          return 0;
-        }
-        return o1.getLength() < o2.getLength() ? -1 : 1;
-      }
-    });
-
-    // build final list of segments from merged backed by disk + in-mem
-    List<Segment<K,V>> finalSegments = new ArrayList<Segment<K,V>>();
-    long inMemBytes = createInMemorySegments(inMemoryMapOutputs, 
-                                             finalSegments, 0);
-    LOG.info("Merging " + finalSegments.size() + " segments, " +
-             inMemBytes + " bytes from memory into reduce");
-    if (0 != onDiskBytes) {
-      final int numInMemSegments = memDiskSegments.size();
-      diskSegments.addAll(0, memDiskSegments);
-      memDiskSegments.clear();
-      // Pass mergePhase only if there is a going to be intermediate
-      // merges. See comment where mergePhaseFinished is being set
-      Progress thisPhase = (mergePhaseFinished) ? null : mergePhase; 
-      RawKeyValueIterator diskMerge = Merger.merge(
-          job, fs, keyClass, valueClass, diskSegments,
-          ioSortFactor, numInMemSegments, tmpDir, comparator,
-          reporter, false, spilledRecordsCounter, null, thisPhase);
-      diskSegments.clear();
-      if (0 == finalSegments.size()) {
-        return diskMerge;
-      }
-      finalSegments.add(new Segment<K,V>(
-            new RawKVIteratorReader(diskMerge, onDiskBytes), true));
-    }
-    return Merger.merge(job, fs, keyClass, valueClass,
-                 finalSegments, finalSegments.size(), tmpDir,
-                 comparator, reporter, spilledRecordsCounter, null,
-                 null);
-  
-  }
+  public RawKeyValueIterator close() throws Throwable;
 }
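Taken together, the javadoc above fixes the calling convention for the new interface: waitForResource() before a connection is opened, reserve() once the connection is up (a null return means no resources are available right now and the caller should back off and retry), commit() or abort() on the MapOutput it hands back, and close() once every map output is in. The fetch loop below is a minimal sketch of that convention; it compiles against the interface as shown here, but the retry handling and the elided copy step are assumptions, not the real Fetcher code.

import java.io.IOException;

import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.reduce.MapOutput;
import org.apache.hadoop.mapreduce.task.reduce.MergeManager;

/** Minimal sketch of how a caller is expected to drive a MergeManager. */
class SketchFetchLoop<K, V> {
  private final MergeManager<K, V> merger;
  private final int fetcherId;

  SketchFetchLoop(MergeManager<K, V> merger, int fetcherId) {
    this.merger = merger;
    this.fetcherId = fetcherId;
  }

  void fetch(TaskAttemptID mapId, long requestedSize)
      throws IOException, InterruptedException {
    // 1. Block until the merge has freed enough resources to accept data.
    merger.waitForResource();

    // 2. Reserve a MapOutput to shuffle into; null means "not now, retry later".
    MapOutput<K, V> mapOutput = merger.reserve(mapId, requestedSize, fetcherId);
    if (mapOutput == null) {
      return;                      // back off; a real fetcher would re-queue this map
    }

    try {
      // ... copy the map-output bytes into mapOutput here (the shuffle step) ...
      mapOutput.commit();          // hand the completed output to the merge
    } catch (IOException e) {
      mapOutput.abort();           // discard the partial output on failure
      throw e;
    }
  }

  RawKeyValueIterator finish() throws Throwable {
    // 3. At the end of the shuffle, close() returns the iterator fed to the reducer.
    return merger.close();
  }
}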

+ 797 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java

@@ -0,0 +1,797 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapreduce.task.reduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.ChecksumFileSystem;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalDirAllocator;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.IFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapOutputFile;
+import org.apache.hadoop.mapred.Merger;
+import org.apache.hadoop.mapred.RawKeyValueIterator;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.Task;
+import org.apache.hadoop.mapred.IFile.Reader;
+import org.apache.hadoop.mapred.IFile.Writer;
+import org.apache.hadoop.mapred.Merger.Segment;
+import org.apache.hadoop.mapred.Task.CombineOutputCollector;
+import org.apache.hadoop.mapred.Task.CombineValuesIterator;
+import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskID;
+import org.apache.hadoop.mapreduce.task.reduce.MapOutput.MapOutputComparator;
+import org.apache.hadoop.util.Progress;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import com.google.common.annotations.VisibleForTesting;
+
+@SuppressWarnings(value={"unchecked"})
+@InterfaceAudience.LimitedPrivate({"MapReduce"})
+@InterfaceStability.Unstable
+public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
+  
+  private static final Log LOG = LogFactory.getLog(MergeManagerImpl.class);
+  
+  /* Maximum percentage of the in-memory limit that a single shuffle can 
+   * consume*/ 
+  private static final float DEFAULT_SHUFFLE_MEMORY_LIMIT_PERCENT
+    = 0.25f;
+
+  private final TaskAttemptID reduceId;
+  
+  private final JobConf jobConf;
+  private final FileSystem localFS;
+  private final FileSystem rfs;
+  private final LocalDirAllocator localDirAllocator;
+  
+  protected MapOutputFile mapOutputFile;
+  
+  Set<InMemoryMapOutput<K, V>> inMemoryMergedMapOutputs = 
+    new TreeSet<InMemoryMapOutput<K,V>>(new MapOutputComparator<K, V>());
+  private IntermediateMemoryToMemoryMerger memToMemMerger;
+
+  Set<InMemoryMapOutput<K, V>> inMemoryMapOutputs = 
+    new TreeSet<InMemoryMapOutput<K,V>>(new MapOutputComparator<K, V>());
+  private final MergeThread<InMemoryMapOutput<K,V>, K,V> inMemoryMerger;
+  
+  Set<Path> onDiskMapOutputs = new TreeSet<Path>();
+  private final OnDiskMerger onDiskMerger;
+  
+  private final long memoryLimit;
+  private long usedMemory;
+  private long commitMemory;
+  private final long maxSingleShuffleLimit;
+  
+  private final int memToMemMergeOutputsThreshold; 
+  private final long mergeThreshold;
+  
+  private final int ioSortFactor;
+
+  private final Reporter reporter;
+  private final ExceptionReporter exceptionReporter;
+  
+  /**
+   * Combiner class to run during in-memory merge, if defined.
+   */
+  private final Class<? extends Reducer> combinerClass;
+
+  /**
+   * Resettable collector used for combine.
+   */
+  private final CombineOutputCollector<K,V> combineCollector;
+
+  private final Counters.Counter spilledRecordsCounter;
+
+  private final Counters.Counter reduceCombineInputCounter;
+
+  private final Counters.Counter mergedMapOutputsCounter;
+  
+  private final CompressionCodec codec;
+  
+  private final Progress mergePhase;
+
+  public MergeManagerImpl(TaskAttemptID reduceId, JobConf jobConf, 
+                      FileSystem localFS,
+                      LocalDirAllocator localDirAllocator,  
+                      Reporter reporter,
+                      CompressionCodec codec,
+                      Class<? extends Reducer> combinerClass,
+                      CombineOutputCollector<K,V> combineCollector,
+                      Counters.Counter spilledRecordsCounter,
+                      Counters.Counter reduceCombineInputCounter,
+                      Counters.Counter mergedMapOutputsCounter,
+                      ExceptionReporter exceptionReporter,
+                      Progress mergePhase, MapOutputFile mapOutputFile) {
+    this.reduceId = reduceId;
+    this.jobConf = jobConf;
+    this.localDirAllocator = localDirAllocator;
+    this.exceptionReporter = exceptionReporter;
+    
+    this.reporter = reporter;
+    this.codec = codec;
+    this.combinerClass = combinerClass;
+    this.combineCollector = combineCollector;
+    this.reduceCombineInputCounter = reduceCombineInputCounter;
+    this.spilledRecordsCounter = spilledRecordsCounter;
+    this.mergedMapOutputsCounter = mergedMapOutputsCounter;
+    this.mapOutputFile = mapOutputFile;
+    this.mapOutputFile.setConf(jobConf);
+    
+    this.localFS = localFS;
+    this.rfs = ((LocalFileSystem)localFS).getRaw();
+    
+    final float maxInMemCopyUse =
+      jobConf.getFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.90f);
+    if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) {
+      throw new IllegalArgumentException("Invalid value for " +
+          MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT + ": " +
+          maxInMemCopyUse);
+    }
+
+    // Allow unit tests to fix Runtime memory
+    this.memoryLimit = 
+      (long)(jobConf.getLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES,
+          Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE))
+        * maxInMemCopyUse);
+ 
+    this.ioSortFactor = jobConf.getInt(MRJobConfig.IO_SORT_FACTOR, 100);
+
+    final float singleShuffleMemoryLimitPercent =
+        jobConf.getFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT,
+            DEFAULT_SHUFFLE_MEMORY_LIMIT_PERCENT);
+    if (singleShuffleMemoryLimitPercent <= 0.0f
+        || singleShuffleMemoryLimitPercent > 1.0f) {
+      throw new IllegalArgumentException("Invalid value for "
+          + MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
+          + singleShuffleMemoryLimitPercent);
+    }
+
+    usedMemory = 0L;
+    commitMemory = 0L;
+    this.maxSingleShuffleLimit = 
+      (long)(memoryLimit * singleShuffleMemoryLimitPercent);
+    this.memToMemMergeOutputsThreshold = 
+            jobConf.getInt(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, ioSortFactor);
+    this.mergeThreshold = (long)(this.memoryLimit * 
+                          jobConf.getFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 
+                                           0.90f));
+    LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", " +
+             "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", " +
+             "mergeThreshold=" + mergeThreshold + ", " + 
+             "ioSortFactor=" + ioSortFactor + ", " +
+             "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);
+
+    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
+      throw new RuntimeException("Invlaid configuration: "
+          + "maxSingleShuffleLimit should be less than mergeThreshold"
+          + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
+          + "mergeThreshold: " + this.mergeThreshold);
+    }
+
+    boolean allowMemToMemMerge = 
+      jobConf.getBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);
+    if (allowMemToMemMerge) {
+      this.memToMemMerger = 
+        new IntermediateMemoryToMemoryMerger(this,
+                                             memToMemMergeOutputsThreshold);
+      this.memToMemMerger.start();
+    } else {
+      this.memToMemMerger = null;
+    }
+    
+    this.inMemoryMerger = createInMemoryMerger();
+    this.inMemoryMerger.start();
+    
+    this.onDiskMerger = new OnDiskMerger(this);
+    this.onDiskMerger.start();
+    
+    this.mergePhase = mergePhase;
+  }
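For a sense of scale when reading the constructor above: with a 1 GiB reducer heap and the defaults used here (0.90 for the input buffer percent, 0.25 for the single-shuffle limit, 0.90 for the merge percent), memoryLimit works out to roughly 966 MB, maxSingleShuffleLimit to roughly 242 MB (any fetch larger than that is shuffled straight to disk), and mergeThreshold to roughly 870 MB (the commit level at which the in-memory merger starts). The snippet below only reproduces that arithmetic; the 1 GiB heap is an arbitrary assumption for illustration, and the property names in the comments are the standard MRJobConfig keys the constructor reads.

// Illustrative arithmetic only: reproduces the budget derivation in the
// constructor above for an assumed 1 GiB reducer heap and the default
// percentages.
public class MergeBudgetSketch {
  public static void main(String[] args) {
    long maxHeap = 1024L * 1024 * 1024;        // stand-in for Runtime.getRuntime().maxMemory()
    float inputBufferPercent = 0.90f;          // mapreduce.reduce.shuffle.input.buffer.percent
    float singleShuffleLimitPercent = 0.25f;   // mapreduce.reduce.shuffle.memory.limit.percent
    float mergePercent = 0.90f;                // mapreduce.reduce.shuffle.merge.percent

    long memoryLimit = (long) (Math.min(maxHeap, Integer.MAX_VALUE) * inputBufferPercent);
    long maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleLimitPercent);
    long mergeThreshold = (long) (memoryLimit * mergePercent);

    System.out.println("memoryLimit           = " + memoryLimit);           // about 966 MB
    System.out.println("maxSingleShuffleLimit = " + maxSingleShuffleLimit); // about 242 MB
    System.out.println("mergeThreshold        = " + mergeThreshold);        // about 870 MB
  }
}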
+  
+  protected MergeThread<InMemoryMapOutput<K,V>, K,V> createInMemoryMerger() {
+    return new InMemoryMerger(this);
+  }
+
+  TaskAttemptID getReduceId() {
+    return reduceId;
+  }
+
+  @VisibleForTesting
+  ExceptionReporter getExceptionReporter() {
+    return exceptionReporter;
+  }
+
+  @Override
+  public void waitForResource() throws InterruptedException {
+    inMemoryMerger.waitForMerge();
+  }
+  
+  private boolean canShuffleToMemory(long requestedSize) {
+    return (requestedSize < maxSingleShuffleLimit); 
+  }
+  
+  @Override
+  public synchronized MapOutput<K,V> reserve(TaskAttemptID mapId, 
+                                             long requestedSize,
+                                             int fetcher
+                                             ) throws IOException {
+    if (!canShuffleToMemory(requestedSize)) {
+      LOG.info(mapId + ": Shuffling to disk since " + requestedSize + 
+               " is greater than maxSingleShuffleLimit (" + 
+               maxSingleShuffleLimit + ")");
+      return new OnDiskMapOutput<K,V>(mapId, reduceId, this, requestedSize,
+                                      jobConf, mapOutputFile, fetcher, true);
+    }
+    
+    // Stall shuffle if we are above the memory limit
+
+    // It is possible that all threads could just be stalling and not make
+    // progress at all. This could happen when:
+    //
+    // requested size is causing the used memory to go above limit &&
+    // requested size < singleShuffleLimit &&
+    // current used size < mergeThreshold (merge will not get triggered)
+    //
+    // To avoid this from happening, we allow exactly one thread to go past
+    // the memory limit. We check (usedMemory > memoryLimit) and not
+    // (usedMemory + requestedSize > memoryLimit). When this thread is done
+    // fetching, this will automatically trigger a merge thereby unlocking
+    // all the stalled threads
+    
+    if (usedMemory > memoryLimit) {
+      LOG.debug(mapId + ": Stalling shuffle since usedMemory (" + usedMemory
+          + ") is greater than memoryLimit (" + memoryLimit + ")." + 
+          " CommitMemory is (" + commitMemory + ")"); 
+      return null;
+    }
+    
+    // Allow the in-memory shuffle to progress
+    LOG.debug(mapId + ": Proceeding with shuffle since usedMemory ("
+        + usedMemory + ") is lesser than memoryLimit (" + memoryLimit + ")."
+        + "CommitMemory is (" + commitMemory + ")"); 
+    return unconditionalReserve(mapId, requestedSize, true);
+  }
+  
+  /**
+   * Unconditional Reserve is used by the Memory-to-Memory thread
+   * @return
+   */
+  private synchronized InMemoryMapOutput<K, V> unconditionalReserve(
+      TaskAttemptID mapId, long requestedSize, boolean primaryMapOutput) {
+    usedMemory += requestedSize;
+    return new InMemoryMapOutput<K,V>(jobConf, mapId, this, (int)requestedSize,
+                                      codec, primaryMapOutput);
+  }
+  
+  synchronized void unreserve(long size) {
+    usedMemory -= size;
+  }
+
+  public synchronized void closeInMemoryFile(InMemoryMapOutput<K,V> mapOutput) { 
+    inMemoryMapOutputs.add(mapOutput);
+    LOG.info("closeInMemoryFile -> map-output of size: " + mapOutput.getSize()
+        + ", inMemoryMapOutputs.size() -> " + inMemoryMapOutputs.size()
+        + ", commitMemory -> " + commitMemory + ", usedMemory ->" + usedMemory);
+
+    commitMemory+= mapOutput.getSize();
+
+    // Can hang if mergeThreshold is really low.
+    if (commitMemory >= mergeThreshold) {
+      LOG.info("Starting inMemoryMerger's merge since commitMemory=" +
+          commitMemory + " > mergeThreshold=" + mergeThreshold + 
+          ". Current usedMemory=" + usedMemory);
+      inMemoryMapOutputs.addAll(inMemoryMergedMapOutputs);
+      inMemoryMergedMapOutputs.clear();
+      inMemoryMerger.startMerge(inMemoryMapOutputs);
+      commitMemory = 0L;  // Reset commitMemory.
+    }
+    
+    if (memToMemMerger != null) {
+      if (inMemoryMapOutputs.size() >= memToMemMergeOutputsThreshold) { 
+        memToMemMerger.startMerge(inMemoryMapOutputs);
+      }
+    }
+  }
+  
+  
+  public synchronized void closeInMemoryMergedFile(InMemoryMapOutput<K,V> mapOutput) {
+    inMemoryMergedMapOutputs.add(mapOutput);
+    LOG.info("closeInMemoryMergedFile -> size: " + mapOutput.getSize() + 
+             ", inMemoryMergedMapOutputs.size() -> " + 
+             inMemoryMergedMapOutputs.size());
+  }
+  
+  public synchronized void closeOnDiskFile(Path file) {
+    onDiskMapOutputs.add(file);
+    
+    if (onDiskMapOutputs.size() >= (2 * ioSortFactor - 1)) {
+      onDiskMerger.startMerge(onDiskMapOutputs);
+    }
+  }
+  
+  @Override
+  public RawKeyValueIterator close() throws Throwable {
+    // Wait for on-going merges to complete
+    if (memToMemMerger != null) { 
+      memToMemMerger.close();
+    }
+    inMemoryMerger.close();
+    onDiskMerger.close();
+    
+    List<InMemoryMapOutput<K, V>> memory = 
+      new ArrayList<InMemoryMapOutput<K, V>>(inMemoryMergedMapOutputs);
+    memory.addAll(inMemoryMapOutputs);
+    List<Path> disk = new ArrayList<Path>(onDiskMapOutputs);
+    return finalMerge(jobConf, rfs, memory, disk);
+  }
+   
+  private class IntermediateMemoryToMemoryMerger 
+  extends MergeThread<InMemoryMapOutput<K, V>, K, V> {
+    
+    public IntermediateMemoryToMemoryMerger(MergeManagerImpl<K, V> manager, 
+                                            int mergeFactor) {
+      super(manager, mergeFactor, exceptionReporter);
+      setName("InMemoryMerger - Thread to do in-memory merge of in-memory " +
+      		    "shuffled map-outputs");
+      setDaemon(true);
+    }
+
+    @Override
+    public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
+      if (inputs == null || inputs.size() == 0) {
+        return;
+      }
+
+      TaskAttemptID dummyMapId = inputs.get(0).getMapId(); 
+      List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
+      long mergeOutputSize = 
+        createInMemorySegments(inputs, inMemorySegments, 0);
+      int noInMemorySegments = inMemorySegments.size();
+      
+      InMemoryMapOutput<K, V> mergedMapOutputs = 
+        unconditionalReserve(dummyMapId, mergeOutputSize, false);
+      
+      Writer<K, V> writer = 
+        new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());
+      
+      LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
+               " segments of total-size: " + mergeOutputSize);
+
+      RawKeyValueIterator rIter = 
+        Merger.merge(jobConf, rfs,
+                     (Class<K>)jobConf.getMapOutputKeyClass(),
+                     (Class<V>)jobConf.getMapOutputValueClass(),
+                     inMemorySegments, inMemorySegments.size(),
+                     new Path(reduceId.toString()),
+                     (RawComparator<K>)jobConf.getOutputKeyComparator(),
+                     reporter, null, null, null);
+      Merger.writeFile(rIter, writer, reporter, jobConf);
+      writer.close();
+
+      LOG.info(reduceId +  
+               " Memory-to-Memory merge of the " + noInMemorySegments +
+               " files in-memory complete.");
+
+      // Note the output of the merge
+      closeInMemoryMergedFile(mergedMapOutputs);
+    }
+  }
+  
+  private class InMemoryMerger extends MergeThread<InMemoryMapOutput<K,V>, K,V> {
+    
+    public InMemoryMerger(MergeManagerImpl<K, V> manager) {
+      super(manager, Integer.MAX_VALUE, exceptionReporter);
+      setName
+      ("InMemoryMerger - Thread to merge in-memory shuffled map-outputs");
+      setDaemon(true);
+    }
+    
+    @Override
+    public void merge(List<InMemoryMapOutput<K,V>> inputs) throws IOException {
+      if (inputs == null || inputs.size() == 0) {
+        return;
+      }
+      
+      //name this output file same as the name of the first file that is 
+      //there in the current list of inmem files (this is guaranteed to
+      //be absent on the disk currently. So we don't overwrite a prev. 
+      //created spill). Also we need to create the output file now since
+      //it is not guaranteed that this file will be present after merge
+      //is called (we delete empty files as soon as we see them
+      //in the merge method)
+
+      //figure out the mapId 
+      TaskAttemptID mapId = inputs.get(0).getMapId();
+      TaskID mapTaskId = mapId.getTaskID();
+
+      List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
+      long mergeOutputSize = 
+        createInMemorySegments(inputs, inMemorySegments,0);
+      int noInMemorySegments = inMemorySegments.size();
+
+      Path outputPath = 
+        mapOutputFile.getInputFileForWrite(mapTaskId,
+                                           mergeOutputSize).suffix(
+                                               Task.MERGED_OUTPUT_PREFIX);
+
+      Writer<K,V> writer = 
+        new Writer<K,V>(jobConf, rfs, outputPath,
+                        (Class<K>) jobConf.getMapOutputKeyClass(),
+                        (Class<V>) jobConf.getMapOutputValueClass(),
+                        codec, null);
+
+      RawKeyValueIterator rIter = null;
+      try {
+        LOG.info("Initiating in-memory merge with " + noInMemorySegments + 
+                 " segments...");
+        
+        rIter = Merger.merge(jobConf, rfs,
+                             (Class<K>)jobConf.getMapOutputKeyClass(),
+                             (Class<V>)jobConf.getMapOutputValueClass(),
+                             inMemorySegments, inMemorySegments.size(),
+                             new Path(reduceId.toString()),
+                             (RawComparator<K>)jobConf.getOutputKeyComparator(),
+                             reporter, spilledRecordsCounter, null, null);
+        
+        if (null == combinerClass) {
+          Merger.writeFile(rIter, writer, reporter, jobConf);
+        } else {
+          combineCollector.setWriter(writer);
+          combineAndSpill(rIter, reduceCombineInputCounter);
+        }
+        writer.close();
+
+        LOG.info(reduceId +  
+            " Merge of the " + noInMemorySegments +
+            " files in-memory complete." +
+            " Local file is " + outputPath + " of size " + 
+            localFS.getFileStatus(outputPath).getLen());
+      } catch (IOException e) { 
+        //make sure that we delete the ondisk file that we created 
+        //earlier when we invoked cloneFileAttributes
+        localFS.delete(outputPath, true);
+        throw e;
+      }
+
+      // Note the output of the merge
+      closeOnDiskFile(outputPath);
+    }
+
+  }
+  
+  private class OnDiskMerger extends MergeThread<Path,K,V> {
+    
+    public OnDiskMerger(MergeManagerImpl<K, V> manager) {
+      super(manager, Integer.MAX_VALUE, exceptionReporter);
+      setName("OnDiskMerger - Thread to merge on-disk map-outputs");
+      setDaemon(true);
+    }
+    
+    @Override
+    public void merge(List<Path> inputs) throws IOException {
+      // sanity check
+      if (inputs == null || inputs.isEmpty()) {
+        LOG.info("No ondisk files to merge...");
+        return;
+      }
+      
+      long approxOutputSize = 0;
+      int bytesPerSum = 
+        jobConf.getInt("io.bytes.per.checksum", 512);
+      
+      LOG.info("OnDiskMerger: We have  " + inputs.size() + 
+               " map outputs on disk. Triggering merge...");
+      
+      // 1. Prepare the list of files to be merged. 
+      for (Path file : inputs) {
+        approxOutputSize += localFS.getFileStatus(file).getLen();
+      }
+
+      // add the checksum length
+      approxOutputSize += 
+        ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);
+
+      // 2. Start the on-disk merge process
+      Path outputPath = 
+        localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(), 
+            approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);
+      Writer<K,V> writer = 
+        new Writer<K,V>(jobConf, rfs, outputPath, 
+                        (Class<K>) jobConf.getMapOutputKeyClass(), 
+                        (Class<V>) jobConf.getMapOutputValueClass(),
+                        codec, null);
+      RawKeyValueIterator iter  = null;
+      Path tmpDir = new Path(reduceId.toString());
+      try {
+        iter = Merger.merge(jobConf, rfs,
+                            (Class<K>) jobConf.getMapOutputKeyClass(),
+                            (Class<V>) jobConf.getMapOutputValueClass(),
+                            codec, inputs.toArray(new Path[inputs.size()]), 
+                            true, ioSortFactor, tmpDir, 
+                            (RawComparator<K>) jobConf.getOutputKeyComparator(), 
+                            reporter, spilledRecordsCounter, null, 
+                            mergedMapOutputsCounter, null);
+
+        Merger.writeFile(iter, writer, reporter, jobConf);
+        writer.close();
+      } catch (IOException e) {
+        localFS.delete(outputPath, true);
+        throw e;
+      }
+
+      closeOnDiskFile(outputPath);
+
+      LOG.info(reduceId +
+          " Finished merging " + inputs.size() + 
+          " map output files on disk of total-size " + 
+          approxOutputSize + "." + 
+          " Local output file is " + outputPath + " of size " +
+          localFS.getFileStatus(outputPath).getLen());
+    }
+  }
+  
+  private void combineAndSpill(
+      RawKeyValueIterator kvIter,
+      Counters.Counter inCounter) throws IOException {
+    JobConf job = jobConf;
+    Reducer combiner = ReflectionUtils.newInstance(combinerClass, job);
+    Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
+    Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
+    RawComparator<K> comparator = 
+      (RawComparator<K>)job.getOutputKeyComparator();
+    try {
+      CombineValuesIterator values = new CombineValuesIterator(
+          kvIter, comparator, keyClass, valClass, job, Reporter.NULL,
+          inCounter);
+      while (values.more()) {
+        combiner.reduce(values.getKey(), values, combineCollector,
+                        Reporter.NULL);
+        values.nextKey();
+      }
+    } finally {
+      combiner.close();
+    }
+  }
+
+  private long createInMemorySegments(List<InMemoryMapOutput<K,V>> inMemoryMapOutputs,
+                                      List<Segment<K, V>> inMemorySegments, 
+                                      long leaveBytes
+                                      ) throws IOException {
+    long totalSize = 0L;
+    // fullSize could come from the RamManager, but files can be
+    // closed but not yet present in inMemoryMapOutputs
+    long fullSize = 0L;
+    for (InMemoryMapOutput<K,V> mo : inMemoryMapOutputs) {
+      fullSize += mo.getMemory().length;
+    }
+    while(fullSize > leaveBytes) {
+      InMemoryMapOutput<K,V> mo = inMemoryMapOutputs.remove(0);
+      byte[] data = mo.getMemory();
+      long size = data.length;
+      totalSize += size;
+      fullSize -= size;
+      Reader<K,V> reader = new InMemoryReader<K,V>(MergeManagerImpl.this, 
+                                                   mo.getMapId(),
+                                                   data, 0, (int)size);
+      inMemorySegments.add(new Segment<K,V>(reader, true, 
+                                            (mo.isPrimaryMapOutput() ? 
+                                            mergedMapOutputsCounter : null)));
+    }
+    return totalSize;
+  }
+
+  class RawKVIteratorReader extends IFile.Reader<K,V> {
+
+    private final RawKeyValueIterator kvIter;
+
+    public RawKVIteratorReader(RawKeyValueIterator kvIter, long size)
+        throws IOException {
+      super(null, null, size, null, spilledRecordsCounter);
+      this.kvIter = kvIter;
+    }
+    public boolean nextRawKey(DataInputBuffer key) throws IOException {
+      if (kvIter.next()) {
+        final DataInputBuffer kb = kvIter.getKey();
+        final int kp = kb.getPosition();
+        final int klen = kb.getLength() - kp;
+        key.reset(kb.getData(), kp, klen);
+        bytesRead += klen;
+        return true;
+      }
+      return false;
+    }
+    public void nextRawValue(DataInputBuffer value) throws IOException {
+      final DataInputBuffer vb = kvIter.getValue();
+      final int vp = vb.getPosition();
+      final int vlen = vb.getLength() - vp;
+      value.reset(vb.getData(), vp, vlen);
+      bytesRead += vlen;
+    }
+    public long getPosition() throws IOException {
+      return bytesRead;
+    }
+
+    public void close() throws IOException {
+      kvIter.close();
+    }
+  }
+
+  private RawKeyValueIterator finalMerge(JobConf job, FileSystem fs,
+                                       List<InMemoryMapOutput<K,V>> inMemoryMapOutputs,
+                                       List<Path> onDiskMapOutputs
+                                       ) throws IOException {
+    LOG.info("finalMerge called with " + 
+             inMemoryMapOutputs.size() + " in-memory map-outputs and " + 
+             onDiskMapOutputs.size() + " on-disk map-outputs");
+    
+    final float maxRedPer =
+      job.getFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0f);
+    if (maxRedPer > 1.0 || maxRedPer < 0.0) {
+      throw new IOException(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT +
+                            maxRedPer);
+    }
+    int maxInMemReduce = (int)Math.min(
+        Runtime.getRuntime().maxMemory() * maxRedPer, Integer.MAX_VALUE);
+    
+
+    // merge config params
+    Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
+    Class<V> valueClass = (Class<V>)job.getMapOutputValueClass();
+    boolean keepInputs = job.getKeepFailedTaskFiles();
+    final Path tmpDir = new Path(reduceId.toString());
+    final RawComparator<K> comparator =
+      (RawComparator<K>)job.getOutputKeyComparator();
+
+    // segments required to vacate memory
+    List<Segment<K,V>> memDiskSegments = new ArrayList<Segment<K,V>>();
+    long inMemToDiskBytes = 0;
+    boolean mergePhaseFinished = false;
+    if (inMemoryMapOutputs.size() > 0) {
+      TaskID mapId = inMemoryMapOutputs.get(0).getMapId().getTaskID();
+      inMemToDiskBytes = createInMemorySegments(inMemoryMapOutputs, 
+                                                memDiskSegments,
+                                                maxInMemReduce);
+      final int numMemDiskSegments = memDiskSegments.size();
+      if (numMemDiskSegments > 0 &&
+            ioSortFactor > onDiskMapOutputs.size()) {
+        
+        // If we reach here, it implies that we have fewer than io.sort.factor
+        // disk segments and this count will be incremented by 1 (the result
+        // of the memory segments merge). Since this total would still be
+        // <= io.sort.factor, we will not do any more intermediate merges;
+        // the merge of all these disk segments will be fed directly to the
+        // reduce method.
+        
+        mergePhaseFinished = true;
+        // must spill to disk, but can't retain in-mem for intermediate merge
+        final Path outputPath = 
+          mapOutputFile.getInputFileForWrite(mapId,
+                                             inMemToDiskBytes).suffix(
+                                                 Task.MERGED_OUTPUT_PREFIX);
+        final RawKeyValueIterator rIter = Merger.merge(job, fs,
+            keyClass, valueClass, memDiskSegments, numMemDiskSegments,
+            tmpDir, comparator, reporter, spilledRecordsCounter, null, 
+            mergePhase);
+        final Writer<K,V> writer = new Writer<K,V>(job, fs, outputPath,
+            keyClass, valueClass, codec, null);
+        try {
+          Merger.writeFile(rIter, writer, reporter, job);
+          // add to list of final disk outputs.
+          onDiskMapOutputs.add(outputPath);
+        } catch (IOException e) {
+          if (null != outputPath) {
+            try {
+              fs.delete(outputPath, true);
+            } catch (IOException ie) {
+              // NOTHING
+            }
+          }
+          throw e;
+        } finally {
+          if (null != writer) {
+            writer.close();
+          }
+        }
+        LOG.info("Merged " + numMemDiskSegments + " segments, " +
+                 inMemToDiskBytes + " bytes to disk to satisfy " +
+                 "reduce memory limit");
+        inMemToDiskBytes = 0;
+        memDiskSegments.clear();
+      } else if (inMemToDiskBytes != 0) {
+        LOG.info("Keeping " + numMemDiskSegments + " segments, " +
+                 inMemToDiskBytes + " bytes in memory for " +
+                 "intermediate, on-disk merge");
+      }
+    }
+
+    // segments on disk
+    List<Segment<K,V>> diskSegments = new ArrayList<Segment<K,V>>();
+    long onDiskBytes = inMemToDiskBytes;
+    Path[] onDisk = onDiskMapOutputs.toArray(new Path[onDiskMapOutputs.size()]);
+    for (Path file : onDisk) {
+      onDiskBytes += fs.getFileStatus(file).getLen();
+      LOG.debug("Disk file: " + file + " Length is " + 
+          fs.getFileStatus(file).getLen());
+      diskSegments.add(new Segment<K, V>(job, fs, file, codec, keepInputs,
+                                         (file.toString().endsWith(
+                                             Task.MERGED_OUTPUT_PREFIX) ?
+                                          null : mergedMapOutputsCounter)
+                                        ));
+    }
+    LOG.info("Merging " + onDisk.length + " files, " +
+             onDiskBytes + " bytes from disk");
+    Collections.sort(diskSegments, new Comparator<Segment<K,V>>() {
+      public int compare(Segment<K, V> o1, Segment<K, V> o2) {
+        if (o1.getLength() == o2.getLength()) {
+          return 0;
+        }
+        return o1.getLength() < o2.getLength() ? -1 : 1;
+      }
+    });
+
+    // build the final list of segments from the disk-backed merge plus the in-memory segments
+    List<Segment<K,V>> finalSegments = new ArrayList<Segment<K,V>>();
+    long inMemBytes = createInMemorySegments(inMemoryMapOutputs, 
+                                             finalSegments, 0);
+    LOG.info("Merging " + finalSegments.size() + " segments, " +
+             inMemBytes + " bytes from memory into reduce");
+    if (0 != onDiskBytes) {
+      final int numInMemSegments = memDiskSegments.size();
+      diskSegments.addAll(0, memDiskSegments);
+      memDiskSegments.clear();
+      // Pass mergePhase only if there are going to be intermediate
+      // merges. See the comment where mergePhaseFinished is set.
+      Progress thisPhase = (mergePhaseFinished) ? null : mergePhase; 
+      RawKeyValueIterator diskMerge = Merger.merge(
+          job, fs, keyClass, valueClass, diskSegments,
+          ioSortFactor, numInMemSegments, tmpDir, comparator,
+          reporter, false, spilledRecordsCounter, null, thisPhase);
+      diskSegments.clear();
+      if (0 == finalSegments.size()) {
+        return diskMerge;
+      }
+      finalSegments.add(new Segment<K,V>(
+            new RawKVIteratorReader(diskMerge, onDiskBytes), true));
+    }
+    return Merger.merge(job, fs, keyClass, valueClass,
+                 finalSegments, finalSegments.size(), tmpDir,
+                 comparator, reporter, spilledRecordsCounter, null,
+                 null);
+  
+  }
+}
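
For context on the memory limit used above: finalMerge() turns mapreduce.reduce.input.buffer.percent into a byte budget for map outputs that may stay resident during the reduce, and createInMemorySegments() then drains in-memory outputs until at most that many bytes remain. A standalone sketch of that budget calculation (not part of the patch; class and method names are illustrative):

public class ReduceInputBufferSketch {
  // Mirrors the maxInMemReduce computation in finalMerge(): the fraction of
  // the reducer JVM heap that may keep map outputs in memory during the
  // reduce, capped at Integer.MAX_VALUE bytes.
  static int maxBytesToRetainInMemory(float reduceInputBufferPercent) {
    return (int) Math.min(
        Runtime.getRuntime().maxMemory() * reduceInputBufferPercent,
        Integer.MAX_VALUE);
  }

  public static void main(String[] args) {
    // The default of 0f retains nothing: every in-memory segment is merged
    // to disk (or fed to the reduce) before the reduce phase begins.
    System.out.println(maxBytesToRetainInMemory(0f));
    System.out.println(maxBytesToRetainInMemory(0.7f));
  }
}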

+ 2 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeThread.java

@@ -34,12 +34,12 @@ abstract class MergeThread<T,K,V> extends Thread {
 
   private AtomicInteger numPending = new AtomicInteger(0);
   private LinkedList<List<T>> pendingToBeMerged;
-  protected final MergeManager<K,V> manager;
+  protected final MergeManagerImpl<K,V> manager;
   private final ExceptionReporter reporter;
   private boolean closed = false;
   private final int mergeFactor;
   
-  public MergeThread(MergeManager<K,V> manager, int mergeFactor,
+  public MergeThread(MergeManagerImpl<K,V> manager, int mergeFactor,
                      ExceptionReporter reporter) {
     this.pendingToBeMerged = new LinkedList<List<T>>();
     this.manager = manager;

+ 131 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/OnDiskMapOutput.java

@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapreduce.task.reduce;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import org.apache.hadoop.io.IOUtils;
+
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.MapOutputFile;
+
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+class OnDiskMapOutput<K, V> extends MapOutput<K, V> {
+  private static final Log LOG = LogFactory.getLog(OnDiskMapOutput.class);
+  private final FileSystem localFS;
+  private final Path tmpOutputPath;
+  private final Path outputPath;
+  private final MergeManagerImpl<K, V> merger;
+  private final OutputStream disk; 
+
+  public OnDiskMapOutput(TaskAttemptID mapId, TaskAttemptID reduceId,
+                         MergeManagerImpl<K, V> merger, long size,
+                         JobConf conf,
+                         MapOutputFile mapOutputFile,
+                         int fetcher, boolean primaryMapOutput)
+    throws IOException {
+    super(mapId, size, primaryMapOutput);
+    this.merger = merger;
+    this.localFS = FileSystem.getLocal(conf);
+    outputPath =
+        mapOutputFile.getInputFileForWrite(mapId.getTaskID(),size);
+    tmpOutputPath = outputPath.suffix(String.valueOf(fetcher));
+    
+    disk = localFS.create(tmpOutputPath);
+
+  }
+
+  @Override
+  public void shuffle(MapHost host, InputStream input,
+                      long compressedLength, long decompressedLength,
+                      ShuffleClientMetrics metrics,
+                      Reporter reporter) throws IOException {
+    // Copy data to local-disk
+    long bytesLeft = compressedLength;
+    try {
+      final int BYTES_TO_READ = 64 * 1024;
+      byte[] buf = new byte[BYTES_TO_READ];
+      while (bytesLeft > 0) {
+        int n = input.read(buf, 0, (int) Math.min(bytesLeft, BYTES_TO_READ));
+        if (n < 0) {
+          throw new IOException("read past end of stream reading " + 
+                                getMapId());
+        }
+        disk.write(buf, 0, n);
+        bytesLeft -= n;
+        metrics.inputBytes(n);
+        reporter.progress();
+      }
+
+      LOG.info("Read " + (compressedLength - bytesLeft) + 
+               " bytes from map-output for " + getMapId());
+
+      disk.close();
+    } catch (IOException ioe) {
+      // Close the streams
+      IOUtils.cleanup(LOG, input, disk);
+
+      // Re-throw
+      throw ioe;
+    }
+
+    // Sanity check
+    if (bytesLeft != 0) {
+      throw new IOException("Incomplete map output received for " +
+                            getMapId() + " from " +
+                            host.getHostName() + " (" + 
+                            bytesLeft + " bytes missing of " + 
+                            compressedLength + ")");
+    }
+  }
+
+  @Override
+  public void commit() throws IOException {
+    localFS.rename(tmpOutputPath, outputPath);
+    merger.closeOnDiskFile(outputPath);
+  }
+  
+  @Override
+  public void abort() {
+    try {
+      localFS.delete(tmpOutputPath, false);
+    } catch (IOException ie) {
+      LOG.info("failure to clean up " + tmpOutputPath, ie);
+    }
+  }
+
+  @Override
+  public String getDescription() {
+    return "DISK";
+  }
+}
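
OnDiskMapOutput.shuffle() above copies exactly compressedLength bytes from the fetcher's stream to local disk in 64 KB chunks and treats a premature end of stream as an error. A standalone sketch of that bounded-copy loop, with the metrics and progress reporting left out (not part of the patch; names are illustrative):

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

public class BoundedCopySketch {
  // Copies exactly 'expected' bytes from in to out in 64 KB chunks and
  // fails if the stream ends before that many bytes have been read.
  static void copyExactly(InputStream in, OutputStream out, long expected)
      throws IOException {
    final int BYTES_TO_READ = 64 * 1024;
    byte[] buf = new byte[BYTES_TO_READ];
    long bytesLeft = expected;
    while (bytesLeft > 0) {
      int n = in.read(buf, 0, (int) Math.min(bytesLeft, BYTES_TO_READ));
      if (n < 0) {
        throw new IOException("read past end of stream");
      }
      out.write(buf, 0, n);
      bytesLeft -= n;
    }
  }
}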

+ 15 - 18
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java

@@ -21,17 +21,10 @@ import java.io.IOException;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalDirAllocator;
-import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.mapred.Counters;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapOutputFile;
 import org.apache.hadoop.mapred.RawKeyValueIterator;
-import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.Task;
-import org.apache.hadoop.mapred.Task.CombineOutputCollector;
 import org.apache.hadoop.mapred.TaskStatus;
 import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
 import org.apache.hadoop.mapred.ShuffleConsumerPlugin;
@@ -77,17 +70,21 @@ public class Shuffle<K, V> implements ShuffleConsumerPlugin<K, V>, ExceptionRepo
     this.taskStatus = context.getStatus();
     this.reduceTask = context.getReduceTask();
     
-    scheduler = 
-      new ShuffleScheduler<K,V>(jobConf, taskStatus, this, copyPhase, 
-                                context.getShuffledMapsCounter(), 
-                                context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
-    merger = new MergeManager<K, V>(reduceId, jobConf, context.getLocalFS(),
-                                    context.getLocalDirAllocator(), reporter, context.getCodec(),
-                                    context.getCombinerClass(), context.getCombineCollector(), 
-                                    context.getSpilledRecordsCounter(), 
-                                    context.getReduceCombineInputCounter(), 
-                                    context.getMergedMapOutputsCounter(), 
-                                    this, context.getMergePhase(), context.getMapOutputFile());
+    scheduler = new ShuffleScheduler<K,V>(jobConf, taskStatus, this,
+        copyPhase, context.getShuffledMapsCounter(),
+        context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
+    merger = createMergeManager(context);
+  }
+
+  protected MergeManager<K, V> createMergeManager(
+      ShuffleConsumerPlugin.Context context) {
+    return new MergeManagerImpl<K, V>(reduceId, jobConf, context.getLocalFS(),
+        context.getLocalDirAllocator(), reporter, context.getCodec(),
+        context.getCombinerClass(), context.getCombineCollector(), 
+        context.getSpilledRecordsCounter(),
+        context.getReduceCombineInputCounter(),
+        context.getMergedMapOutputsCounter(), this, context.getMergePhase(),
+        context.getMapOutputFile());
   }
 
   @Override
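
The refactoring above replaces the hard-wired MergeManager construction with a protected createMergeManager() hook, so a Shuffle subclass can supply a different MergeManager. A sketch of such an override (CustomShuffle and CustomMergeManager are hypothetical names, not part of this patch):

import org.apache.hadoop.mapred.ShuffleConsumerPlugin;
import org.apache.hadoop.mapreduce.task.reduce.MergeManager;
import org.apache.hadoop.mapreduce.task.reduce.Shuffle;

public class CustomShuffle<K, V> extends Shuffle<K, V> {
  @Override
  protected MergeManager<K, V> createMergeManager(
      ShuffleConsumerPlugin.Context context) {
    // Return a hypothetical MergeManager implementation instead of the
    // default MergeManagerImpl built by the base class.
    return new CustomMergeManager<K, V>(context);
  }
}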

+ 11 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml

@@ -857,6 +857,17 @@
   </description>
 </property>
 
+<property>
+  <name>yarn.app.mapreduce.am.admin.user.env</name>
+  <value></value>
+  <description> Environment variables for the MR App Master
+  processes, for admin purposes. These values are set first and can be
+  overridden by the user env (yarn.app.mapreduce.am.env). Examples:
+  1) A=foo    This will set the env variable A to foo.
+  2) B=$B:c   This will inherit the app master's B env variable.
+  </description>
+</property>
+
 <property>
   <name>yarn.app.mapreduce.am.command-opts</name>
   <value>-Xmx1024m</value>
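
To illustrate the two forms in the description above as they would be set from job-submission code (a sketch, not part of the patch; it assumes the same comma-separated key=value list format used by yarn.app.mapreduce.am.env):

import org.apache.hadoop.mapred.JobConf;

public class AmAdminEnvSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // Form 1: A=foo sets the AM environment variable A to the literal "foo".
    // Form 2: B=$B:c appends ":c" to the B value the app master inherits.
    conf.set("yarn.app.mapreduce.am.admin.user.env", "A=foo,B=$B:c");
  }
}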

+ 5 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java

@@ -53,7 +53,7 @@ public class TestFetcher {
     private HttpURLConnection connection;
 
     public FakeFetcher(JobConf job, TaskAttemptID reduceId,
-        ShuffleScheduler<K,V> scheduler, MergeManager<K,V> merger, Reporter reporter,
+        ShuffleScheduler<K,V> scheduler, MergeManagerImpl<K,V> merger, Reporter reporter,
         ShuffleClientMetrics metrics, ExceptionReporter exceptionReporter,
         SecretKey jobTokenSecret, HttpURLConnection connection) {
       super(job, reduceId, scheduler, merger, reporter, metrics, exceptionReporter,
@@ -77,7 +77,7 @@ public class TestFetcher {
     JobConf job = new JobConf();
     TaskAttemptID id = TaskAttemptID.forName("attempt_0_1_r_1_1");
     ShuffleScheduler<Text, Text> ss = mock(ShuffleScheduler.class);
-    MergeManager<Text, Text> mm = mock(MergeManager.class);
+    MergeManagerImpl<Text, Text> mm = mock(MergeManagerImpl.class);
     Reporter r = mock(Reporter.class);
     ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class);
     ExceptionReporter except = mock(ExceptionReporter.class);
@@ -132,7 +132,7 @@ public class TestFetcher {
     JobConf job = new JobConf();
     TaskAttemptID id = TaskAttemptID.forName("attempt_0_1_r_1_1");
     ShuffleScheduler<Text, Text> ss = mock(ShuffleScheduler.class);
-    MergeManager<Text, Text> mm = mock(MergeManager.class);
+    MergeManagerImpl<Text, Text> mm = mock(MergeManagerImpl.class);
     Reporter r = mock(Reporter.class);
     ShuffleClientMetrics metrics = mock(ShuffleClientMetrics.class);
     ExceptionReporter except = mock(ExceptionReporter.class);
@@ -167,10 +167,9 @@ public class TestFetcher {
     header.write(new DataOutputStream(bout));
     ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
     when(connection.getInputStream()).thenReturn(in);
-    //Defaults to WAIT, which is what we want to test
-    MapOutput<Text,Text> mapOut = new MapOutput<Text, Text>(map1ID);
+    //Defaults to null, which is what we want to test
     when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt()))
-      .thenReturn(mapOut);
+      .thenReturn(null);
     
     underTest.copyFromHost(host);
     

+ 31 - 29
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java

@@ -32,13 +32,13 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapOutputFile;
 import org.apache.hadoop.mapreduce.MRJobConfig;
-import org.apache.hadoop.mapreduce.task.reduce.MapOutput.Type;
 import org.junit.Assert;
 import org.junit.Test;
 
 public class TestMergeManager {
 
   @Test(timeout=10000)
+  @SuppressWarnings("unchecked")
   public void testMemoryMerge() throws Exception {
     final int TOTAL_MEM_BYTES = 10000;
     final int OUTPUT_SIZE = 7950;
@@ -55,45 +55,47 @@ public class TestMergeManager {
 
     // reserve enough map output to cause a merge when it is committed
     MapOutput<Text, Text> out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
-    Assert.assertEquals("Should be a memory merge",
-        Type.MEMORY, out1.getType());
-    fillOutput(out1);
+    Assert.assertTrue("Should be a memory merge",
+                      (out1 instanceof InMemoryMapOutput));
+    InMemoryMapOutput<Text, Text> mout1 = (InMemoryMapOutput<Text, Text>)out1;
+    fillOutput(mout1);
     MapOutput<Text, Text> out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
-    Assert.assertEquals("Should be a memory merge",
-        Type.MEMORY, out2.getType());
-    fillOutput(out2);
+    Assert.assertTrue("Should be a memory merge",
+                      (out2 instanceof InMemoryMapOutput));
+    InMemoryMapOutput<Text, Text> mout2 = (InMemoryMapOutput<Text, Text>)out2;
+    fillOutput(mout2);
 
     // next reservation should be a WAIT
     MapOutput<Text, Text> out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
-    Assert.assertEquals("Should be told to wait",
-        Type.WAIT, out3.getType());
+    Assert.assertEquals("Should be told to wait", null, out3);
 
     // trigger the first merge and wait for merge thread to start merging
     // and free enough output to reserve more
-    out1.commit();
-    out2.commit();
+    mout1.commit();
+    mout2.commit();
     mergeStart.await();
 
     Assert.assertEquals(1, mgr.getNumMerges());
 
     // reserve enough map output to cause another merge when committed
     out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
-    Assert.assertEquals("Should be a memory merge",
-        Type.MEMORY, out1.getType());
-    fillOutput(out1);
+    Assert.assertTrue("Should be a memory merge",
+                       (out1 instanceof InMemoryMapOutput));
+    mout1 = (InMemoryMapOutput<Text, Text>)out1;
+    fillOutput(mout1);
     out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
-    Assert.assertEquals("Should be a memory merge",
-        Type.MEMORY, out2.getType());
-    fillOutput(out2);
+    Assert.assertTrue("Should be a memory merge",
+                       (out2 instanceof InMemoryMapOutput));
+    mout2 = (InMemoryMapOutput<Text, Text>)out2;
+    fillOutput(mout2);
 
-    // next reservation should be a WAIT
+    // next reservation should be null
     out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
-    Assert.assertEquals("Should be told to wait",
-        Type.WAIT, out3.getType());
+    Assert.assertEquals("Should be told to wait", null, out3);
 
     // commit output *before* merge thread completes
-    out1.commit();
-    out2.commit();
+    mout1.commit();
+    mout2.commit();
 
     // allow the first merge to complete
     mergeComplete.await();
@@ -110,7 +112,7 @@ public class TestMergeManager {
         0, reporter.getNumExceptions());
   }
 
-  private void fillOutput(MapOutput<Text, Text> output) throws IOException {
+  private void fillOutput(InMemoryMapOutput<Text, Text> output) throws IOException {
     BoundedByteArrayOutputStream stream = output.getArrayStream();
     int count = stream.getLimit();
     for (int i=0; i < count; ++i) {
@@ -118,7 +120,7 @@ public class TestMergeManager {
     }
   }
 
-  private static class StubbedMergeManager extends MergeManager<Text, Text> {
+  private static class StubbedMergeManager extends MergeManagerImpl<Text, Text> {
     private TestMergeThread mergeThread;
 
     public StubbedMergeManager(JobConf conf, ExceptionReporter reporter,
@@ -129,7 +131,7 @@ public class TestMergeManager {
     }
 
     @Override
-    protected MergeThread<MapOutput<Text, Text>, Text, Text> createInMemoryMerger() {
+    protected MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> createInMemoryMerger() {
       mergeThread = new TestMergeThread(this, getExceptionReporter());
       return mergeThread;
     }
@@ -140,12 +142,12 @@ public class TestMergeManager {
   }
 
   private static class TestMergeThread
-  extends MergeThread<MapOutput<Text,Text>, Text, Text> {
+  extends MergeThread<InMemoryMapOutput<Text,Text>, Text, Text> {
     private AtomicInteger numMerges;
     private CyclicBarrier mergeStart;
     private CyclicBarrier mergeComplete;
 
-    public TestMergeThread(MergeManager<Text, Text> mergeManager,
+    public TestMergeThread(MergeManagerImpl<Text, Text> mergeManager,
         ExceptionReporter reporter) {
       super(mergeManager, Integer.MAX_VALUE, reporter);
       numMerges = new AtomicInteger(0);
@@ -162,11 +164,11 @@ public class TestMergeManager {
     }
 
     @Override
-    public void merge(List<MapOutput<Text, Text>> inputs)
+    public void merge(List<InMemoryMapOutput<Text, Text>> inputs)
         throws IOException {
       synchronized (this) {
         numMerges.incrementAndGet();
-        for (MapOutput<Text, Text> input : inputs) {
+        for (InMemoryMapOutput<Text, Text> input : inputs) {
           manager.unreserve(input.getSize());
         }
       }

+ 4 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/CompletedJob.java

@@ -36,6 +36,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobACLsManager;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapreduce.Counters;
 import org.apache.hadoop.mapreduce.JobACL;
 import org.apache.hadoop.mapreduce.TaskID;
@@ -183,13 +184,13 @@ public class CompletedJob implements org.apache.hadoop.mapreduce.v2.app.job.Job
   }
 
   @Override
-  public synchronized TaskAttemptCompletionEvent[] getMapAttemptCompletionEvents(
+  public synchronized TaskCompletionEvent[] getMapAttemptCompletionEvents(
       int startIndex, int maxEvents) {
     if (mapCompletionEvents == null) {
       constructTaskAttemptCompletionEvents();
     }
-    return getAttemptCompletionEvents(mapCompletionEvents,
-        startIndex, maxEvents);
+    return TypeConverter.fromYarn(getAttemptCompletionEvents(
+        mapCompletionEvents, startIndex, maxEvents));
   }
 
   private static TaskAttemptCompletionEvent[] getAttemptCompletionEvents(

+ 2 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/PartialJob.java

@@ -25,6 +25,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapreduce.Counters;
 import org.apache.hadoop.mapreduce.JobACL;
 import org.apache.hadoop.mapreduce.v2.api.records.AMInfo;
@@ -154,7 +155,7 @@ public class PartialJob implements org.apache.hadoop.mapreduce.v2.app.job.Job {
   }
 
   @Override
-  public TaskAttemptCompletionEvent[] getMapAttemptCompletionEvents(
+  public TaskCompletionEvent[] getMapAttemptCompletionEvents(
       int startIndex, int maxEvents) {
     return null;
   }

+ 2 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/MockHistoryJobs.java

@@ -23,6 +23,7 @@ import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapreduce.JobACL;
 import org.apache.hadoop.mapreduce.v2.api.records.AMInfo;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
@@ -143,7 +144,7 @@ public class MockHistoryJobs extends MockJobs {
     }
 
     @Override
-    public TaskAttemptCompletionEvent[] getMapAttemptCompletionEvents(
+    public TaskCompletionEvent[] getMapAttemptCompletionEvents(
         int startIndex, int maxEvents) {
       return job.getMapAttemptCompletionEvents(startIndex, maxEvents);
     }

+ 8 - 8
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java

@@ -33,6 +33,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.http.HttpConfig;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.JobStatus;
 import org.apache.hadoop.mapreduce.MRJobConfig;
@@ -69,14 +70,16 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.ClientToken;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.ipc.RPCUtil;
 import org.apache.hadoop.yarn.ipc.YarnRPC;
-import org.apache.hadoop.yarn.security.ApplicationTokenIdentifier;
+import org.apache.hadoop.yarn.security.client.ClientTokenIdentifier;
 import org.apache.hadoop.yarn.util.BuilderUtils;
+import org.apache.hadoop.yarn.util.ProtoUtils;
 
 public class ClientServiceDelegate {
   private static final Log LOG = LogFactory.getLog(ClientServiceDelegate.class);
@@ -176,13 +179,10 @@ public class ClientServiceDelegate {
           serviceAddr = NetUtils.createSocketAddrForHost(
               application.getHost(), application.getRpcPort());
           if (UserGroupInformation.isSecurityEnabled()) {
-            String clientTokenEncoded = application.getClientToken();
-            Token<ApplicationTokenIdentifier> clientToken =
-              new Token<ApplicationTokenIdentifier>();
-            clientToken.decodeFromUrlString(clientTokenEncoded);
-            // RPC layer client expects ip:port as service for tokens
-            SecurityUtil.setTokenService(clientToken, serviceAddr);
-            newUgi.addToken(clientToken);
+            ClientToken clientToken = application.getClientToken();
+            Token<ClientTokenIdentifier> token =
+                ProtoUtils.convertFromProtoFormat(clientToken, serviceAddr);
+            newUgi.addToken(token);
           }
           LOG.debug("Connecting to " + serviceAddr);
           final InetSocketAddress finalServiceAddr = serviceAddr;

+ 3 - 4
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java

@@ -62,8 +62,8 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskReport;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskState;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
 import org.apache.hadoop.yarn.factories.RecordFactory;
@@ -86,10 +86,9 @@ public class NotRunningJob implements MRClientProtocol {
         .newRecordInstance(ApplicationAttemptId.class);
 
     // Setting AppState to NEW and finalStatus to UNDEFINED as they are never
-    // used
-    // for a non running job
+    // used for a non running job
     return BuilderUtils.newApplicationReport(unknownAppId, unknownAttemptId,
-        "N/A", "N/A", "N/A", "N/A", 0, "", YarnApplicationState.NEW, "N/A",
+        "N/A", "N/A", "N/A", "N/A", 0, null, YarnApplicationState.NEW, "N/A",
         "N/A", 0, 0, FinalApplicationStatus.UNDEFINED, null, "N/A");
   }
 

+ 39 - 8
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java

@@ -101,7 +101,7 @@ public class YARNRunner implements ClientProtocol {
   private Configuration conf;
   private final FileContext defaultFileContext;
   
-  /* usually is false unless the jobclient getdelegation token is 
+  /* usually is false unless the jobclient get delegation token is 
    *  called. This is a hack wherein we do return a token from RM 
    *  on getDelegationtoken but due to the restricted api on jobclient
    *  we just add a job history DT token when submitting a job.
@@ -165,12 +165,12 @@ public class YARNRunner implements ClientProtocol {
   @Override
   public TaskTrackerInfo[] getActiveTrackers() throws IOException,
       InterruptedException {
-	  return resMgrDelegate.getActiveTrackers();
+    return resMgrDelegate.getActiveTrackers();
   }
 
   @Override
   public JobStatus[] getAllJobs() throws IOException, InterruptedException {
-   return resMgrDelegate.getAllJobs();
+    return resMgrDelegate.getAllJobs();
   }
 
   @Override
@@ -394,14 +394,31 @@ public class YARNRunner implements ClientProtocol {
         MRJobConfig.MR_AM_LOG_LEVEL, MRJobConfig.DEFAULT_MR_AM_LOG_LEVEL);
     MRApps.addLog4jSystemProperties(logLevel, logSize, vargs);
 
+    // Check for Java Lib Path usage in MAP and REDUCE configs
+    warnForJavaLibPath(conf.get(MRJobConfig.MAP_JAVA_OPTS,""), "map", 
+        MRJobConfig.MAP_JAVA_OPTS, MRJobConfig.MAP_ENV);
+    warnForJavaLibPath(conf.get(MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS,""), "map", 
+        MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS, MRJobConfig.MAPRED_ADMIN_USER_ENV);
+    warnForJavaLibPath(conf.get(MRJobConfig.REDUCE_JAVA_OPTS,""), "reduce", 
+        MRJobConfig.REDUCE_JAVA_OPTS, MRJobConfig.REDUCE_ENV);
+    warnForJavaLibPath(conf.get(MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS,""), "reduce", 
+        MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS, MRJobConfig.MAPRED_ADMIN_USER_ENV);   
+
     // Add AM admin command opts before user command opts
     // so that it can be overridden by user
-    vargs.add(conf.get(MRJobConfig.MR_AM_ADMIN_COMMAND_OPTS,
-        MRJobConfig.DEFAULT_MR_AM_ADMIN_COMMAND_OPTS));
+    String mrAppMasterAdminOptions = conf.get(MRJobConfig.MR_AM_ADMIN_COMMAND_OPTS,
+        MRJobConfig.DEFAULT_MR_AM_ADMIN_COMMAND_OPTS);
+    warnForJavaLibPath(mrAppMasterAdminOptions, "app master", 
+        MRJobConfig.MR_AM_ADMIN_COMMAND_OPTS, MRJobConfig.MR_AM_ADMIN_USER_ENV);
+    vargs.add(mrAppMasterAdminOptions);
+    
+    // Add AM user command opts
+    String mrAppMasterUserOptions = conf.get(MRJobConfig.MR_AM_COMMAND_OPTS,
+        MRJobConfig.DEFAULT_MR_AM_COMMAND_OPTS);
+    warnForJavaLibPath(mrAppMasterUserOptions, "app master", 
+        MRJobConfig.MR_AM_COMMAND_OPTS, MRJobConfig.MR_AM_ENV);
+    vargs.add(mrAppMasterUserOptions);
     
-    vargs.add(conf.get(MRJobConfig.MR_AM_COMMAND_OPTS,
-        MRJobConfig.DEFAULT_MR_AM_COMMAND_OPTS));
-
     vargs.add(MRJobConfig.APPLICATION_MASTER_CLASS);
     vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR +
         Path.SEPARATOR + ApplicationConstants.STDOUT);
@@ -425,6 +442,9 @@ public class YARNRunner implements ClientProtocol {
     Map<String, String> environment = new HashMap<String, String>();
     MRApps.setClasspath(environment, conf);
     
+    // Setup the environment variables for Admin first
+    MRApps.setEnvFromInputString(environment, 
+        conf.get(MRJobConfig.MR_AM_ADMIN_USER_ENV));
     // Setup the environment variables (LD_LIBRARY_PATH, etc)
     MRApps.setEnvFromInputString(environment, 
         conf.get(MRJobConfig.MR_AM_ENV));
@@ -582,4 +602,15 @@ public class YARNRunner implements ClientProtocol {
       throws IOException {
     return clientCache.getClient(jobID).getLogFilePath(jobID, taskAttemptID);
   }
+
+  private static void warnForJavaLibPath(String opts, String component, 
+      String javaConf, String envConf) {
+    if (opts != null && opts.contains("-Djava.library.path")) {
+      LOG.warn("Usage of -Djava.library.path in " + javaConf + " can cause " +
+               "programs to no longer function if hadoop native libraries " +
+               "are used. These values should be set as part of the " +
+               "LD_LIBRARY_PATH in the " + component + " JVM env using " +
+               envConf + " config settings.");
+    }
+  }
 }
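
The new warnForJavaLibPath() flags -Djava.library.path in the various command-opts settings and recommends putting the directory on LD_LIBRARY_PATH through the corresponding env setting instead. A sketch of that recommended setup for map and reduce tasks (not part of the patch; the /opt/native directory is illustrative):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class NativeLibPathSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // Instead of "-Djava.library.path=/opt/native" in the task java opts,
    // add the directory to LD_LIBRARY_PATH via the admin user env, which is
    // what the new warning points users at.
    conf.set(MRJobConfig.MAPRED_ADMIN_USER_ENV,
             "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/native");
  }
}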

+ 17 - 5
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/NotificationTestCase.java

@@ -90,8 +90,8 @@ public abstract class NotificationTestCase extends HadoopTestCase {
   }
 
   public static class NotificationServlet extends HttpServlet {
-    public static int counter = 0;
-    public static int failureCounter = 0;
+    public static volatile int counter = 0;
+    public static volatile int failureCounter = 0;
     private static final long serialVersionUID = 1L;
     
     protected void doGet(HttpServletRequest req, HttpServletResponse res)
@@ -155,7 +155,11 @@ public abstract class NotificationTestCase extends HadoopTestCase {
 
     System.out.println(launchWordCount(this.createJobConf(),
                                        "a b c d e f g h", 1, 1));
-    Thread.sleep(2000);
+    boolean keepTrying = true;
+    for (int tries = 0; tries < 30 && keepTrying; tries++) {
+      Thread.sleep(50);
+      keepTrying = !(NotificationServlet.counter == 2);
+    }
     assertEquals(2, NotificationServlet.counter);
     assertEquals(0, NotificationServlet.failureCounter);
     
@@ -173,14 +177,22 @@ public abstract class NotificationTestCase extends HadoopTestCase {
     // run a job with KILLED status
     System.out.println(UtilsForTests.runJobKill(this.createJobConf(), inDir,
                                                 outDir).getID());
-    Thread.sleep(2000);
+    keepTrying = true;
+    for (int tries = 0; tries < 30 && keepTrying; tries++) {
+      Thread.sleep(50);
+      keepTrying = !(NotificationServlet.counter == 4);
+    }
     assertEquals(4, NotificationServlet.counter);
     assertEquals(0, NotificationServlet.failureCounter);
     
     // run a job with FAILED status
     System.out.println(UtilsForTests.runJobFail(this.createJobConf(), inDir,
                                                 outDir).getID());
-    Thread.sleep(2000);
+    keepTrying = true;
+    for (int tries = 0; tries < 30 && keepTrying; tries++) {
+      Thread.sleep(50);
+      keepTrying = !(NotificationServlet.counter == 6);
+    }
     assertEquals(6, NotificationServlet.counter);
     assertEquals(0, NotificationServlet.failureCounter);
   }

+ 61 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java

@@ -25,6 +25,7 @@ import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
+import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
@@ -83,6 +84,11 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.factories.RecordFactory;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.log4j.Appender;
+import org.apache.log4j.Layout;
+import org.apache.log4j.Logger;
+import org.apache.log4j.SimpleLayout;
+import org.apache.log4j.WriterAppender;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.invocation.InvocationOnMock;
@@ -112,6 +118,7 @@ public class TestYARNRunner extends TestCase {
   public void setUp() throws Exception {
     resourceMgrDelegate = mock(ResourceMgrDelegate.class);
     conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.RM_PRINCIPAL, "mapred/host@REALM");
     clientCache = new ClientCache(conf, resourceMgrDelegate);
     clientCache = spy(clientCache);
     yarnRunner = new YARNRunner(conf, resourceMgrDelegate, clientCache);
@@ -188,7 +195,7 @@ public class TestYARNRunner extends TestCase {
 
   @Test
   public void testResourceMgrDelegate() throws Exception {
-    /* we not want a mock of resourcemgr deleagte */
+    /* we do not want a mock of the resource mgr delegate */
     final ClientRMProtocol clientRMProtocol = mock(ClientRMProtocol.class);
     ResourceMgrDelegate delegate = new ResourceMgrDelegate(conf) {
       @Override
@@ -255,6 +262,9 @@ public class TestYARNRunner extends TestCase {
   
   @Test
   public void testHistoryServerToken() throws Exception {
+    //Set the master principal in the config
+    conf.set(YarnConfiguration.RM_PRINCIPAL,"foo@LOCAL");
+
     final String masterPrincipal = Master.getMasterPrincipal(conf);
 
     final MRClientProtocol hsProxy = mock(MRClientProtocol.class);
@@ -264,7 +274,7 @@ public class TestYARNRunner extends TestCase {
             GetDelegationTokenRequest request =
                 (GetDelegationTokenRequest)invocation.getArguments()[0];
             // check that the renewer matches the cluster's RM principal
-            assertEquals(request.getRenewer(), masterPrincipal);
+            assertEquals(masterPrincipal, request.getRenewer());
 
             DelegationToken token =
                 recordFactory.newRecordInstance(DelegationToken.class);
@@ -356,4 +366,53 @@ public class TestYARNRunner extends TestCase {
       assertTrue("AM admin command opts is after user command opts.", adminIndex < userIndex);
     }
   }
+  @Test
+  public void testWarnCommandOpts() throws Exception {
+    Logger logger = Logger.getLogger(YARNRunner.class);
+    
+    ByteArrayOutputStream bout = new ByteArrayOutputStream();
+    Layout layout = new SimpleLayout();
+    Appender appender = new WriterAppender(layout, bout);
+    logger.addAppender(appender);
+    
+    JobConf jobConf = new JobConf();
+    
+    jobConf.set(MRJobConfig.MR_AM_ADMIN_COMMAND_OPTS, "-Djava.net.preferIPv4Stack=true -Djava.library.path=foo");
+    jobConf.set(MRJobConfig.MR_AM_COMMAND_OPTS, "-Xmx1024m -Djava.library.path=bar");
+    
+    YARNRunner yarnRunner = new YARNRunner(jobConf);
+    
+    File jobxml = new File(testWorkDir, MRJobConfig.JOB_CONF_FILE);
+    OutputStream out = new FileOutputStream(jobxml);
+    conf.writeXml(out);
+    out.close();
+    
+    File jobsplit = new File(testWorkDir, MRJobConfig.JOB_SPLIT);
+    out = new FileOutputStream(jobsplit);
+    out.close();
+    
+    File jobsplitmetainfo = new File(testWorkDir, MRJobConfig.JOB_SPLIT_METAINFO);
+    out = new FileOutputStream(jobsplitmetainfo);
+    out.close();
+    
+    File appTokens = new File(testWorkDir, MRJobConfig.APPLICATION_TOKENS_FILE);
+    out = new FileOutputStream(appTokens);
+    out.close();
+    
+    @SuppressWarnings("unused")
+    ApplicationSubmissionContext submissionContext = 
+        yarnRunner.createApplicationSubmissionContext(jobConf, testWorkDir.toString(), new Credentials());
+   
+    String logMsg = bout.toString();
+    assertTrue(logMsg.contains("WARN - Usage of -Djava.library.path in " +
+        "yarn.app.mapreduce.am.admin-command-opts can cause programs to no " +
+        "longer function if hadoop native libraries are used. These values " +
+        "should be set as part of the LD_LIBRARY_PATH in the app master JVM " +
+        "env using yarn.app.mapreduce.am.admin.user.env config settings."));
+    assertTrue(logMsg.contains("WARN - Usage of -Djava.library.path in " + 
+        "yarn.app.mapreduce.am.command-opts can cause programs to no longer " +
+        "function if hadoop native libraries are used. These values should " +
+        "be set as part of the LD_LIBRARY_PATH in the app master JVM env " +
+        "using yarn.app.mapreduce.am.env config settings."));
+  }
 }

+ 14 - 11
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/QuasiMonteCarlo.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.examples;
 import java.io.IOException;
 import java.math.BigDecimal;
 import java.math.RoundingMode;
+import java.util.Random;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
@@ -77,8 +78,7 @@ public class QuasiMonteCarlo extends Configured implements Tool {
   static final String DESCRIPTION
       = "A map/reduce program that estimates Pi using a quasi-Monte Carlo method.";
   /** tmp directory for input/output */
-  static private final Path TMP_DIR = new Path(
-      QuasiMonteCarlo.class.getSimpleName() + "_TMP_3_141592654");
+  static private final String TMP_DIR_PREFIX = QuasiMonteCarlo.class.getSimpleName();
   
   /** 2-dimensional Halton sequence {H(i)},
    * where H(i) is a 2-dimensional point and i >= 1 is the index.
@@ -228,9 +228,9 @@ public class QuasiMonteCarlo extends Configured implements Tool {
     @Override
     public void cleanup(Context context) throws IOException {
       //write output to a file
-      Path outDir = new Path(TMP_DIR, "out");
-      Path outFile = new Path(outDir, "reduce-out");
       Configuration conf = context.getConfiguration();
+      Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR));
+      Path outFile = new Path(outDir, "reduce-out");
       FileSystem fileSys = FileSystem.get(conf);
       SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
           outFile, LongWritable.class, LongWritable.class, 
@@ -246,7 +246,7 @@ public class QuasiMonteCarlo extends Configured implements Tool {
    * @return the estimated value of Pi
    */
   public static BigDecimal estimatePi(int numMaps, long numPoints,
-      Configuration conf
+      Path tmpDir, Configuration conf
       ) throws IOException, ClassNotFoundException, InterruptedException {
     Job job = new Job(conf);
     //setup job conf
@@ -269,14 +269,14 @@ public class QuasiMonteCarlo extends Configured implements Tool {
     job.setSpeculativeExecution(false);
 
     //setup input/output directories
-    final Path inDir = new Path(TMP_DIR, "in");
-    final Path outDir = new Path(TMP_DIR, "out");
+    final Path inDir = new Path(tmpDir, "in");
+    final Path outDir = new Path(tmpDir, "out");
     FileInputFormat.setInputPaths(job, inDir);
     FileOutputFormat.setOutputPath(job, outDir);
 
     final FileSystem fs = FileSystem.get(conf);
-    if (fs.exists(TMP_DIR)) {
-      throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
+    if (fs.exists(tmpDir)) {
+      throw new IOException("Tmp directory " + fs.makeQualified(tmpDir)
           + " already exists.  Please remove it first.");
     }
     if (!fs.mkdirs(inDir)) {
@@ -325,7 +325,7 @@ public class QuasiMonteCarlo extends Configured implements Tool {
           .multiply(BigDecimal.valueOf(numInside.get()))
           .divide(numTotal, RoundingMode.HALF_UP);
     } finally {
-      fs.delete(TMP_DIR, true);
+      fs.delete(tmpDir, true);
     }
   }
 
@@ -344,12 +344,15 @@ public class QuasiMonteCarlo extends Configured implements Tool {
     
     final int nMaps = Integer.parseInt(args[0]);
     final long nSamples = Long.parseLong(args[1]);
+    long now = System.currentTimeMillis();
+    int rand = new Random().nextInt(Integer.MAX_VALUE);
+    final Path tmpDir = new Path(TMP_DIR_PREFIX + "_" + now + "_" + rand);
         
     System.out.println("Number of Maps  = " + nMaps);
     System.out.println("Samples per Map = " + nSamples);
         
     System.out.println("Estimated value of Pi is "
-        + estimatePi(nMaps, nSamples, getConf()));
+        + estimatePi(nMaps, nSamples, tmpDir, getConf()));
     return 0;
   }
 

+ 6 - 6
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/DistributedPentomino.java

@@ -174,16 +174,16 @@ public class DistributedPentomino extends Configured implements Tool {
       return 2;
     }
     // check for passed parameters, otherwise use defaults
-    int width = PENT_WIDTH;
-    int height = PENT_HEIGHT;
-    int depth = PENT_DEPTH;
+    int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
+    int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
+    int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
     for (int i = 0; i < args.length; i++) {
       if (args[i].equalsIgnoreCase("-depth")) {
-          depth = Integer.parseInt(args[i++].trim());
+        depth = Integer.parseInt(args[++i].trim());
       } else if (args[i].equalsIgnoreCase("-height")) {
-	  height = Integer.parseInt(args[i++].trim());
+        height = Integer.parseInt(args[++i].trim());
       } else if (args[i].equalsIgnoreCase("-width") ) {
-	  width = Integer.parseInt(args[i++].trim()); 
+        width = Integer.parseInt(args[++i].trim());
       }
     }
     // now set the values within conf for M/R tasks to read, this

+ 76 - 0
hadoop-maven-plugins/pom.xml

@@ -0,0 +1,76 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hadoop</groupId>
+    <artifactId>hadoop-project</artifactId>
+    <version>3.0.0-SNAPSHOT</version>
+    <relativePath>../hadoop-project</relativePath>
+  </parent>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-maven-plugins</artifactId>
+  <packaging>maven-plugin</packaging>
+  <name>Apache Hadoop Maven Plugins</name>
+  <properties>
+    <maven.dependency.version>3.0</maven.dependency.version>
+  </properties>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.maven</groupId>
+      <artifactId>maven-plugin-api</artifactId>
+      <version>${maven.dependency.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.maven</groupId>
+      <artifactId>maven-core</artifactId>
+      <version>${maven.dependency.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.maven.plugin-tools</groupId>
+      <artifactId>maven-plugin-annotations</artifactId>
+      <version>${maven.dependency.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>3.8.1</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-plugin-plugin</artifactId>
+        <version>${maven.dependency.version}</version>
+        <configuration>
+          <skipErrorNoDescriptorsFound>true</skipErrorNoDescriptorsFound>
+        </configuration>
+        <executions>
+          <execution>
+            <id>mojo-descriptor</id>
+            <goals>
+              <goal>descriptor</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>

+ 118 - 0
hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/util/Exec.java

@@ -0,0 +1,118 @@
+/*
+ * Copyright 2012 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.maven.plugin.util;
+
+import org.apache.maven.plugin.Mojo;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Exec is a helper class for executing an external process from a mojo.
+ */
+public class Exec {
+  private Mojo mojo;
+
+  /**
+   * Creates a new Exec instance for executing an external process from the given
+   * mojo.
+   * 
+   * @param mojo Mojo executing external process
+   */
+  public Exec(Mojo mojo) {
+    this.mojo = mojo;
+  }
+
+  /**
+   * Runs the specified command and saves each line of the command's output to
+   * the given list.
+   * 
+   * @param command List<String> containing command and all arguments
+   * @param output List<String> in/out parameter to receive command output
+   * @return int exit code of command
+   */
+  public int run(List<String> command, List<String> output) {
+    int retCode = 1;
+    ProcessBuilder pb = new ProcessBuilder(command);
+    try {
+      Process p = pb.start();
+      OutputBufferThread stdOut = new OutputBufferThread(p.getInputStream());
+      OutputBufferThread stdErr = new OutputBufferThread(p.getErrorStream());
+      stdOut.start();
+      stdErr.start();
+      retCode = p.waitFor();
+      if (retCode != 0) {
+        mojo.getLog().warn(command + " failed with error code " + retCode);
+        for (String s : stdErr.getOutput()) {
+          mojo.getLog().debug(s);
+        }
+      } else {
+        stdOut.join();
+        stdErr.join();
+        output.addAll(stdOut.getOutput());
+      }
+    } catch (Exception ex) {
+      mojo.getLog().warn(command + " failed: " + ex.toString());
+    }
+    return retCode;
+  }
+
+  /**
+   * OutputBufferThread is a background thread for consuming and storing output
+   * of the external process.
+   */
+  private static class OutputBufferThread extends Thread {
+    private List<String> output;
+    private BufferedReader reader;
+
+    /**
+     * Creates a new OutputBufferThread to consume the given InputStream.
+     * 
+     * @param is InputStream to consume
+     */
+    public OutputBufferThread(InputStream is) {
+      this.setDaemon(true);
+      output = new ArrayList<String>();
+      reader = new BufferedReader(new InputStreamReader(is));
+    }
+
+    @Override
+    public void run() {
+      try {
+        String line = reader.readLine();
+        while (line != null) {
+          output.add(line);
+          line = reader.readLine();
+        }
+      } catch (IOException ex) {
+        throw new RuntimeException("error reading command output: " + ex.toString());
+      }
+    }
+
+    /**
+     * Returns every line consumed from the input.
+     * 
+     * @return List<String> every line consumed from the input
+     */
+    public List<String> getOutput() {
+      return output;
+    }
+  }
+}
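
A usage sketch for the Exec helper, modeled on how VersionInfoMojo below probes the SCM (ExecUsageSketch is an illustrative mojo, not part of the patch):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.maven.plugin.util.Exec;
import org.apache.maven.plugin.AbstractMojo;

public class ExecUsageSketch extends AbstractMojo {
  @Override
  public void execute() {
    // Run "git branch" and collect its stdout line by line, the same way
    // VersionInfoMojo.determineSCM() does.
    Exec exec = new Exec(this);
    List<String> output = new ArrayList<String>();
    int ret = exec.run(Arrays.asList("git", "branch"), output);
    getLog().info("exit code " + ret + ", captured " + output.size() + " lines");
  }
}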

+ 61 - 0
hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/util/FileSetUtils.java

@@ -0,0 +1,61 @@
+/*
+ * Copyright 2012 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.maven.plugin.util;
+
+import org.apache.maven.model.FileSet;
+import org.codehaus.plexus.util.FileUtils;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * FileSetUtils contains helper methods for mojo implementations that need to
+ * work with a Maven FileSet.
+ */
+public class FileSetUtils {
+
+  /**
+   * Returns a string containing every element of the given list, with each
+   * element separated by a comma.
+   * 
+   * @param list List of all elements
+   * @return String containing every element, comma-separated
+   */
+  private static String getCommaSeparatedList(List list) {
+    StringBuilder buffer = new StringBuilder();
+    String separator = "";
+    for (Object e : list) {
+      buffer.append(separator).append(e);
+      separator = ",";
+    }
+    return buffer.toString();
+  }
+
+  /**
+   * Converts a Maven FileSet to a list of File objects.
+   * 
+   * @param source FileSet to convert
+   * @return List<File> containing every element of the FileSet as a File
+   * @throws IOException if an I/O error occurs while trying to find the files
+   */
+  @SuppressWarnings("unchecked")
+  public static List<File> convertFileSetToFiles(FileSet source) throws IOException {
+    String includes = getCommaSeparatedList(source.getIncludes());
+    String excludes = getCommaSeparatedList(source.getExcludes());
+    return FileUtils.getFiles(new File(source.getDirectory()), includes, excludes);
+  }
+}
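
A short usage sketch for convertFileSetToFiles (not part of the patch; the directory and include pattern are illustrative, while in the real plugin the FileSet is injected as a mojo parameter):

import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.maven.plugin.util.FileSetUtils;
import org.apache.maven.model.FileSet;

public class FileSetUtilsSketch {
  public static void main(String[] args) throws IOException {
    // Collect all Java sources under an illustrative directory.
    FileSet source = new FileSet();
    source.setDirectory("src/main/java");
    source.addInclude("**/*.java");
    List<File> files = FileSetUtils.convertFileSetToFiles(source);
    System.out.println(files.size() + " files matched");
  }
}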

+ 343 - 0
hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/versioninfo/VersionInfoMojo.java

@@ -0,0 +1,343 @@
+/*
+ * Copyright 2012 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.maven.plugin.versioninfo;
+
+import org.apache.hadoop.maven.plugin.util.Exec;
+import org.apache.hadoop.maven.plugin.util.FileSetUtils;
+import org.apache.maven.model.FileSet;
+import org.apache.maven.plugin.AbstractMojo;
+import org.apache.maven.plugin.MojoExecutionException;
+import org.apache.maven.plugins.annotations.LifecyclePhase;
+import org.apache.maven.plugins.annotations.Mojo;
+import org.apache.maven.plugins.annotations.Parameter;
+import org.apache.maven.project.MavenProject;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Date;
+import java.util.List;
+import java.util.TimeZone;
+
+/**
+ * VersionInfoMojo calculates information about the current version of the
+ * codebase and exports the information as properties for further use in a Maven
+ * build.  The version information includes build time, SCM URI, SCM branch, SCM
+ * commit, and an MD5 checksum of the contents of the files in the codebase.
+ */
+@Mojo(name="version-info", defaultPhase=LifecyclePhase.INITIALIZE)
+public class VersionInfoMojo extends AbstractMojo {
+
+  @Parameter(defaultValue="${project}")
+  private MavenProject project;
+
+  @Parameter(required=true)
+  private FileSet source;
+
+  @Parameter(defaultValue="version-info.build.time")
+  private String buildTimeProperty;
+
+  @Parameter(defaultValue="version-info.source.md5")
+  private String md5Property;
+
+  @Parameter(defaultValue="version-info.scm.uri")
+  private String scmUriProperty;
+
+  @Parameter(defaultValue="version-info.scm.branch")
+  private String scmBranchProperty;
+
+  @Parameter(defaultValue="version-info.scm.commit")
+  private String scmCommitProperty;
+
+  @Parameter(defaultValue="git")
+  private String gitCommand;
+
+  @Parameter(defaultValue="svn")
+  private String svnCommand;
+
+  private enum SCM {NONE, SVN, GIT}
+
+  @Override
+  public void execute() throws MojoExecutionException {
+    try {
+      SCM scm = determineSCM();
+      project.getProperties().setProperty(buildTimeProperty, getBuildTime());
+      project.getProperties().setProperty(scmUriProperty, getSCMUri(scm));
+      project.getProperties().setProperty(scmBranchProperty, getSCMBranch(scm));
+      project.getProperties().setProperty(scmCommitProperty, getSCMCommit(scm));
+      project.getProperties().setProperty(md5Property, computeMD5());
+    } catch (Throwable ex) {
+      throw new MojoExecutionException(ex.toString(), ex);
+    }
+  }
+
+  /**
+   * Returns a string representing current build time.
+   * 
+   * @return String representing current build time
+   */
+  private String getBuildTime() {
+    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm'Z'");
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    return dateFormat.format(new Date());
+  }
+  private List<String> scmOut;
+
+  /**
+   * Determines which SCM is in use (Subversion, git, or none) and captures
+   * output of the SCM command for later parsing.
+   * 
+   * @return SCM in use for this build
+   * @throws Exception if any error occurs attempting to determine SCM
+   */
+  private SCM determineSCM() throws Exception {
+    Exec exec = new Exec(this);
+    SCM scm = SCM.NONE;
+    scmOut = new ArrayList<String>();
+    int ret = exec.run(Arrays.asList(svnCommand, "info"), scmOut);
+    if (ret == 0) {
+      scm = SCM.SVN;
+    } else {
+      ret = exec.run(Arrays.asList(gitCommand, "branch"), scmOut);
+      if (ret == 0) {
+        ret = exec.run(Arrays.asList(gitCommand, "remote", "-v"), scmOut);
+        if (ret != 0) {
+          scm = SCM.NONE;
+          scmOut = null;
+        } else {
+          ret = exec.run(Arrays.asList(gitCommand, "log", "-n", "1"), scmOut);
+          if (ret != 0) {
+            scm = SCM.NONE;
+            scmOut = null;
+          } else {
+            scm = SCM.GIT;
+          }
+        }
+      }
+    }
+    if (scmOut != null) {
+      getLog().debug(scmOut.toString());
+    }
+    getLog().info("SCM: " + scm);
+    return scm;
+  }
+
+  /**
+   * Return URI and branch of Subversion repository.
+   * 
+   * @param str String Subversion info output containing URI and branch
+   * @return String[] containing URI and branch
+   */
+  private String[] getSvnUriInfo(String str) {
+    String[] res = new String[]{"Unknown", "Unknown"};
+    try {
+      String path = str;
+      int index = path.indexOf("trunk");
+      if (index > -1) {
+        res[0] = path.substring(0, index - 1);
+        res[1] = "trunk";
+      } else {
+        index = path.indexOf("branches");
+        if (index > -1) {
+          res[0] = path.substring(0, index - 1);
+          int branchIndex = index + "branches".length() + 1;
+          index = path.indexOf("/", branchIndex);
+          if (index > -1) {
+            res[1] = path.substring(branchIndex, index);
+          } else {
+            res[1] = path.substring(branchIndex);
+          }
+        }
+      }
+    } catch (Exception ex) {
+      getLog().warn("Could not determine URI & branch from SVN URI: " + str);
+    }
+    return res;
+  }
+
+  /**
+   * Parses SCM output and returns URI of SCM.
+   * 
+   * @param scm SCM in use for this build
+   * @return String URI of SCM
+   */
+  private String getSCMUri(SCM scm) {
+    String uri = "Unknown";
+    switch (scm) {
+      case SVN:
+        for (String s : scmOut) {
+          if (s.startsWith("URL:")) {
+            uri = s.substring(4).trim();
+            uri = getSvnUriInfo(uri)[0];
+            break;
+          }
+        }
+        break;
+      case GIT:
+        for (String s : scmOut) {
+          if (s.startsWith("origin") && s.endsWith("(fetch)")) {
+            uri = s.substring("origin".length());
+            uri = uri.substring(0, uri.length() - "(fetch)".length());
+            break;
+          }
+        }
+        break;
+    }
+    return uri.trim();
+  }
+
+  /**
+   * Parses SCM output and returns commit of SCM.
+   * 
+   * @param scm SCM in use for this build
+   * @return String commit of SCM
+   */
+  private String getSCMCommit(SCM scm) {
+    String commit = "Unknown";
+    switch (scm) {
+      case SVN:
+        for (String s : scmOut) {
+          if (s.startsWith("Revision:")) {
+            commit = s.substring("Revision:".length());
+            break;
+          }
+        }
+        break;
+      case GIT:
+        for (String s : scmOut) {
+          if (s.startsWith("commit")) {
+            commit = s.substring("commit".length());
+            break;
+          }
+        }
+        break;
+    }
+    return commit.trim();
+  }
+
+  /**
+   * Parses SCM output and returns branch of SCM.
+   * 
+   * @param scm SCM in use for this build
+   * @return String branch of SCM
+   */
+  private String getSCMBranch(SCM scm) {
+    String branch = "Unknown";
+    switch (scm) {
+      case SVN:
+        for (String s : scmOut) {
+          if (s.startsWith("URL:")) {
+            branch = s.substring(4).trim();
+            branch = getSvnUriInfo(branch)[1];
+            break;
+          }
+        }
+        break;
+      case GIT:
+        for (String s : scmOut) {
+          if (s.startsWith("*")) {
+            branch = s.substring("*".length());
+            break;
+          }
+        }
+        break;
+    }
+    return branch.trim();
+  }
+
+  /**
+   * Reads and returns the full contents of the specified file.
+   * 
+   * @param file File to read
+   * @return byte[] containing full contents of file
+   * @throws IOException if there is an I/O error while reading the file
+   */
+  private byte[] readFile(File file) throws IOException {
+    RandomAccessFile raf = new RandomAccessFile(file, "r");
+    byte[] buffer = new byte[(int) raf.length()];
+    raf.readFully(buffer);
+    raf.close();
+    return buffer;
+  }
+
+  /**
+   * Given a list of files, computes and returns an MD5 checksum of the full
+   * contents of all files.
+   * 
+   * @param files List<File> containing every file to input into the MD5 checksum
+   * @return byte[] calculated MD5 checksum
+   * @throws IOException if there is an I/O error while reading a file
+   * @throws NoSuchAlgorithmException if the MD5 algorithm is not supported
+   */
+  private byte[] computeMD5(List<File> files) throws IOException, NoSuchAlgorithmException {
+    MessageDigest md5 = MessageDigest.getInstance("MD5");
+    for (File file : files) {
+      getLog().debug("Computing MD5 for: " + file);
+      md5.update(readFile(file));
+    }
+    return md5.digest();
+  }
+
+  /**
+   * Converts bytes to a hexadecimal string representation and returns it.
+   * 
+   * @param array byte[] to convert
+   * @return String containing hexadecimal representation of bytes
+   */
+  private String byteArrayToString(byte[] array) {
+    StringBuilder sb = new StringBuilder();
+    for (byte b : array) {
+      sb.append(Integer.toHexString(0xff & b));
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Computes and returns an MD5 checksum of the contents of all files in the
+   * input Maven FileSet.
+   * 
+   * @return String containing hexadecimal representation of MD5 checksum
+   * @throws Exception if there is any error while computing the MD5 checksum
+   */
+  private String computeMD5() throws Exception {
+    List<File> files = FileSetUtils.convertFileSetToFiles(source);
+    // File order of MD5 calculation is significant.  Sorting is done on
+    // unix-format names, case-folded, in order to get a platform-independent
+    // sort and calculate the same MD5 on all platforms.
+    Collections.sort(files, new Comparator<File>() {
+      @Override
+      public int compare(File lhs, File rhs) {
+        return normalizePath(lhs).compareTo(normalizePath(rhs));
+      }
+
+      private String normalizePath(File file) {
+        return file.getPath().toUpperCase().replaceAll("\\\\", "/");
+      }
+    });
+    byte[] md5 = computeMD5(files);
+    String md5str = byteArrayToString(md5);
+    getLog().info("Computed MD5: " + md5str);
+    return md5str;
+  }
+}
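
The portability note inside computeMD5() above is the essential detail: the digest depends on the order in which files are hashed, so paths are compared case-folded and with backslashes normalized to forward slashes before digesting. A standalone sketch of that technique, assuming the caller supplies the file list; the hex formatting deliberately mirrors byteArrayToString above, including its lack of zero padding:

    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;
    import java.util.Comparator;
    import java.util.List;

    public class Md5OverSortedFiles {
      static String md5Of(List<File> files) throws IOException, NoSuchAlgorithmException {
        // Sort on upper-cased, forward-slash paths so every platform hashes the
        // files in the same order and therefore produces the same digest.
        files.sort(Comparator.comparing(f -> f.getPath().toUpperCase().replace('\\', '/')));
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        for (File f : files) {
          md5.update(Files.readAllBytes(f.toPath()));
        }
        StringBuilder hex = new StringBuilder();
        for (byte b : md5.digest()) {
          hex.append(Integer.toHexString(0xff & b));  // no zero padding, as in the mojo
        }
        return hex.toString();
      }
    }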

+ 5 - 0
hadoop-project/pom.xml

@@ -769,6 +769,11 @@
           <artifactId>maven-pdf-plugin</artifactId>
           <version>1.1</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-maven-plugins</artifactId>
+          <version>${project.version}</version>
+        </plugin>
       </plugins>
     </pluginManagement>
 

+ 3 - 0
hadoop-tools/hadoop-pipes/pom.xml

@@ -57,6 +57,9 @@
                     <exec executable="make" dir="${project.build.directory}/native" failonerror="true">
                       <arg line="VERBOSE=1"/>
                     </exec>
+                    <!-- The second make is a workaround for HADOOP-9215.  It can
+                         be removed when version 2.6 of cmake is no longer supported. -->
+                    <exec executable="make" dir="${project.build.directory}/native" failonerror="true"></exec>
                   </target>
                 </configuration>
               </execution>

+ 36 - 1
hadoop-yarn-project/CHANGES.txt

@@ -37,6 +37,9 @@ Release 2.0.3-alpha - Unreleased
 
     YARN-328. Use token request messages defined in hadoop common. (suresh)
 
+    YARN-231. RM Restart - Add FS-based persistent store implementation for
+    RMStateStore (Bikas Saha via hitesh)
+
   IMPROVEMENTS
 
     YARN-223. Update process tree instead of getting new process trees.
@@ -99,6 +102,11 @@ Release 2.0.3-alpha - Unreleased
 
     YARN-170. Change NodeManager stop to be reentrant. (Sandy Ryza via vinodkv)
 
+    YARN-331. Fill in missing fair scheduler documentation. (sandyr via tucu)
+
+    YARN-277. Use AMRMClient in DistributedShell to exemplify the approach.
+    (Bikas Saha via hitesh)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -189,6 +197,21 @@ Release 2.0.3-alpha - Unreleased
 
     YARN-330. Fix flakey test: TestNodeManagerShutdown#testKillContainersOnShutdown.
     (Sandy Ryza via hitesh)
+    
+    YARN-335. Fair scheduler doesn't check whether rack needs containers
+    before assigning to node. (Sandy Ryza via tomwhite)
+    
+    YARN-336. Fair scheduler FIFO scheduling within a queue only allows 1
+    app at a time. (Sandy Ryza via tomwhite)
+
+    YARN-135. Client tokens should be per app-attempt, and should be
+    unregistered on App-finish. (vinodkv via sseth)
+
+    YARN-302. Fair scheduler assignmultiple should default to false. (sandyr via tucu)
+    
+    YARN-319. Submitting a job to a fair scheduler queue for which the user
+    does not have permission causes the client to wait forever.
+    (shenhong via tomwhite)
 
 Release 2.0.2-alpha - 2012-09-07 
 
@@ -243,6 +266,18 @@ Release 2.0.2-alpha - 2012-09-07
     YARN-138. Ensure default values for minimum/maximum container sizes is
     sane. (harsh & sseth via acmurthy)
 
+Release 0.23.7 - UNRELEASED
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
 Release 0.23.6 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -300,7 +335,7 @@ Release 0.23.6 - UNRELEASED
 
     YARN-334. Maven RAT plugin is not checking all source files (tgraves)
 
-Release 0.23.5 - UNRELEASED
+Release 0.23.5 - 2012-11-28
 
   INCOMPATIBLE CHANGES
 

+ 4 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationAttemptId.java

@@ -38,6 +38,8 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable;
 public abstract class ApplicationAttemptId implements
     Comparable<ApplicationAttemptId> {
   
+  public static final String appAttemptIdStrPrefix = "appattempt_";
+
   /**
    * Get the <code>ApplicationId</code> of the <code>ApplicationAttemptId</code>.
    * @return <code>ApplicationId</code> of the <code>ApplicationAttemptId</code>
@@ -111,11 +113,11 @@ public abstract class ApplicationAttemptId implements
 
   @Override
   public String toString() {
-    StringBuilder sb = new StringBuilder("appattempt_");
+    StringBuilder sb = new StringBuilder(appAttemptIdStrPrefix);
     sb.append(this.getApplicationId().getClusterTimestamp()).append("_");
     sb.append(ApplicationId.appIdFormat.get().format(
         this.getApplicationId().getId()));
     sb.append("_").append(attemptIdFormat.get().format(getAttemptId()));
     return sb.toString();
   }
-}
+}
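
With the prefix extracted into a public constant, toString() above produces identifiers of the form appattempt_<clusterTimestamp>_<appId>_<attemptId>. A hedged sketch of client-side code using the new constant; the sample value and its digit padding are made up for illustration:

    // Hypothetical check against the new public prefix constant.
    String id = "appattempt_1358887295262_0001_000001";  // made-up example value
    if (id.startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) {
      System.out.println("Looks like an application attempt id: " + id);
    }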

+ 4 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationId.java

@@ -38,6 +38,8 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable;
 @Stable
 public abstract class ApplicationId implements Comparable<ApplicationId> {
   
+  public static final String appIdStrPrefix = "application_";
+
   /**
    * Get the short integer identifier of the <code>ApplicationId</code>
    * which is unique for all applications started by a particular instance
@@ -88,7 +90,7 @@ public abstract class ApplicationId implements Comparable<ApplicationId> {
 
   @Override
   public String toString() {
-    return "application_" + this.getClusterTimestamp() + "_"
+    return appIdStrPrefix + this.getClusterTimestamp() + "_"
         + appIdFormat.get().format(getId());
   }
 
@@ -119,4 +121,4 @@ public abstract class ApplicationId implements Comparable<ApplicationId> {
       return false;
     return true;
   }
-}
+}

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationMaster.java

@@ -45,8 +45,8 @@ public interface ApplicationMaster {
   YarnApplicationState getState();
   void setState(YarnApplicationState state);
 
-  String getClientToken();
-  void setClientToken(String clientToken);
+  ClientToken getClientToken();
+  void setClientToken(ClientToken clientToken);
 
   int getAMFailCount();
   void setAMFailCount(int amFailCount);

+ 2 - 2
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java

@@ -144,11 +144,11 @@ public interface ApplicationReport {
    */
   @Public
   @Stable
-  String getClientToken();
+  ClientToken getClientToken();
 
   @Private
   @Unstable
-  void setClientToken(String clientToken);
+  void setClientToken(ClientToken clientToken);
 
   /**
    * Get the <code>YarnApplicationState</code> of the application.

+ 39 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ClientToken.java

@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Stable;
+
+/**
+ * <p>
+ * <code>ClientToken</code> is the security token used by the ApplicationMaster
+ * to verify the authenticity of any <code>client</code>.
+ * </p>
+ * 
+ * <p>
+ * The <code>ResourceManager</code> provides a secure token (via
+ * {@link ApplicationReport#getClientToken()}) which is verified by the
+ * ApplicationMaster when the client talks directly to an AM.
+ * </p>
+ * 
+ */
+@Public
+@Stable
+public interface ClientToken extends Token {}

+ 1 - 53
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerToken.java

@@ -18,9 +18,6 @@
 
 package org.apache.hadoop.yarn.api.records;
 
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Stable;
 import org.apache.hadoop.yarn.api.AMRMProtocol;
@@ -43,53 +40,4 @@ import org.apache.hadoop.yarn.api.ContainerManager;
  */
 @Public
 @Stable
-public interface ContainerToken extends DelegationToken {
-  /**
-   * Get the token identifier.
-   * @return token identifier
-   */
-  @Public
-  @Stable
-  ByteBuffer getIdentifier();
-  
-  @Private
-  @Stable
-  void setIdentifier(ByteBuffer identifier);
-
-  /**
-   * Get the token password
-   * @return token password
-   */
-  @Public
-  @Stable
-  ByteBuffer getPassword();
-  
-  @Private
-  @Stable
-  void setPassword(ByteBuffer password);
-
-  /**
-   * Get the token kind.
-   * @return token kind
-   */
-  @Public
-  @Stable
-  String getKind();
-  
-  @Private
-  @Stable
-  void setKind(String kind);
-
-  /**
-   * Get the service to which the token is allocated.
-   * @return service to which the token is allocated
-   */
-  @Public
-  @Stable
-  String getService();
-
-  @Private
-  @Stable
-  void setService(String service);
-
-}
+public interface ContainerToken extends Token {}

+ 1 - 53
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/DelegationToken.java

@@ -18,12 +18,8 @@
 
 package org.apache.hadoop.yarn.api.records;
 
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Public;
 import org.apache.hadoop.classification.InterfaceStability.Evolving;
-import org.apache.hadoop.classification.InterfaceStability.Stable;
 import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier;
 
 /**
@@ -33,52 +29,4 @@ import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdenti
  */
 @Public
 @Evolving
-public interface DelegationToken {
-  /**
-   * Get the token identifier.
-   * @return token identifier
-   */
-  @Public
-  @Stable
-  ByteBuffer getIdentifier();
-  
-  @Private
-  @Stable
-  void setIdentifier(ByteBuffer identifier);
-
-  /**
-   * Get the token password
-   * @return token password
-   */
-  @Public
-  @Stable
-  ByteBuffer getPassword();
-  
-  @Private
-  @Stable
-  void setPassword(ByteBuffer password);
-
-  /**
-   * Get the token kind.
-   * @return token kind
-   */
-  @Public
-  @Stable
-  String getKind();
-  
-  @Private
-  @Stable
-  void setKind(String kind);
-
-  /**
-   * Get the service to which the token is allocated.
-   * @return service to which the token is allocated
-   */
-  @Public
-  @Stable
-  String getService();
-
-  @Private
-  @Stable
-  void setService(String service);
-}
+public interface DelegationToken extends Token {}

+ 82 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Token.java

@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.records;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Stable;
+
+/**
+ * <p><code>Token</code> is the security entity used by the framework
+ * to verify the authenticity of any resource.</p>
+ */
+@Public
+@Stable
+public interface Token {
+  /**
+   * Get the token identifier.
+   * @return token identifier
+   */
+  @Public
+  @Stable
+  ByteBuffer getIdentifier();
+  
+  @Private
+  @Stable
+  void setIdentifier(ByteBuffer identifier);
+
+  /**
+   * Get the token password
+   * @return token password
+   */
+  @Public
+  @Stable
+  ByteBuffer getPassword();
+  
+  @Private
+  @Stable
+  void setPassword(ByteBuffer password);
+
+  /**
+   * Get the token kind.
+   * @return token kind
+   */
+  @Public
+  @Stable
+  String getKind();
+  
+  @Private
+  @Stable
+  void setKind(String kind);
+
+  /**
+   * Get the service to which the token is allocated.
+   * @return service to which the token is allocated
+   */
+  @Public
+  @Stable
+  String getService();
+
+  @Private
+  @Stable
+  void setService(String service);
+
+}
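
This patch collapses ClientToken, ContainerToken, and DelegationToken (shown earlier in this diff) into thin sub-interfaces of this common Token type, so code can treat any of them uniformly. A minimal sketch under that assumption; the helper itself is hypothetical, and only the getters come from the interface above:

    import org.apache.hadoop.yarn.api.records.Token;

    public class TokenUtils {
      /**
       * Hypothetical helper: summarize any YARN token without caring which
       * sub-interface (ClientToken, ContainerToken, DelegationToken) it is.
       */
      public static String describe(Token token) {
        return "kind=" + token.getKind()
            + ", service=" + token.getService()
            + ", identifierBytes=" + token.getIdentifier().remaining();
      }
    }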

+ 34 - 14
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationMasterPBImpl.java

@@ -18,10 +18,11 @@
 
 package org.apache.hadoop.yarn.api.records.impl.pb;
 
-
+import org.apache.hadoop.security.proto.SecurityProtos.TokenProto;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationMaster;
 import org.apache.hadoop.yarn.api.records.ApplicationStatus;
+import org.apache.hadoop.yarn.api.records.ClientToken;
 import org.apache.hadoop.yarn.api.records.ProtoBase;
 import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
@@ -31,15 +32,15 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationStatusProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationStateProto;
 import org.apache.hadoop.yarn.util.ProtoUtils;
 
-
-public class ApplicationMasterPBImpl extends ProtoBase<ApplicationMasterProto> implements ApplicationMaster {
+public class ApplicationMasterPBImpl extends ProtoBase<ApplicationMasterProto>
+    implements ApplicationMaster {
   ApplicationMasterProto proto = ApplicationMasterProto.getDefaultInstance();
   ApplicationMasterProto.Builder builder = null;
   boolean viaProto = false;
 
   private ApplicationId applicationId = null;
   private ApplicationStatus applicationStatus = null;
-
+  private ClientToken clientToken = null;
 
   public ApplicationMasterPBImpl() {
     builder = ApplicationMasterProto.newBuilder();
@@ -59,13 +60,22 @@ public class ApplicationMasterPBImpl extends ProtoBase<ApplicationMasterProto> i
   }
 
   private void mergeLocalToBuilder() {
-    if (this.applicationId != null && !((ApplicationIdPBImpl)this.applicationId).getProto().equals(builder.getApplicationId())) {
+    if (this.applicationId != null
+        && !((ApplicationIdPBImpl) this.applicationId).getProto().equals(
+          builder.getApplicationId())) {
       builder.setApplicationId(convertToProtoFormat(this.applicationId));
     }
 
-    if (this.applicationStatus != null && !((ApplicationStatusPBImpl)this.applicationStatus).getProto().equals(builder.getStatus())) {
+    if (this.applicationStatus != null
+        && !((ApplicationStatusPBImpl) this.applicationStatus).getProto()
+          .equals(builder.getStatus())) {
       builder.setStatus(convertToProtoFormat(this.applicationStatus));
     }
+    if (this.clientToken != null
+        && !((ClientTokenPBImpl) this.clientToken).getProto().equals(
+          builder.getClientToken())) {
+      builder.setClientToken(convertToProtoFormat(this.clientToken));
+    }
   }
 
   private void mergeLocalToProto() {
@@ -188,23 +198,26 @@ public class ApplicationMasterPBImpl extends ProtoBase<ApplicationMasterProto> i
     this.applicationStatus = status;
 
   }
+
   @Override
-  public String getClientToken() {
+  public ClientToken getClientToken() {
     ApplicationMasterProtoOrBuilder p = viaProto ? proto : builder;
+    if (this.clientToken != null) {
+      return this.clientToken;
+    }
     if (!p.hasClientToken()) {
       return null;
     }
-    return (p.getClientToken());
+    this.clientToken = convertFromProtoFormat(p.getClientToken());
+    return this.clientToken;
   }
-
+  
   @Override
-  public void setClientToken(String clientToken) {
+  public void setClientToken(ClientToken clientToken) {
     maybeInitBuilder();
-    if (clientToken == null) {
+    if (clientToken == null) 
       builder.clearClientToken();
-      return;
-    }
-    builder.setClientToken((clientToken));
+    this.clientToken = clientToken;
   }
 
   @Override
@@ -271,4 +284,11 @@ public class ApplicationMasterPBImpl extends ProtoBase<ApplicationMasterProto> i
     return ((ApplicationStatusPBImpl)t).getProto();
   }
 
+  private ClientTokenPBImpl convertFromProtoFormat(TokenProto p) {
+    return new ClientTokenPBImpl(p);
+  }
+
+  private TokenProto convertToProtoFormat(ClientToken t) {
+    return ((ClientTokenPBImpl)t).getProto();
+  }
 }

+ 30 - 14
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java

@@ -18,19 +18,21 @@
 
 package org.apache.hadoop.yarn.api.records.impl.pb;
 
+import org.apache.hadoop.security.proto.SecurityProtos.TokenProto;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationReport;
-import org.apache.hadoop.yarn.api.records.YarnApplicationState;
-import org.apache.hadoop.yarn.api.records.ProtoBase;
 import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
+import org.apache.hadoop.yarn.api.records.ClientToken;
+import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
+import org.apache.hadoop.yarn.api.records.ProtoBase;
+import org.apache.hadoop.yarn.api.records.YarnApplicationState;
 import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationAttemptIdProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProtoOrBuilder;
-import org.apache.hadoop.yarn.proto.YarnProtos.FinalApplicationStatusProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationResourceUsageReportProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.FinalApplicationStatusProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationStateProto;
 import org.apache.hadoop.yarn.util.ProtoUtils;
 
@@ -40,8 +42,9 @@ implements ApplicationReport {
   ApplicationReportProto.Builder builder = null;
   boolean viaProto = false;
 
-  ApplicationId applicationId;
-  ApplicationAttemptId currentApplicationAttemptId;
+  private ApplicationId applicationId;
+  private ApplicationAttemptId currentApplicationAttemptId;
+  private ClientToken clientToken = null;
 
   public ApplicationReportPBImpl() {
     builder = ApplicationReportProto.newBuilder();
@@ -159,12 +162,16 @@ implements ApplicationReport {
   }
 
   @Override
-  public String getClientToken() {
+  public ClientToken getClientToken() {
     ApplicationReportProtoOrBuilder p = viaProto ? proto : builder;
+    if (this.clientToken != null) {
+      return this.clientToken;
+    }
     if (!p.hasClientToken()) {
       return null;
     }
-    return (p.getClientToken());
+    this.clientToken = convertFromProtoFormat(p.getClientToken());
+    return this.clientToken;
   }
 
   @Override
@@ -176,7 +183,6 @@ implements ApplicationReport {
     return p.getUser();
   }
 
-
   @Override
   public String getDiagnostics() {
     ApplicationReportProtoOrBuilder p = viaProto ? proto : builder;
@@ -290,13 +296,11 @@ implements ApplicationReport {
   }
 
   @Override
-  public void setClientToken(String clientToken) {
+  public void setClientToken(ClientToken clientToken) {
     maybeInitBuilder();
-    if (clientToken == null) {
+    if (clientToken == null) 
       builder.clearClientToken();
-      return;
-    }
-    builder.setClientToken((clientToken));
+    this.clientToken = clientToken;
   }
 
   @Override
@@ -360,6 +364,11 @@ implements ApplicationReport {
             builder.getCurrentApplicationAttemptId())) {
       builder.setCurrentApplicationAttemptId(convertToProtoFormat(this.currentApplicationAttemptId));
     }
+    if (this.clientToken != null
+        && !((ClientTokenPBImpl) this.clientToken).getProto().equals(
+            builder.getClientToken())) {
+      builder.setClientToken(convertToProtoFormat(this.clientToken));
+    }
   }
 
   private void mergeLocalToProto() {
@@ -419,4 +428,11 @@ implements ApplicationReport {
     return ProtoUtils.convertToProtoFormat(s);
   }
 
+  private ClientTokenPBImpl convertFromProtoFormat(TokenProto p) {
+    return new ClientTokenPBImpl(p);
+  }
+
+  private TokenProto convertToProtoFormat(ClientToken t) {
+    return ((ClientTokenPBImpl)t).getProto();
+  }
 }

Some files were not shown because too many files changed in this diff