
Merge r1234388 through r1236385 from 0.23.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23-PB@1236395 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze, 13 years ago
parent
commit
359c746ca7
100 changed files with 6,200 additions and 394 deletions
  1. 11 0
      hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml
  2. 18 0
      hadoop-common-project/hadoop-common/CHANGES.txt
  3. 0 5
      hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/commands_manual.xml
  4. 6 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
  5. 8 6
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java
  6. 11 3
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Stat.java
  7. 18 7
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java
  8. 11 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
  9. 0 14
      hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
  10. 6 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
  11. 32 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserFromEnv.java
  12. 2 2
      hadoop-common-project/hadoop-common/src/test/resources/testConf.xml
  13. 1 1
      hadoop-hdfs-project/dev-support/test-patch.properties
  14. 5 0
      hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml
  15. 2 2
      hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java
  16. 19 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  17. 9 11
      hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_user_guide.xml
  18. 9 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
  19. 19 11
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
  20. 9 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  21. 0 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
  22. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp
  23. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp
  24. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp
  25. 4 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java
  26. 118 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java
  27. 15 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java
  28. 25 0
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java
  29. 0 26
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
  30. 0 4
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java
  31. 0 1
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecondaryWebUi.java
  32. 0 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java
  33. 0 2
      hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java
  34. 56 0
      hadoop-mapreduce-project/CHANGES.txt
  35. 8 13
      hadoop-mapreduce-project/bin/mapred
  36. 3 3
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
  37. 4 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml
  38. 9 6
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java
  39. 18 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java
  40. 4 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java
  41. 1 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java
  42. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
  43. 2 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java
  44. 17 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/EventFetcher.java
  45. 15 2
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java
  46. 26 12
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java
  47. 4 15
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java
  48. 7 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml
  49. 0 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java
  50. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java
  51. 1 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/GenericMRLoadGenerator.java
  52. 101 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFileInputFormat.java
  53. 0 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/ThreadedMapBenchmark.java
  54. 0 1
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/GenericMRLoadGenerator.java
  55. 757 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java
  56. 298 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java
  57. 111 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFileInputFormat.java
  58. 0 0
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/test/MapredTestDriver.java
  59. 2 2
      hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml
  60. 3 2
      hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraGen.java
  61. 1 15
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java
  62. 4 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
  63. 3 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java
  64. 14 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml
  65. 7 10
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java
  66. 29 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java
  67. 6 2
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
  68. 59 61
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
  69. 19 18
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java
  70. 1 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
  71. 62 16
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java
  72. 7 7
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
  73. 52 62
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java
  74. 31 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java
  75. 13 10
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java
  76. 8 5
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmIpFilter.java
  77. 121 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/amfilter/TestAmFilter.java
  78. 10 10
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm
  79. 49 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/WebApplicationProxy.apt.vm
  80. 2 0
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/index.apt.vm
  81. 23 0
      hadoop-project/pom.xml
  82. 1 0
      hadoop-project/src/site/site.xml
  83. 7 0
      hadoop-tools/hadoop-distcp/README
  84. 198 0
      hadoop-tools/hadoop-distcp/pom.xml
  85. 218 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
  86. 405 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java
  87. 104 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
  88. 218 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
  89. 525 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
  90. 100 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/FileBasedCopyListing.java
  91. 105 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java
  92. 246 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java
  93. 275 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
  94. 297 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
  95. 330 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java
  96. 124 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java
  97. 56 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java
  98. 245 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
  99. 169 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java
  100. 246 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java

+ 11 - 0
hadoop-assemblies/src/main/resources/assemblies/hadoop-mapreduce-dist.xml

@@ -127,6 +127,17 @@
         <unpack>false</unpack>
       </binaries>
     </moduleSet>
+    <moduleSet>
+      <includes>
+        <include>org.apache.hadoop:hadoop-mapreduce-client-jobclient</include>
+      </includes>
+      <binaries>
+        <attachmentClassifier>tests</attachmentClassifier>
+        <outputDirectory>share/hadoop/${hadoop.component}</outputDirectory>
+        <includeDependencies>false</includeDependencies>
+        <unpack>false</unpack>
+      </binaries>
+    </moduleSet>
   </moduleSets>
   <dependencySets>
     <dependencySet>

+ 18 - 0
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -125,6 +125,21 @@ Release 0.23.1 - Unreleased
 
     HADOOP-7975. Add LZ4 as an entry in the default codec list, missed by HADOOP-7657 (harsh)
 
+    HADOOP-7987. Support setting the run-as user in unsecure mode. (jitendra)
+
+    HADOOP-4515. Configuration#getBoolean must not be case sensitive. (Sho Shimauchi via harsh)
+
+    HADOOP-6490. Use StringUtils over String#replace in Path#normalizePath.
+    (Uma Maheswara Rao G via harsh)
+
+    HADOOP-7574. Improve FSShell -stat, add user/group elements.
+    (XieXianshan via harsh)
+
+    HADOOP-7736. Remove duplicate Path#normalizePath call. (harsh)
+
+    HADOOP-7919. Remove the unused hadoop.logfile.* properties from the 
+    core-default.xml file. (harsh)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -207,6 +222,9 @@ Release 0.23.1 - Unreleased
    HADOOP-7986. Adding config for MapReduce History Server protocol in
    hadoop-policy.xml for service level authorization. (Mahadev Konar via vinodkv)
 
+   HADOOP-7981. Improve documentation for org.apache.hadoop.io.compress.
+   Decompressor.getRemaining (Jonathan Eagles via mahadev)
+
 Release 0.23.0 - 2011-11-01 
 
   INCOMPATIBLE CHANGES

+ 0 - 5
hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/commands_manual.xml

@@ -753,11 +753,6 @@
 			
 			<section>
 				<title> secondarynamenode </title>
-				<note>
-					The Secondary NameNode has been deprecated. Instead, consider using the
-					<a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Checkpoint+Node">Checkpoint Node</a> or 
-					<a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Backup+Node">Backup Node</a>. 
-				</note>
 				<p>	
 					Runs the HDFS secondary 
 					namenode. See <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Secondary+NameNode">Secondary NameNode</a> 

+ 6 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java

@@ -826,6 +826,12 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
    */
   public boolean getBoolean(String name, boolean defaultValue) {
     String valueString = getTrimmed(name);
+    if (null == valueString || "".equals(valueString)) {
+      return defaultValue;
+    }
+
+    valueString = valueString.toLowerCase();
+
     if ("true".equals(valueString))
       return true;
     else if ("false".equals(valueString))
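
The lower-casing above makes getBoolean() accept any capitalization of "true"/"false", while empty or missing values still fall back to the supplied default (HADOOP-4515). A minimal sketch of the resulting behaviour; the keys used here are hypothetical:

    import org.apache.hadoop.conf.Configuration;

    public class GetBooleanDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("demo.flag", "TRUE");   // hypothetical key; casing no longer matters
        conf.set("demo.empty", "");      // empty value falls back to the default

        System.out.println(conf.getBoolean("demo.flag", false));   // true
        System.out.println(conf.getBoolean("demo.empty", false));  // false (default)
        System.out.println(conf.getBoolean("demo.missing", true)); // true (default)
      }
    }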

+ 8 - 6
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java

@@ -18,10 +18,12 @@
 
 package org.apache.hadoop.fs;
 
-import java.net.*;
-import java.io.*;
-import org.apache.avro.reflect.Stringable;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
 
+import org.apache.avro.reflect.Stringable;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -76,7 +78,7 @@ public class Path implements Comparable {
     }
     URI resolved = parentUri.resolve(child.uri);
     initialize(resolved.getScheme(), resolved.getAuthority(),
-               normalizePath(resolved.getPath()), resolved.getFragment());
+               resolved.getPath(), resolved.getFragment());
   }
 
   private void checkPathArg( String path ) {
@@ -158,8 +160,8 @@ public class Path implements Comparable {
 
   private String normalizePath(String path) {
     // remove double slashes & backslashes
-    path = path.replace("//", "/");
-    path = path.replace("\\", "/");
+    path = StringUtils.replace(path, "//", "/");
+    path = StringUtils.replace(path, "\\", "/");
     
     // trim trailing slash from non-root path (ignoring windows drive)
     int minLength = hasWindowsDrive(path, true) ? 4 : 1;
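
For context on HADOOP-6490/HADOOP-7736 above: commons-lang StringUtils.replace() is a plain indexOf-based literal replacement, while String#replace(CharSequence, CharSequence) on the JDKs of this era routes through a literal regex Pattern on every call; the result is the same, only the cost differs. A small hedged illustration (the sample path is made up):

    import org.apache.commons.lang.StringUtils;

    public class NormalizeSketch {
      public static void main(String[] args) {
        String raw = "hdfs://nn//user\\data";
        // Collapse double slashes and flip backslashes both ways; the
        // outputs are identical, mirroring the normalizePath() change.
        String viaJdk = raw.replace("//", "/").replace("\\", "/");
        String viaCommons =
            StringUtils.replace(StringUtils.replace(raw, "//", "/"), "\\", "/");
        System.out.println(viaJdk + " " + viaJdk.equals(viaCommons));  // ... true
      }
    }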

+ 11 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Stat.java

@@ -32,9 +32,11 @@ import org.apache.hadoop.fs.FileStatus;
  * Print statistics about path in specified format.
  * Format sequences:
  *   %b: Size of file in blocks
+ *   %g: Group name of owner
  *   %n: Filename
  *   %o: Block size
  *   %r: replication
+ *   %u: User name of owner
  *   %y: UTC date as &quot;yyyy-MM-dd HH:mm:ss&quot;
  *   %Y: Milliseconds since January 1, 1970 UTC
  */
@@ -50,8 +52,8 @@ class Stat extends FsCommand {
   public static final String USAGE = "[format] <path> ...";
   public static final String DESCRIPTION =
     "Print statistics about the file/directory at <path>\n" +
-    "in the specified format. Format accepts filesize in blocks (%b), filename (%n),\n" +
-    "block size (%o), replication (%r), modification date (%y, %Y)\n";
+    "in the specified format. Format accepts filesize in blocks (%b), group name of owner(%g),\n" +
+    "filename (%n), block size (%o), replication (%r), user name of owner(%u), modification date (%y, %Y)\n";
 
   protected static final SimpleDateFormat timeFmt;
   static {
@@ -92,6 +94,9 @@ class Stat extends FsCommand {
                 ? "directory" 
                 : (stat.isFile() ? "regular file" : "symlink"));
             break;
+          case 'g':
+            buf.append(stat.getGroup());
+            break;
           case 'n':
             buf.append(item.path.getName());
             break;
@@ -101,6 +106,9 @@ class Stat extends FsCommand {
           case 'r':
             buf.append(stat.getReplication());
             break;
+          case 'u':
+            buf.append(stat.getOwner());
+            break;
           case 'y':
             buf.append(timeFmt.format(new Date(stat.getModificationTime())));
             break;
@@ -118,4 +126,4 @@ class Stat extends FsCommand {
     }
     out.println(buf.toString());
   }
-}
+}
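
With the new %g and %u sequences, a stat format string can report ownership alongside the existing fields (HADOOP-7574). A hedged usage sketch that drives FsShell programmatically; the path is hypothetical and the printed owner/group depend on the cluster:

    import org.apache.hadoop.fs.FsShell;
    import org.apache.hadoop.util.ToolRunner;

    public class StatDemo {
      public static void main(String[] args) throws Exception {
        // Prints something like "hdfs supergroup part-00000" for the path.
        ToolRunner.run(new FsShell(),
            new String[] {"-stat", "%u %g %n", "/user/hdfs/part-00000"});
      }
    }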

+ 18 - 7
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java

@@ -49,7 +49,7 @@ public interface Decompressor {
   public void setInput(byte[] b, int off, int len);
   
   /**
-   * Returns true if the input data buffer is empty and 
+   * Returns <code>true</code> if the input data buffer is empty and 
    * {@link #setInput(byte[], int, int)} should be called to
    * provide more input. 
    * 
@@ -76,8 +76,11 @@ public interface Decompressor {
   public boolean needsDictionary();
 
   /**
-   * Returns true if the end of the decompressed 
-   * data output stream has been reached.
+   * Returns <code>true</code> if the end of the decompressed 
+   * data output stream has been reached. Indicates a concatenated data stream
+   * when finished() returns <code>true</code> and {@link #getRemaining()}
+   * returns a positive value. finished() will be reset with the
+   * {@link #reset()} method.
    * @return <code>true</code> if the end of the decompressed
    * data output stream has been reached.
    */
@@ -98,15 +101,23 @@ public interface Decompressor {
   public int decompress(byte[] b, int off, int len) throws IOException;
 
   /**
-   * Returns the number of bytes remaining in the compressed-data buffer;
-   * typically called after the decompressor has finished decompressing
-   * the current gzip stream (a.k.a. "member").
+   * Returns the number of bytes remaining in the compressed data buffer.
+   * Indicates a concatenated data stream if {@link #finished()} returns
+   * <code>true</code> and getRemaining() returns a positive value. If
+   * {@link #finished()} returns <code>true</code> and getRemaining() returns
+   * a zero value, indicates that the end of data stream has been reached and
+   * is not a concatenated data stream. 
+   * @return The number of bytes remaining in the compressed data buffer.
    */
   public int getRemaining();
 
   /**
    * Resets decompressor and input and output buffers so that a new set of
-   * input data can be processed.
+   * input data can be processed. If {@link #finished()}} returns
+   * <code>true</code> and {@link #getRemaining()} returns a positive value,
+   * reset() is called before processing of the next data stream in the
+   * concatenated data stream. {@link #finished()} will be reset and will
+   * return <code>false</code> when reset() is called.
    */
   public void reset();
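
The clarified contract above suggests a simple caller-side pattern for concatenated streams (for example multi-member gzip): decompress until finished(), and if getRemaining() is still positive, reset() and feed the leftover compressed bytes back in. The sketch below only illustrates that control flow; the way the leftover offset is computed from the original buffer is an assumption for illustration, not Hadoop's own codec code:

    import java.io.IOException;
    import java.io.OutputStream;
    import org.apache.hadoop.io.compress.Decompressor;

    public class ConcatenatedStreamSketch {
      public static void drain(Decompressor decomp, byte[] compressed,
          OutputStream out) throws IOException {
        byte[] buf = new byte[64 * 1024];
        decomp.setInput(compressed, 0, compressed.length);
        while (true) {
          int n = decomp.decompress(buf, 0, buf.length);
          if (n > 0) {
            out.write(buf, 0, n);
          }
          if (decomp.finished()) {
            int remaining = decomp.getRemaining();
            if (remaining == 0) {
              break;  // true end of data, not a concatenated stream
            }
            // finished() plus a positive remainder signals a concatenated
            // stream: reset and hand the unconsumed tail back as new input.
            decomp.reset();
            decomp.setInput(compressed, compressed.length - remaining, remaining);
          } else if (decomp.needsInput()) {
            break;  // no further input available in this sketch
          }
        }
      }
    }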
 

+ 11 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java

@@ -80,6 +80,7 @@ public class UserGroupInformation {
    * Percentage of the ticket window to use before we renew ticket.
    */
   private static final float TICKET_RENEW_WINDOW = 0.80f;
+  static final String HADOOP_USER_NAME = "HADOOP_USER_NAME";
   
   /** 
    * UgiMetrics maintains UGI activity statistics
@@ -137,7 +138,16 @@ public class UserGroupInformation {
           LOG.debug("using kerberos user:"+user);
         }
       }
-      // if we don't have a kerberos user, use the OS user
+      //If we don't have a kerberos user and security is disabled, check
+      //if user is specified in the environment or properties
+      if (!isSecurityEnabled() && (user == null)) {
+        String envUser = System.getenv(HADOOP_USER_NAME);
+        if (envUser == null) {
+          envUser = System.getProperty(HADOOP_USER_NAME);
+        }
+        user = envUser == null ? null : new User(envUser);
+      }
+      // use the OS user
       if (user == null) {
         user = getCanonicalUser(OS_PRINCIPAL_CLASS);
         if (LOG.isDebugEnabled()) {
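
Combined with the new HADOOP_USER_NAME lookup above, an unsecure client can choose its run-as user through the environment or a system property, which is exactly what the new TestUserFromEnv below exercises. A minimal sketch, assuming security (Kerberos) is disabled; the user name "alice" is arbitrary:

    import java.io.IOException;
    import org.apache.hadoop.security.UserGroupInformation;

    public class RunAsDemo {
      public static void main(String[] args) throws IOException {
        // Same effect as launching the JVM with HADOOP_USER_NAME=alice set
        // in the environment; the property must be set before the first
        // getLoginUser() call, since the login user is cached.
        System.setProperty("HADOOP_USER_NAME", "alice");
        UserGroupInformation ugi = UserGroupInformation.getLoginUser();
        System.out.println(ugi.getUserName());  // alice
      }
    }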

+ 0 - 14
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -134,20 +134,6 @@
   </description>
 </property>
 
-<!--- logging properties -->
-
-<property>
-  <name>hadoop.logfile.size</name>
-  <value>10000000</value>
-  <description>The max size of each log file</description>
-</property>
-
-<property>
-  <name>hadoop.logfile.count</name>
-  <value>10</value>
-  <description>The max number of log files</description>
-</property>
-
 <!-- i/o properties -->
 <property>
   <name>io.file.buffer.size</name>

+ 6 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java

@@ -451,6 +451,9 @@ public class TestConfiguration extends TestCase {
     appendProperty("test.bool3", "  true ");
     appendProperty("test.bool4", " false ");
     appendProperty("test.bool5", "foo");
+    appendProperty("test.bool6", "TRUE");
+    appendProperty("test.bool7", "FALSE");
+    appendProperty("test.bool8", "");
     endConfig();
     Path fileResource = new Path(CONFIG);
     conf.addResource(fileResource);
@@ -459,6 +462,9 @@ public class TestConfiguration extends TestCase {
     assertEquals(true, conf.getBoolean("test.bool3", false));
     assertEquals(false, conf.getBoolean("test.bool4", true));
     assertEquals(true, conf.getBoolean("test.bool5", true));
+    assertEquals(true, conf.getBoolean("test.bool6", false));
+    assertEquals(false, conf.getBoolean("test.bool7", true));
+    assertEquals(false, conf.getBoolean("test.bool8", false));
   }
   
   public void testFloatValues() throws IOException {

+ 32 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserFromEnv.java

@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.security;
+
+import java.io.IOException;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestUserFromEnv {
+
+  @Test
+  public void testUserFromEnvironment() throws IOException {
+    System.setProperty(UserGroupInformation.HADOOP_USER_NAME, "randomUser");
+    Assert.assertEquals("randomUser", UserGroupInformation.getLoginUser()
+        .getUserName());
+  }
+}

+ 2 - 2
hadoop-common-project/hadoop-common/src/test/resources/testConf.xml

@@ -610,11 +610,11 @@
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
-          <expected-output>^( |\t)*in the specified format. Format accepts filesize in blocks \(%b\), filename \(%n\),( )*</expected-output>
+          <expected-output>^( |\t)*in the specified format. Format accepts filesize in blocks \(%b\), group name of owner\(%g\),( )*</expected-output>
         </comparator>
         <comparator>
           <type>RegexpComparator</type>
-          <expected-output>^( |\t)*block size \(%o\), replication \(%r\), modification date \(%y, %Y\)( )*</expected-output>
+          <expected-output>^( |\t)*filename \(%n\), block size \(%o\), replication \(%r\), user name of owner\(%u\), modification date \(%y, %Y\)( )*</expected-output>
         </comparator>
       </comparators>
     </test>

+ 1 - 1
hadoop-hdfs-project/dev-support/test-patch.properties

@@ -18,4 +18,4 @@
 
 OK_RELEASEAUDIT_WARNINGS=0
 OK_FINDBUGS_WARNINGS=0
-OK_JAVADOC_WARNINGS=2
+OK_JAVADOC_WARNINGS=0

+ 5 - 0
hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml

@@ -53,6 +53,11 @@
       <artifactId>mockito-all</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+      <scope>provided</scope>
+    </dependency>
     <dependency>
       <groupId>com.sun.jersey</groupId>
       <artifactId>jersey-server</artifactId>

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java

@@ -219,7 +219,7 @@ public class HttpFSServer {
    * operation is @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.GetOpValues#LISTSTATUS}
    * @param doAs user being impersonated, defualt value is none. It can be used
    * only if the current user is a HttpFSServer proxyuser.
-   * @param override, default is true. Used only for
+   * @param override default is true. Used only for
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.
    * @param blockSize block size to set, used only by
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.
@@ -419,7 +419,7 @@ public class HttpFSServer {
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#SETOWNER} operations.
    * @param group group to set, used only for
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#SETOWNER} operations.
-   * @param override, default is true. Used only for
+   * @param override default is true. Used only for
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.
    * @param blockSize block size to set, used only by
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.

+ 19 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -201,6 +201,10 @@ Release 0.23.1 - UNRELEASED
 
     HDFS-2817. Combine the two TestSafeMode test suites. (todd)
 
+    HDFS-2818. Fix a missing space issue in HDFS webapps' title tags. (Devaraj K via harsh)
+
+    HDFS-2397. Undeprecate SecondaryNameNode (eli)
+
   OPTIMIZATIONS
 
     HDFS-2130. Switch default checksum to CRC32C. (todd)
@@ -215,6 +219,12 @@ Release 0.23.1 - UNRELEASED
     for a client on the same node as the block file.  (Andrew Purtell,
     Suresh Srinivas and Jitendra Nath Pandey via szetszwo)
 
+    HDFS-2825. Add test hook to turn off the writer preferring its local
+    DN. (todd)
+
+    HDFS-2826. Add test case for HDFS-1476 (safemode can initialize
+    replication queues before exiting) (todd)
+
   BUG FIXES
 
     HDFS-2541. For a sufficiently large value of blocks, the DN Scanner 
@@ -276,6 +286,15 @@ Release 0.23.1 - UNRELEASED
     HDFS-2816. Fix missing license header in httpfs findbugsExcludeFile.xml.
     (hitesh via tucu)
 
+    HDFS-2822. processMisReplicatedBlock incorrectly identifies
+    under-construction blocks as under-replicated. (todd)
+
+    HDFS-442. dfsthroughput in test jar throws NPE (harsh)
+
+    HDFS-2836. HttpFSServer still has 2 javadoc warnings in trunk (revans2 via tucu)
+
+    HDFS-2837. mvn javadoc:javadoc not seeing LimitedPrivate class (revans2 via tucu)
+
 Release 0.23.0 - 2011-11-01 
 
   INCOMPATIBLE CHANGES

+ 9 - 11
hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/hdfs_user_guide.xml

@@ -112,17 +112,18 @@
     		problems.
     	</li>
     	<li>
-    		Secondary NameNode (deprecated): performs periodic checkpoints of the 
+    		Secondary NameNode: performs periodic checkpoints of the 
     		namespace and helps keep the size of file containing log of HDFS 
     		modifications within certain limits at the NameNode.
-    		Replaced by Checkpoint node.
     	</li>
+
     	<li>
     		Checkpoint node: performs periodic checkpoints of the namespace and
     		helps minimize the size of the log stored at the NameNode 
     		containing changes to the HDFS.
-    		Replaces the role previously filled by the Secondary NameNode. 
-    		NameNode allows multiple Checkpoint nodes simultaneously, 
+    		Replaces the role previously filled by the Secondary NameNode,
+                though is not yet battle hardened.
+    		The NameNode allows multiple Checkpoint nodes simultaneously, 
     		as long as there are no Backup nodes registered with the system.
     	</li>
     	<li>
@@ -132,6 +133,7 @@
     		which is always in sync with the active NameNode namespace state.
     		Only one Backup node may be registered with the NameNode at once.
     	</li>
+
       </ul>
     </li>
     </ul>
@@ -234,12 +236,6 @@
    
    </section> 
 	<section> <title>Secondary NameNode</title>
-   <note>
-   The Secondary NameNode has been deprecated. 
-   Instead, consider using the 
-   <a href="hdfs_user_guide.html#Checkpoint+node">Checkpoint Node</a> or 
-   <a href="hdfs_user_guide.html#Backup+node">Backup Node</a>.
-   </note>
    <p>	
      The NameNode stores modifications to the file system as a log
      appended to a native file system file, <code>edits</code>. 
@@ -287,7 +283,9 @@
      <a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#secondarynamenode">secondarynamenode</a>.
    </p>
    
-   </section><section> <title> Checkpoint Node </title>
+   </section>
+
+   <section> <title> Checkpoint Node </title>
    <p>NameNode persists its namespace using two files: <code>fsimage</code>,
       which is the latest checkpoint of the namespace and <code>edits</code>,
       a journal (log) of changes to the namespace since the checkpoint.

+ 9 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -1793,7 +1793,8 @@ public class BlockManager {
   public void processMisReplicatedBlocks() {
     assert namesystem.hasWriteLock();
 
-    long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
+    long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0,
+         nrUnderConstruction = 0;
     neededReplications.clear();
     for (BlockInfo block : blocksMap.getBlocks()) {
       INodeFile fileINode = block.getINode();
@@ -1803,6 +1804,12 @@ public class BlockManager {
         addToInvalidates(block);
         continue;
       }
+      if (!block.isComplete()) {
+        // Incomplete blocks are never considered mis-replicated --
+        // they'll be reached when they are completed or recovered.
+        nrUnderConstruction++;
+        continue;
+      }
       // calculate current replication
       short expectedReplication = fileINode.getReplication();
       NumberReplicas num = countNodes(block);
@@ -1826,6 +1833,7 @@ public class BlockManager {
     LOG.info("Number of invalid blocks          = " + nrInvalid);
     LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);
     LOG.info("Number of  over-replicated blocks = " + nrOverReplicated);
+    LOG.info("Number of blocks being written    = " + nrUnderConstruction);
   }
 
   /** Set replication for the blocks. */

+ 19 - 11
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java

@@ -38,6 +38,8 @@ import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.net.Node;
 import org.apache.hadoop.net.NodeBase;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /** The class is responsible for choosing the desired number of targets
  * for placing block replicas.
  * The replica placement strategy is that if the writer is on a datanode,
@@ -49,6 +51,7 @@ import org.apache.hadoop.net.NodeBase;
 @InterfaceAudience.Private
 public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
   private boolean considerLoad; 
+  private boolean preferLocalNode = true;
   private NetworkTopology clusterMap;
   private FSClusterStats stats;
   static final String enableDebugLogging = "For more information, please enable"
@@ -223,17 +226,17 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
     if (localMachine == null)
       return chooseRandom(NodeBase.ROOT, excludedNodes, 
                           blocksize, maxNodesPerRack, results);
-      
-    // otherwise try local machine first
-    Node oldNode = excludedNodes.put(localMachine, localMachine);
-    if (oldNode == null) { // was not in the excluded list
-      if (isGoodTarget(localMachine, blocksize,
-                       maxNodesPerRack, false, results)) {
-        results.add(localMachine);
-        return localMachine;
-      }
-    } 
-      
+    if (preferLocalNode) {
+      // otherwise try local machine first
+      Node oldNode = excludedNodes.put(localMachine, localMachine);
+      if (oldNode == null) { // was not in the excluded list
+        if (isGoodTarget(localMachine, blocksize,
+                         maxNodesPerRack, false, results)) {
+          results.add(localMachine);
+          return localMachine;
+        }
+      } 
+    }      
     // try a node on local rack
     return chooseLocalRack(localMachine, excludedNodes, 
                            blocksize, maxNodesPerRack, results);
@@ -568,5 +571,10 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
     }
     return cur;
   }
+  
+  @VisibleForTesting
+  void setPreferLocalNode(boolean prefer) {
+    this.preferLocalNode = prefer;
+  }
 }
 

+ 9 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -172,6 +172,7 @@ import org.apache.hadoop.util.VersionInfo;
 import org.mortbay.util.ajax.JSON;
 
 import com.google.common.base.Preconditions;
+import com.google.common.annotations.VisibleForTesting;
 
 /***************************************************
  * FSNamesystem does the actual bookkeeping work for the
@@ -2842,7 +2843,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     /** Total number of blocks. */
     int blockTotal; 
     /** Number of safe blocks. */
-    private int blockSafe;
+    int blockSafe;
     /** Number of blocks needed to satisfy safe mode threshold condition */
     private int blockThreshold;
     /** Number of blocks needed before populating replication queues */
@@ -2850,7 +2851,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     /** time of the last status printout */
     private long lastStatusReport = 0;
     /** flag indicating whether replication queues have been initialized */
-    private boolean initializedReplQueues = false;
+    boolean initializedReplQueues = false;
     /** Was safemode entered automatically because available resources were low. */
     private boolean resourcesLow = false;
     
@@ -2980,9 +2981,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
      */
     private synchronized void initializeReplQueues() {
       LOG.info("initializing replication queues");
-      if (isPopulatingReplQueues()) {
-        LOG.warn("Replication queues already initialized.");
-      }
+      assert !isPopulatingReplQueues() : "Already initialized repl queues";
       long startTimeMisReplicatedScan = now();
       blockManager.processMisReplicatedBlocks();
       initializedReplQueues = true;
@@ -4412,4 +4411,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       byte[] password) throws InvalidToken {
     getDelegationTokenSecretManager().verifyToken(identifier, password);
   }
+
+  @VisibleForTesting
+  public SafeModeInfo getSafeModeInfoForTests() {
+    return safeMode;
+  }
 }

+ 0 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java

@@ -87,7 +87,6 @@ import com.google.common.collect.ImmutableList;
  * primary NameNode.
  *
  **********************************************************/
-@Deprecated // use BackupNode with -checkpoint argument instead.
 @InterfaceAudience.Private
 public class SecondaryNameNode implements Runnable {
     

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp

@@ -41,7 +41,7 @@
 <!DOCTYPE html>
 <html>
 <link rel="stylesheet" type="text/css" href="/static/hadoop.css">
-<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
+<title>Hadoop <%=namenodeRole%>&nbsp;<%=namenodeLabel%></title>
 <body>
 <h1><%=namenodeRole%> '<%=namenodeLabel%>'</h1>
 <%=NamenodeJspHelper.getVersionTable(fsn)%>

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp

@@ -37,7 +37,7 @@
 <html>
 
 <link rel="stylesheet" type="text/css" href="/static/hadoop.css">
-<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
+<title>Hadoop <%=namenodeRole%>&nbsp;<%=namenodeLabel%></title>
     
 <body>
 <h1><%=namenodeRole%> '<%=namenodeLabel%>'</h1>

+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp

@@ -37,7 +37,7 @@ String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":" + nn.getNameN
 <html>
 
 <link rel="stylesheet" type="text/css" href="/static/hadoop.css">
-<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
+<title>Hadoop <%=namenodeRole%>&nbsp;<%=namenodeLabel%></title>
   
 <body>
 <h1><%=namenodeRole%> '<%=namenodeLabel%>'</h1>

+ 4 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java

@@ -193,6 +193,10 @@ public class BenchmarkThroughput extends Configured implements Tool {
     BUFFER_SIZE = conf.getInt("dfsthroughput.buffer.size", 4 * 1024);
 
     String localDir = conf.get("mapred.temp.dir");
+    if (localDir == null) {
+      localDir = conf.get("hadoop.tmp.dir");
+      conf.set("mapred.temp.dir", localDir);
+    }
     dir = new LocalDirAllocator("mapred.temp.dir");
 
     System.setProperty("test.build.data", localDir);

+ 118 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java

@@ -19,22 +19,37 @@
 package org.apache.hadoop.hdfs;
 
 import java.io.IOException;
+import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.test.GenericTestUtils;
 
 import static org.junit.Assert.*;
 import org.junit.Before;
 import org.junit.After;
 import org.junit.Test;
 
+import com.google.common.base.Supplier;
+import com.google.common.collect.Lists;
+
 /**
  * Tests to verify safe mode correctness.
  */
 public class TestSafeMode {
+  private static final Path TEST_PATH = new Path("/test");
+  private static final int BLOCK_SIZE = 1024;
   Configuration conf; 
   MiniDFSCluster cluster;
   FileSystem fs;
@@ -43,6 +58,7 @@ public class TestSafeMode {
   @Before
   public void startUp() throws IOException {
     conf = new HdfsConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
     cluster.waitActive();      
     fs = cluster.getFileSystem();
@@ -83,7 +99,7 @@ public class TestSafeMode {
     
     // create two files with one block each.
     DFSTestUtil.createFile(fs, file1, 1000, (short)1, 0);
-    DFSTestUtil.createFile(fs, file2, 2000, (short)1, 0);
+    DFSTestUtil.createFile(fs, file2, 1000, (short)1, 0);
     fs.close();
     cluster.shutdown();
     
@@ -127,6 +143,106 @@ public class TestSafeMode {
     String status = cluster.getNameNode().getNamesystem().getSafemode();
     assertEquals("", status);
   }
+  
+  /**
+   * Test that the NN initializes its under-replicated blocks queue
+   * before it is ready to exit safemode (HDFS-1476)
+   */
+  @Test(timeout=45000)
+  public void testInitializeReplQueuesEarly() throws Exception {
+    // Spray the blocks around the cluster when we add DNs instead of
+    // concentrating all blocks on the first node.
+    BlockManagerTestUtil.setWritingPrefersLocalNode(
+        cluster.getNamesystem().getBlockManager(), false);
+    
+    cluster.startDataNodes(conf, 2, true, StartupOption.REGULAR, null);
+    cluster.waitActive();
+    DFSTestUtil.createFile(fs, TEST_PATH, 15*BLOCK_SIZE, (short)1, 1L);
+    
+    
+    List<DataNodeProperties> dnprops = Lists.newLinkedList();
+    dnprops.add(cluster.stopDataNode(0));
+    dnprops.add(cluster.stopDataNode(0));
+    dnprops.add(cluster.stopDataNode(0));
+    
+    cluster.getConfiguration(0).setFloat(
+        DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 1f/15f);
+    
+    cluster.restartNameNode();
+    final NameNode nn = cluster.getNameNode();
+    
+    String status = nn.getNamesystem().getSafemode();
+    assertEquals("Safe mode is ON.The reported blocks 0 needs additional " +
+        "15 blocks to reach the threshold 0.9990 of total blocks 15. " +
+        "Safe mode will be turned off automatically.", status);
+    assertFalse("Mis-replicated block queues should not be initialized " +
+        "until threshold is crossed",
+        NameNodeAdapter.safeModeInitializedReplQueues(nn));
+    
+    cluster.restartDataNode(dnprops.remove(0));
+
+    // Wait for the block report from the restarted DN to come in.
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return NameNodeAdapter.getSafeModeSafeBlocks(nn) > 0;
+      }
+    }, 10, 10000);
+    // SafeMode is fine-grain synchronized, so the processMisReplicatedBlocks
+    // call is still going on at this point - wait until it's done by grabbing
+    // the lock.
+    nn.getNamesystem().writeLock();
+    nn.getNamesystem().writeUnlock();
+    int safe = NameNodeAdapter.getSafeModeSafeBlocks(nn);
+    assertTrue("Expected first block report to make some but not all blocks " +
+        "safe. Got: " + safe, safe >= 1 && safe < 15);
+    BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
+    
+    assertTrue(NameNodeAdapter.safeModeInitializedReplQueues(nn));
+    assertEquals(15 - safe, nn.getNamesystem().getUnderReplicatedBlocks());
+    
+    cluster.restartDataNodes();
+  }
+
+  /**
+   * Test that, when under-replicated blocks are processed at the end of
+   * safe-mode, blocks currently under construction are not considered
+   * under-construction or missing. Regression test for HDFS-2822.
+   */
+  @Test
+  public void testRbwBlocksNotConsideredUnderReplicated() throws IOException {
+    List<FSDataOutputStream> stms = Lists.newArrayList();
+    try {
+      // Create some junk blocks so that the NN doesn't just immediately
+      // exit safemode on restart.
+      DFSTestUtil.createFile(fs, new Path("/junk-blocks"),
+          BLOCK_SIZE*4, (short)1, 1L);
+      // Create several files which are left open. It's important to
+      // create several here, because otherwise the first iteration of the
+      // replication monitor will pull them off the replication queue and
+      // hide this bug from the test!
+      for (int i = 0; i < 10; i++) {
+        FSDataOutputStream stm = fs.create(
+            new Path("/append-" + i), true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
+        stms.add(stm);
+        stm.write(1);
+        stm.hflush();
+      }
+
+      cluster.restartNameNode();
+      FSNamesystem ns = cluster.getNameNode(0).getNamesystem();
+      BlockManagerTestUtil.updateState(ns.getBlockManager());
+      assertEquals(0, ns.getPendingReplicationBlocks());
+      assertEquals(0, ns.getCorruptReplicaBlocks());
+      assertEquals(0, ns.getMissingBlocksCount());
+
+    } finally {
+      for (FSDataOutputStream stm : stms) {
+        IOUtils.closeStream(stm);
+      }
+      cluster.shutdown();
+    }
+  }
 
   public interface FSRun {
     public abstract void run(FileSystem fs) throws IOException;
@@ -241,4 +357,4 @@ public class TestSafeMode {
     assertEquals("", cluster.getNamesystem().getSafemode());
   }
 
-}
+}

+ 15 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java

@@ -27,6 +27,8 @@ import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.util.Daemon;
 
+import com.google.common.base.Preconditions;
+
 public class BlockManagerTestUtil {
   public static void setNodeReplicationLimit(final BlockManager blockManager,
       final int limit) {
@@ -122,4 +124,17 @@ public class BlockManagerTestUtil {
     return blockManager.computeDatanodeWork();
   }
   
+  
+  /**
+   * Change whether the block placement policy will prefer the writer's
+   * local Datanode or not.
+   * @param prefer
+   */
+  public static void setWritingPrefersLocalNode(
+      BlockManager bm, boolean prefer) {
+    BlockPlacementPolicy bpp = bm.getBlockPlacementPolicy();
+    Preconditions.checkState(bpp instanceof BlockPlacementPolicyDefault,
+        "Must use default policy, got %s", bpp.getClass());
+    ((BlockPlacementPolicyDefault)bpp).setPreferLocalNode(prefer);
+  }
 }

+ 25 - 0
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java

@@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.ipc.Server;
 
@@ -97,4 +98,28 @@ public class NameNodeAdapter {
       ns.readUnlock();
     }
   }
+  
+  /**
+   * @return the number of blocks marked safe by safemode, or -1
+   * if safemode is not running.
+   */
+  public static int getSafeModeSafeBlocks(NameNode nn) {
+    SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
+    if (smi == null) {
+      return -1;
+    }
+    return smi.blockSafe;
+  }
+  
+  /**
+   * @return true if safemode is not running, or if safemode has already
+   * initialized the replication queues
+   */
+  public static boolean safeModeInitializedReplQueues(NameNode nn) {
+    SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
+    if (smi == null) {
+      return true;
+    }
+    return smi.initializedReplQueues;
+  }
 }

+ 0 - 26
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java

@@ -203,7 +203,6 @@ public class TestCheckpoint extends TestCase {
   /*
    * Simulate namenode crashing after rolling edit log.
    */
-  @SuppressWarnings("deprecation")
   public void testSecondaryNamenodeError1()
     throws IOException {
     LOG.info("Starting testSecondaryNamenodeError1");
@@ -265,7 +264,6 @@ public class TestCheckpoint extends TestCase {
   /*
    * Simulate a namenode crash after uploading new image
    */
-  @SuppressWarnings("deprecation")
   public void testSecondaryNamenodeError2() throws IOException {
     LOG.info("Starting testSecondaryNamenodeError2");
     Configuration conf = new HdfsConfiguration();
@@ -324,7 +322,6 @@ public class TestCheckpoint extends TestCase {
   /*
    * Simulate a secondary namenode crash after rolling the edit log.
    */
-  @SuppressWarnings("deprecation")
   public void testSecondaryNamenodeError3() throws IOException {
     LOG.info("Starting testSecondaryNamenodeError3");
     Configuration conf = new HdfsConfiguration();
@@ -394,7 +391,6 @@ public class TestCheckpoint extends TestCase {
    * back to the name-node.
    * Used to truncate primary fsimage file.
    */
-  @SuppressWarnings("deprecation")
   public void testSecondaryFailsToReturnImage() throws IOException {
     LOG.info("Starting testSecondaryFailsToReturnImage");
     Configuration conf = new HdfsConfiguration();
@@ -471,7 +467,6 @@ public class TestCheckpoint extends TestCase {
    * @param errorType the ErrorSimulator type to trigger
    * @param exceptionSubstring an expected substring of the triggered exception
    */
-  @SuppressWarnings("deprecation")
   private void doSendFailTest(int errorType, String exceptionSubstring)
       throws IOException {
     Configuration conf = new HdfsConfiguration();
@@ -586,7 +581,6 @@ public class TestCheckpoint extends TestCase {
   /**
    * Test that the SecondaryNameNode properly locks its storage directories.
    */
-  @SuppressWarnings("deprecation")
   public void testSecondaryNameNodeLocking() throws Exception {
     // Start a primary NN so that the secondary will start successfully
     Configuration conf = new HdfsConfiguration();
@@ -679,7 +673,6 @@ public class TestCheckpoint extends TestCase {
    * 2. if the NN does not contain an image, importing a checkpoint
    *    succeeds and re-saves the image
    */
-  @SuppressWarnings("deprecation")
   public void testImportCheckpoint() throws Exception {
     Configuration conf = new HdfsConfiguration();
     Path testPath = new Path("/testfile");
@@ -760,16 +753,12 @@ public class TestCheckpoint extends TestCase {
       throw new IOException("Cannot create directory " + dir);
   }
   
-  // This deprecation suppress warning does not work due to known Java bug:
-  // http://bugs.sun.com/view_bug.do?bug_id=6460147
-  @SuppressWarnings("deprecation")
   SecondaryNameNode startSecondaryNameNode(Configuration conf
                                           ) throws IOException {
     conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0");
     return new SecondaryNameNode(conf);
   }
   
-  @SuppressWarnings("deprecation")
   SecondaryNameNode startSecondaryNameNode(Configuration conf, int index)
       throws IOException {
     Configuration snnConf = new Configuration(conf);
@@ -782,7 +771,6 @@ public class TestCheckpoint extends TestCase {
   /**
    * Tests checkpoint in HDFS.
    */
-  @SuppressWarnings("deprecation")
   public void testCheckpoint() throws IOException {
     Path file1 = new Path("checkpoint.dat");
     Path file2 = new Path("checkpoint2.dat");
@@ -1009,7 +997,6 @@ public class TestCheckpoint extends TestCase {
    * - it then fails again for the same reason
    * - it then tries to checkpoint a third time
    */
-  @SuppressWarnings("deprecation")
   public void testCheckpointAfterTwoFailedUploads() throws IOException {
     MiniDFSCluster cluster = null;
     SecondaryNameNode secondary = null;
@@ -1064,7 +1051,6 @@ public class TestCheckpoint extends TestCase {
    * 
    * @throws IOException
    */
-  @SuppressWarnings("deprecation")
   public void testMultipleSecondaryNamenodes() throws IOException {
     Configuration conf = new HdfsConfiguration();
     String nameserviceId1 = "ns1";
@@ -1114,7 +1100,6 @@ public class TestCheckpoint extends TestCase {
    * Test that the secondary doesn't have to re-download image
    * if it hasn't changed.
    */
-  @SuppressWarnings("deprecation")
   public void testSecondaryImageDownload() throws IOException {
     LOG.info("Starting testSecondaryImageDownload");
     Configuration conf = new HdfsConfiguration();
@@ -1197,7 +1182,6 @@ public class TestCheckpoint extends TestCase {
    * It verifies that this works even though the earlier-txid checkpoint gets
    * uploaded after the later-txid checkpoint.
    */
-  @SuppressWarnings("deprecation")
   public void testMultipleSecondaryNNsAgainstSameNN() throws Exception {
     Configuration conf = new HdfsConfiguration();
 
@@ -1283,7 +1267,6 @@ public class TestCheckpoint extends TestCase {
    * It verifies that one of the two gets an error that it's uploading a
    * duplicate checkpoint, and the other one succeeds.
    */
-  @SuppressWarnings("deprecation")
   public void testMultipleSecondaryNNsAgainstSameNN2() throws Exception {
     Configuration conf = new HdfsConfiguration();
 
@@ -1382,7 +1365,6 @@ public class TestCheckpoint extends TestCase {
    * is running. The secondary should shut itself down if if talks to a NN
    * with the wrong namespace.
    */
-  @SuppressWarnings("deprecation")
   public void testReformatNNBetweenCheckpoints() throws IOException {
     MiniDFSCluster cluster = null;
     SecondaryNameNode secondary = null;
@@ -1637,7 +1619,6 @@ public class TestCheckpoint extends TestCase {
   /**
    * Test that the 2NN triggers a checkpoint after the configurable interval
    */
-  @SuppressWarnings("deprecation")
   public void testCheckpointTriggerOnTxnCount() throws Exception {
     MiniDFSCluster cluster = null;
     SecondaryNameNode secondary = null;
@@ -1691,7 +1672,6 @@ public class TestCheckpoint extends TestCase {
    * logs that connect the 2NN's old checkpoint to the current txid
    * get archived. Then, the 2NN tries to checkpoint again.
    */
-  @SuppressWarnings("deprecation")
   public void testSecondaryHasVeryOutOfDateImage() throws IOException {
     MiniDFSCluster cluster = null;
     SecondaryNameNode secondary = null;
@@ -1729,7 +1709,6 @@ public class TestCheckpoint extends TestCase {
     }
   }
   
-  @SuppressWarnings("deprecation")
   public void testCommandLineParsing() throws ParseException {
     SecondaryNameNode.CommandLineOpts opts =
       new SecondaryNameNode.CommandLineOpts();
@@ -1764,7 +1743,6 @@ public class TestCheckpoint extends TestCase {
     } catch (ParseException e) {}
   }
 
-  @SuppressWarnings("deprecation")
   private void cleanup(SecondaryNameNode snn) {
     if (snn != null) {
       try {
@@ -1780,7 +1758,6 @@ public class TestCheckpoint extends TestCase {
    * Assert that if any two files have the same name across the 2NNs
    * and NN, they should have the same content too.
    */
-  @SuppressWarnings("deprecation")
   private void assertParallelFilesInvariant(MiniDFSCluster cluster,
       ImmutableList<SecondaryNameNode> secondaries) throws Exception {
     List<File> allCurrentDirs = Lists.newArrayList();
@@ -1792,7 +1769,6 @@ public class TestCheckpoint extends TestCase {
         ImmutableSet.of("VERSION"));    
   }
   
-  @SuppressWarnings("deprecation")
   private List<File> getCheckpointCurrentDirs(SecondaryNameNode secondary) {
     List<File> ret = Lists.newArrayList();
     for (URI u : secondary.getCheckpointDirs()) {
@@ -1802,7 +1778,6 @@ public class TestCheckpoint extends TestCase {
     return ret;
   }
 
-  @SuppressWarnings("deprecation")
   private CheckpointStorage spyOnSecondaryImage(SecondaryNameNode secondary1) {
     CheckpointStorage spy = Mockito.spy((CheckpointStorage)secondary1.getFSImage());
     secondary1.setFSImage(spy);
@@ -1812,7 +1787,6 @@ public class TestCheckpoint extends TestCase {
   /**
    * A utility class to perform a checkpoint in a different thread.
    */
-  @SuppressWarnings("deprecation")
   private static class DoCheckpointThread extends Thread {
     private final SecondaryNameNode snn;
     private volatile Throwable thrown = null;

+ 0 - 4
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameEditsConfigs.java

@@ -106,9 +106,6 @@ public class TestNameEditsConfigs extends TestCase {
     assertTrue(!fileSys.exists(name));
   }
 
-  // This deprecation suppress warning does not work due to known Java bug:
-  // http://bugs.sun.com/view_bug.do?bug_id=6460147
-  @SuppressWarnings("deprecation")
   SecondaryNameNode startSecondaryNameNode(Configuration conf
                                           ) throws IOException {
     conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0");
@@ -128,7 +125,6 @@ public class TestNameEditsConfigs extends TestCase {
    * sure we are reading proper edits and image.
    * @throws Exception 
    */
-  @SuppressWarnings("deprecation")
   public void testNameEditsConfigs() throws Exception {
     Path file1 = new Path("TestNameEditsConfigs1");
     Path file2 = new Path("TestNameEditsConfigs2");

+ 0 - 1
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecondaryWebUi.java

@@ -30,7 +30,6 @@ import org.junit.Test;
 
 public class TestSecondaryWebUi {
 
-  @SuppressWarnings("deprecation")
   @Test
   public void testSecondaryWebUi() throws IOException {
     Configuration conf = new Configuration();

+ 0 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java

@@ -120,7 +120,6 @@ public class TestStartup extends TestCase {
    * start MiniDFSCluster, create a file (to create edits) and do a checkpoint
    * @throws IOException
    */
-  @SuppressWarnings("deprecation")
   public void createCheckPoint() throws IOException {
     LOG.info("--starting mini cluster");
     // manage dirs parameter set to false 
@@ -300,7 +299,6 @@ public class TestStartup extends TestCase {
    * secondary node copies fsimage and edits into correct separate directories.
    * @throws IOException
    */
-  @SuppressWarnings("deprecation")
   public void testSNNStartup() throws IOException{
     //setUpConfig();
     LOG.info("--starting SecondNN startup test");

+ 0 - 2
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStorageRestore.java

@@ -153,7 +153,6 @@ public class TestStorageRestore {
    * 7. run doCheckpoint
    * 8. verify that all the image and edits files are the same.
    */
-  @SuppressWarnings("deprecation")
   @Test
   public void testStorageRestore() throws Exception {
     int numDatanodes = 0;
@@ -310,7 +309,6 @@ public class TestStorageRestore {
    * then try to perform a checkpoint. The NN should not serve up the image or
    * edits from the restored (empty) dir.
    */
-  @SuppressWarnings("deprecation")
   @Test
   public void testMultipleSecondaryCheckpoint() throws IOException {
     

+ 56 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -142,6 +142,14 @@ Release 0.23.1 - Unreleased
 
     MAPREDUCE-3692. yarn-resourcemanager out and log files can get big. (eli)
 
+    MAPREDUCE-3710. Improved FileInputFormat to return better locality for the
+    last split. (Siddarth Seth via vinodkv)
+
+    MAPREDUCE-2765. DistCp Rewrite. (Mithun Radhakrishnan via mahadev)
+
+    MAPREDUCE-3737. The Web Application Proxy is not documented very well.
+    (Robert Evans via mahadev)
+
   OPTIMIZATIONS
 
     MAPREDUCE-3567. Extraneous JobConf objects in AM heap. (Vinod Kumar
@@ -165,7 +173,13 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3512. Batching JobHistory flushing to DFS so that we don't flush
     for every event slowing down AM. (Siddarth Seth via vinodkv)
 
+    MAPREDUCE-3718. Change default AM heartbeat interval to 1 second. (Hitesh
+    Shah via sseth)
+
   BUG FIXES
+    MAPREDUCE-3194. "mapred mradmin" command is broken in mrv2
+                     (Jason Lowe via bobby)
+
     MAPREDUCE-3462. Fix Gridmix JUnit testcase failures. 
                     (Ravi Prakash and Ravi Gummadi via amarrk)
 
@@ -498,6 +512,48 @@ Release 0.23.1 - Unreleased
 
     MAPREDUCE-3705. ant build fails on 0.23 branch. (Thomas Graves via
     mahadev)
+ 
+    MAPREDUCE-3691. webservices add support to compress response.
+    (Thomas Graves via mahadev)
+
+    MAPREDUCE-3702. internal server error trying to access application master 
+    via proxy with filter enabled (Thomas Graves via mahadev)
+
+    MAPREDUCE-3646. Remove redundant URL info from "mapred job" output.
+    (Jonathan Eagles via mahadev)
+
+    MAPREDUCE-3681. Fixed computation of queue's usedCapacity. (acmurthy) 
+
+    MAPREDUCE-3505. yarn APPLICATION_CLASSPATH needs to be overridable. 
+    (ahmed via tucu)
+
+    MAPREDUCE-3714. Fixed EventFetcher and Fetcher threads to shut-down properly
+    so that reducers don't hang in corner cases. (vinodkv)
+
+    MAPREDUCE-3712. The mapreduce tar does not contain the hadoop-mapreduce-client-
+    jobclient-tests.jar. (mahadev)
+
+    MAPREDUCE-3717. JobClient test jar has missing files to run all the test programs.
+    (mahadev)
+
+    MAPREDUCE-3630. Fixes a NullPointer exception while running TeraGen - if a
+    map is asked to generate 0 records. (Mahadev Konar via sseth)
+
+    MAPREDUCE-3683. Fixed maxCapacity of queues to be product of parent
+    maxCapacities. (acmurthy)
+
+    MAPREDUCE-3713. Fixed the way head-room is allocated to applications by
+    CapacityScheduler so that it deducts current-usage per user and not
+    per-application. (Arun C Murthy via vinodkv)
+
+    MAPREDUCE-3721. Fixed a race in shuffle which caused reduces to hang.
+    (sseth via acmurthy) 
+
+    MAPREDUCE-3733. Add Apache License Header to hadoop-distcp/pom.xml.
+    (mahadev)
+
+    MAPREDUCE-3735. Add distcp jar to the distribution (tar).
+    (mahadev)
 
 Release 0.23.0 - 2011-11-01 
 

+ 8 - 13
hadoop-mapreduce-project/bin/mapred

@@ -30,9 +30,6 @@ fi
 function print_usage(){
   echo "Usage: mapred [--config confdir] COMMAND"
   echo "       where COMMAND is one of:"
-  echo "  mradmin              run a Map-Reduce admin client"
-  echo "  jobtracker           run the MapReduce job Tracker node" 
-  echo "  tasktracker          run a MapReduce task Tracker node" 
   echo "  pipes                run a Pipes job"
   echo "  job                  manipulate MapReduce jobs"
   echo "  queue                get information regarding JobQueues"
@@ -51,16 +48,7 @@ fi
 COMMAND=$1
 shift
 
-if [ "$COMMAND" = "mradmin" ] ; then
-  CLASS=org.apache.hadoop.mapred.tools.MRAdmin
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "jobtracker" ] ; then
-  CLASS=org.apache.hadoop.mapred.JobTracker
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOBTRACKER_OPTS"
-elif [ "$COMMAND" = "tasktracker" ] ; then
-  CLASS=org.apache.hadoop.mapred.TaskTracker
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_TASKTRACKER_OPTS"
-elif [ "$COMMAND" = "job" ] ; then
+if [ "$COMMAND" = "job" ] ; then
   CLASS=org.apache.hadoop.mapred.JobClient
 elif [ "$COMMAND" = "queue" ] ; then
   CLASS=org.apache.hadoop.mapred.JobQueueClient
@@ -75,6 +63,13 @@ elif [ "$COMMAND" = "classpath" ] ; then
 elif [ "$COMMAND" = "groups" ] ; then
   CLASS=org.apache.hadoop.mapred.tools.GetGroups
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "mradmin" ] \
+    || [ "$COMMAND" = "jobtracker" ] \
+    || [ "$COMMAND" = "tasktracker" ] ; then
+  echo "Sorry, the $COMMAND command is no longer supported."
+  echo "You may find similar functionality with the \"yarn\" shell command."
+  print_usage
+  exit
 else
   echo $COMMAND - invalid command
   print_usage

+ 3 - 3
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java

@@ -522,13 +522,13 @@ public abstract class TaskAttemptImpl implements
    * a parent CLC and use it for all the containers, so this should go away
    * once the mr-generated-classpath stuff is gone.
    */
-  private static String getInitialClasspath() throws IOException {
+  private static String getInitialClasspath(Configuration conf) throws IOException {
     synchronized (classpathLock) {
       if (initialClasspathFlag.get()) {
         return initialClasspath;
       }
       Map<String, String> env = new HashMap<String, String>();
-      MRApps.setClasspath(env);
+      MRApps.setClasspath(env, conf);
       initialClasspath = env.get(Environment.CLASSPATH.name());
       initialClasspathFlag.set(true);
       return initialClasspath;
@@ -631,7 +631,7 @@ public abstract class TaskAttemptImpl implements
       Apps.addToEnvironment(
           environment,  
           Environment.CLASSPATH.name(), 
-          getInitialClasspath());
+          getInitialClasspath(conf));
     } catch (IOException e) {
       throw new YarnException(e);
     }

+ 4 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml

@@ -38,6 +38,10 @@
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-mapreduce-client-core</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-server-common</artifactId>
+    </dependency>
   </dependencies>
 
   <build>

+ 9 - 6
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java

@@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.LocalResourceType;
 import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.util.Apps;
 import org.apache.hadoop.yarn.util.BuilderUtils;
@@ -171,7 +172,7 @@ public class MRApps extends Apps {
   }
 
   private static void setMRFrameworkClasspath(
-      Map<String, String> environment) throws IOException {
+      Map<String, String> environment, Configuration conf) throws IOException {
     InputStream classpathFileStream = null;
     BufferedReader reader = null;
     try {
@@ -208,8 +209,10 @@ public class MRApps extends Apps {
       }
 
       // Add standard Hadoop classes
-      for (String c : ApplicationConstants.APPLICATION_CLASSPATH) {
-        Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c);
+      for (String c : conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH)
+          .split(",")) {
+        Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c
+            .trim());
       }
     } finally {
       if (classpathFileStream != null) {
@@ -222,8 +225,8 @@ public class MRApps extends Apps {
     // TODO: Remove duplicates.
   }
   
-  public static void setClasspath(Map<String, String> environment) 
-      throws IOException {
+  public static void setClasspath(Map<String, String> environment,
+      Configuration conf) throws IOException {
     Apps.addToEnvironment(
         environment, 
         Environment.CLASSPATH.name(), 
@@ -232,7 +235,7 @@ public class MRApps extends Apps {
         environment, 
         Environment.CLASSPATH.name(),
         Environment.PWD.$() + Path.SEPARATOR + "*");
-    MRApps.setMRFrameworkClasspath(environment);
+    MRApps.setMRFrameworkClasspath(environment, conf);
   }
   
   private static final String STAGING_CONSTANT = ".staging";
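
For MAPREDUCE-3505 in the changelog above, the container classpath now comes from the YarnConfiguration.YARN_APPLICATION_CLASSPATH setting instead of a hard-coded constant. A minimal stand-alone sketch of the same comma-split-and-trim idea, assuming a hypothetical ClasspathEnvSketch class and an example value in place of the real conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH) call:

import java.util.HashMap;
import java.util.Map;

// Hypothetical illustration; not the MRApps/Apps implementation.
public class ClasspathEnvSketch {
  static final String CLASSPATH = "CLASSPATH";

  // Append one entry to the CLASSPATH variable in the environment map,
  // mirroring the split-and-trim loop added to setMRFrameworkClasspath above.
  static void addToClasspath(Map<String, String> env, String entry) {
    String current = env.get(CLASSPATH);
    env.put(CLASSPATH, current == null ? entry : current + ":" + entry);
  }

  public static void main(String[] args) {
    // Assumed example value; in Hadoop this would come from
    // conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH).
    String configured = "$HADOOP_CONF_DIR, $HADOOP_COMMON_HOME/share/hadoop/common/*";
    Map<String, String> env = new HashMap<String, String>();
    for (String c : configured.split(",")) {
      addToClasspath(env, c.trim());
    }
    // Prints $HADOOP_CONF_DIR:$HADOOP_COMMON_HOME/share/hadoop/common/*
    System.out.println(env.get(CLASSPATH));
  }
}

Making the list configurable is what lets tests such as testSetClasspath below assert the full expanded value.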

+ 18 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java

@@ -18,7 +18,12 @@
 
 package org.apache.hadoop.mapreduce.v2.util;
 
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
@@ -121,4 +126,17 @@ public class TestMRApps {
         "/my/path/to/staging/dummy-user/.staging/job_dummy-job_12345/job.xml", jobFile);
   }
 
+  @Test public void testSetClasspath() throws IOException {
+    Job job = Job.getInstance();
+    Map<String, String> environment = new HashMap<String, String>();
+    MRApps.setClasspath(environment, job.getConfiguration());
+    assertEquals("job.jar:$PWD/*:$HADOOP_CONF_DIR:" +
+        "$HADOOP_COMMON_HOME/share/hadoop/common/*:" +
+        "$HADOOP_COMMON_HOME/share/hadoop/common/lib/*:" +
+        "$HADOOP_HDFS_HOME/share/hadoop/hdfs/*:" +
+        "$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*:" +
+        "$YARN_HOME/share/hadoop/mapreduce/*:" +
+        "$YARN_HOME/share/hadoop/mapreduce/lib/*",
+        environment.get("CLASSPATH"));
+  }
 }

+ 4 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java

@@ -289,8 +289,10 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
         }
         
         if (bytesRemaining != 0) {
-          splits.add(makeSplit(path, length-bytesRemaining, bytesRemaining, 
-                     blkLocations[blkLocations.length-1].getHosts()));
+          String[] splitHosts = getSplitHosts(blkLocations, length
+              - bytesRemaining, bytesRemaining, clusterMap);
+          splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
+              splitHosts));
         }
       } else if (length != 0) {
         String[] splitHosts = getSplitHosts(blkLocations,0,length,clusterMap);

+ 1 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java

@@ -1216,6 +1216,7 @@ public class Job extends JobContextImpl implements JobContext {
       }
     });
     state = JobState.RUNNING;
+    LOG.info("The url to track the job: " + getTrackingURL());
    }
   
   /**

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java

@@ -417,7 +417,7 @@ public interface MRJobConfig {
   /** How often the AM should send heartbeats to the RM.*/
   public static final String MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS =
     MR_AM_PREFIX + "scheduler.heartbeat.interval-ms";
-  public static final int DEFAULT_MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS = 2000;
+  public static final int DEFAULT_MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS = 1000;
 
   /**
    * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS
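
Since the default above drops from 2000 ms to 1000 ms (MAPREDUCE-3718), a job that needs the old pacing can still override it through the same key; a minimal sketch using only the MRJobConfig constants shown in this hunk:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class HeartbeatIntervalExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Restore the previous 2-second AM-to-RM heartbeat for this job only.
    conf.setInt(MRJobConfig.MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS, 2000);
    System.out.println("AM heartbeat interval (ms): "
        + conf.getInt(MRJobConfig.MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS,
            MRJobConfig.DEFAULT_MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS));
  }
}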

+ 2 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java

@@ -286,8 +286,9 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
           }
 
           if (bytesRemaining != 0) {
+            int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
             splits.add(makeSplit(path, length-bytesRemaining, bytesRemaining,
-                       blkLocations[blkLocations.length-1].getHosts()));
+                       blkLocations[blkIndex].getHosts()));
           }
         } else { // not splitable
           splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts()));
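
The two FileInputFormat hunks above (MAPREDUCE-3710) share one idea: the hosts for the final, partially filled split should come from the block that contains the split's start offset, not unconditionally from the file's last block. A simplified sketch of that index calculation, assuming equal-sized blocks laid back to back (the real code walks BlockLocation offsets via getBlockIndex/getSplitHosts):

// Hypothetical illustration, not the Hadoop implementation.
public class LastSplitLocalitySketch {

  // Index of the block containing the given offset, for fixed-size blocks.
  static int blockIndexForOffset(long blockSize, long fileLength, long offset) {
    if (offset < 0 || offset >= fileLength) {
      throw new IllegalArgumentException("offset outside file: " + offset);
    }
    return (int) (offset / blockSize);
  }

  public static void main(String[] args) {
    long mb = 1024L * 1024;
    long blockSize = 128 * mb;
    long fileLength = 1027 * mb;     // nine blocks, the last one only 3 MB
    long lastSplitStart = 896 * mb;  // seven 128 MB splits cut, 131 MB remain
    int numBlocks = (int) ((fileLength + blockSize - 1) / blockSize);

    // Old behaviour: always used the hosts of the final block (index 8 here).
    System.out.println("last block index    = " + (numBlocks - 1));
    // Fixed behaviour: use the block holding the split's start offset (index 7).
    System.out.println("block holding split = "
        + blockIndexForOffset(blockSize, fileLength, lastSplitStart));
  }
}

This is the same 1027 MB / 128 MB scenario exercised by testLastInputSplitExceedingSplitBoundary further down.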

+ 17 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/EventFetcher.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 
+@SuppressWarnings("deprecation")
 class EventFetcher<K,V> extends Thread {
   private static final long SLEEP_TIME = 1000;
   private static final int MAX_EVENTS_TO_FETCH = 10000;
@@ -41,6 +42,8 @@ class EventFetcher<K,V> extends Thread {
   private ExceptionReporter exceptionReporter = null;
   
   private int maxMapRuntime = 0;
+
+  private volatile boolean stopped = false;
   
   public EventFetcher(TaskAttemptID reduce,
                       TaskUmbilicalProtocol umbilical,
@@ -60,7 +63,7 @@ class EventFetcher<K,V> extends Thread {
     LOG.info(reduce + " Thread started: " + getName());
     
     try {
-      while (true && !Thread.currentThread().isInterrupted()) {
+      while (!stopped && !Thread.currentThread().isInterrupted()) {
         try {
           int numNewMaps = getMapCompletionEvents();
           failures = 0;
@@ -71,6 +74,9 @@ class EventFetcher<K,V> extends Thread {
           if (!Thread.currentThread().isInterrupted()) {
             Thread.sleep(SLEEP_TIME);
           }
+        } catch (InterruptedException e) {
+          LOG.info("EventFetcher is interrupted.. Returning");
+          return;
         } catch (IOException ie) {
           LOG.info("Exception in getting events", ie);
           // check to see whether to abort
@@ -90,6 +96,16 @@ class EventFetcher<K,V> extends Thread {
       return;
     }
   }
+
+  public void shutDown() {
+    this.stopped = true;
+    interrupt();
+    try {
+      join(5000);
+    } catch(InterruptedException ie) {
+      LOG.warn("Got interrupted while joining " + getName(), ie);
+    }
+  }
   
   /** 
    * Queries the {@link TaskTracker} for a set of map-completion events 
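
The new shutDown() above (its twin is added to Fetcher, and Shuffle switches to calling both further down) follows a three-step pattern: set a volatile stop flag, interrupt to break out of blocking calls, then join with a bound so the caller can never hang. A minimal generic sketch of that pattern, using a hypothetical StoppableWorker class rather than the Hadoop fetcher threads:

// Hypothetical illustration of the stop-flag + interrupt + bounded-join pattern.
public class StoppableWorker extends Thread {
  private volatile boolean stopped = false;

  @Override
  public void run() {
    while (!stopped && !Thread.currentThread().isInterrupted()) {
      try {
        Thread.sleep(1000);   // stands in for fetching map-completion events
      } catch (InterruptedException ie) {
        return;               // interrupted during shutdown: just exit
      }
    }
  }

  public void shutDown() {
    stopped = true;           // 1. ask the loop to stop
    interrupt();              // 2. break out of any blocking call
    try {
      join(5000);             // 3. bounded wait, mirroring the 5s join above
    } catch (InterruptedException ie) {
      System.err.println("Interrupted while joining " + getName());
    }
  }

  public static void main(String[] args) throws InterruptedException {
    StoppableWorker w = new StoppableWorker();
    w.start();
    Thread.sleep(100);
    w.shutDown();
    System.out.println("worker alive after shutDown? " + w.isAlive());
  }
}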

+ 15 - 2
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java

@@ -48,6 +48,7 @@ import org.apache.hadoop.mapreduce.task.reduce.MapOutput.Type;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.ReflectionUtils;
 
+@SuppressWarnings({"deprecation"})
 class Fetcher<K,V> extends Thread {
   
   private static final Log LOG = LogFactory.getLog(Fetcher.class);
@@ -88,6 +89,8 @@ class Fetcher<K,V> extends Thread {
   private final Decompressor decompressor;
   private final SecretKey jobTokenSecret;
 
+  private volatile boolean stopped = false;
+
   public Fetcher(JobConf job, TaskAttemptID reduceId, 
                  ShuffleScheduler<K,V> scheduler, MergeManager<K,V> merger,
                  Reporter reporter, ShuffleClientMetrics metrics,
@@ -135,7 +138,7 @@ class Fetcher<K,V> extends Thread {
   
   public void run() {
     try {
-      while (true && !Thread.currentThread().isInterrupted()) {
+      while (!stopped && !Thread.currentThread().isInterrupted()) {
         MapHost host = null;
         try {
           // If merge is on, block
@@ -160,7 +163,17 @@ class Fetcher<K,V> extends Thread {
       exceptionReporter.reportException(t);
     }
   }
-  
+
+  public void shutDown() throws InterruptedException {
+    this.stopped = true;
+    interrupt();
+    try {
+      join(5000);
+    } catch (InterruptedException ie) {
+      LOG.warn("Got interrupt while joining " + getName(), ie);
+    }
+  }
+
   /**
    * The crux of the matter...
    * 

+ 26 - 12
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java

@@ -92,6 +92,7 @@ public class MergeManager<K, V> {
   
   private final long memoryLimit;
   private long usedMemory;
+  private long commitMemory;
   private final long maxSingleShuffleLimit;
   
   private final int memToMemMergeOutputsThreshold; 
@@ -181,6 +182,13 @@ public class MergeManager<K, V> {
              "ioSortFactor=" + ioSortFactor + ", " +
              "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);
 
+    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
+      throw new RuntimeException("Invlaid configuration: "
+          + "maxSingleShuffleLimit should be less than mergeThreshold"
+          + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
+          + "mergeThreshold: " + this.mergeThreshold);
+    }
+
     boolean allowMemToMemMerge = 
       jobConf.getBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);
     if (allowMemToMemMerge) {
@@ -245,16 +253,16 @@ public class MergeManager<K, V> {
     // all the stalled threads
     
     if (usedMemory > memoryLimit) {
-      LOG.debug(mapId + ": Stalling shuffle since usedMemory (" + usedMemory + 
-               ") is greater than memoryLimit (" + memoryLimit + ")"); 
-      
+      LOG.debug(mapId + ": Stalling shuffle since usedMemory (" + usedMemory
+          + ") is greater than memoryLimit (" + memoryLimit + ")." + 
+          " CommitMemory is (" + commitMemory + ")"); 
       return stallShuffle;
     }
     
     // Allow the in-memory shuffle to progress
-    LOG.debug(mapId + ": Proceeding with shuffle since usedMemory (" +
-        usedMemory + 
-        ") is lesser than memoryLimit (" + memoryLimit + ")"); 
+    LOG.debug(mapId + ": Proceeding with shuffle since usedMemory ("
+        + usedMemory + ") is lesser than memoryLimit (" + memoryLimit + ")."
+        + "CommitMemory is (" + commitMemory + ")"); 
     return unconditionalReserve(mapId, requestedSize, true);
   }
   
@@ -270,18 +278,24 @@ public class MergeManager<K, V> {
   }
   
   synchronized void unreserve(long size) {
+    commitMemory -= size;
     usedMemory -= size;
   }
-  
+
   public synchronized void closeInMemoryFile(MapOutput<K,V> mapOutput) { 
     inMemoryMapOutputs.add(mapOutput);
     LOG.info("closeInMemoryFile -> map-output of size: " + mapOutput.getSize()
-        + ", inMemoryMapOutputs.size() -> " + inMemoryMapOutputs.size());
-    
+        + ", inMemoryMapOutputs.size() -> " + inMemoryMapOutputs.size()
+        + ", commitMemory -> " + commitMemory + ", usedMemory ->" + usedMemory);
+
+    commitMemory += mapOutput.getSize();
+
     synchronized (inMemoryMerger) {
-      if (!inMemoryMerger.isInProgress() && usedMemory >= mergeThreshold) {
-        LOG.info("Starting inMemoryMerger's merge since usedMemory=" +
-            usedMemory + " > mergeThreshold=" + mergeThreshold);
+      // Can hang if mergeThreshold is really low.
+      if (!inMemoryMerger.isInProgress() && commitMemory >= mergeThreshold) {
+        LOG.info("Starting inMemoryMerger's merge since commitMemory=" +
+            commitMemory + " > mergeThreshold=" + mergeThreshold + 
+            ". Current usedMemory=" + usedMemory);
         inMemoryMapOutputs.addAll(inMemoryMergedMapOutputs);
         inMemoryMergedMapOutputs.clear();
         inMemoryMerger.startMerge(inMemoryMapOutputs);
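
The MergeManager change above distinguishes memory that has merely been reserved for in-flight fetches (usedMemory) from memory whose map outputs have actually been committed via closeInMemoryFile (commitMemory), and triggers the in-memory merge on the latter. A toy accounting sketch of that distinction, using a hypothetical ShuffleMemorySketch class rather than the real MergeManager API:

// Hypothetical illustration of the reserve / commit / unreserve accounting.
public class ShuffleMemorySketch {
  private long usedMemory = 0;    // reserved by in-flight fetches
  private long commitMemory = 0;  // held by completed, closed map outputs
  private final long memoryLimit;
  private final long mergeThreshold;

  ShuffleMemorySketch(long memoryLimit, long mergeThreshold) {
    this.memoryLimit = memoryLimit;
    this.mergeThreshold = mergeThreshold;
  }

  // Reserve space for an incoming map output; false means "stall the shuffle".
  synchronized boolean reserve(long size) {
    if (usedMemory > memoryLimit) {
      return false;
    }
    usedMemory += size;
    return true;
  }

  // A fetch completed; true means the in-memory merge should start now.
  synchronized boolean commit(long size) {
    commitMemory += size;
    return commitMemory >= mergeThreshold;
  }

  // A merged or released output gives its memory back.
  synchronized void unreserve(long size) {
    commitMemory -= size;
    usedMemory -= size;
  }

  public static void main(String[] args) {
    ShuffleMemorySketch m = new ShuffleMemorySketch(100, 60);
    m.reserve(40);
    System.out.println(m.commit(40));  // false: only 40 committed, threshold 60
    m.reserve(30);
    System.out.println(m.commit(30));  // true: 70 committed >= 60, merge starts
  }
}

The reserve/commit/unreserve names here are hypothetical; in the diff the commit happens inside closeInMemoryFile and the release inside unreserve.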

+ 4 - 15
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java

@@ -19,8 +19,6 @@ package org.apache.hadoop.mapreduce.task.reduce;
 
 import java.io.IOException;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileSystem;
@@ -33,17 +31,17 @@ import org.apache.hadoop.mapred.RawKeyValueIterator;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.Task;
+import org.apache.hadoop.mapred.Task.CombineOutputCollector;
 import org.apache.hadoop.mapred.TaskStatus;
 import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
-import org.apache.hadoop.mapred.Task.CombineOutputCollector;
 import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.util.Progress;
 
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
+@SuppressWarnings({"deprecation", "unchecked", "rawtypes"})
 public class Shuffle<K, V> implements ExceptionReporter {
-  private static final Log LOG = LogFactory.getLog(Shuffle.class);
   private static final int PROGRESS_FREQUENCY = 2000;
   
   private final TaskAttemptID reduceId;
@@ -100,7 +98,6 @@ public class Shuffle<K, V> implements ExceptionReporter {
                                     this, mergePhase, mapOutputFile);
   }
 
-  @SuppressWarnings("unchecked")
   public RawKeyValueIterator run() throws IOException, InterruptedException {
     // Start the map-completion events fetcher thread
     final EventFetcher<K,V> eventFetcher = 
@@ -130,19 +127,11 @@ public class Shuffle<K, V> implements ExceptionReporter {
     }
 
     // Stop the event-fetcher thread
-    eventFetcher.interrupt();
-    try {
-      eventFetcher.join();
-    } catch(Throwable t) {
-      LOG.info("Failed to stop " + eventFetcher.getName(), t);
-    }
+    eventFetcher.shutDown();
     
     // Stop the map-output fetcher threads
     for (Fetcher<K,V> fetcher : fetchers) {
-      fetcher.interrupt();
-    }
-    for (Fetcher<K,V> fetcher : fetchers) {
-      fetcher.join();
+      fetcher.shutDown();
     }
     fetchers = null;
     

+ 7 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml

@@ -102,6 +102,13 @@
             <phase>test-compile</phase>
           </execution>
         </executions>
+        <configuration>       
+         <archive>
+          <manifest>
+           <mainClass>org.apache.hadoop.test.MapredTestDriver</mainClass>
+         </manifest>
+         </archive>
+        </configuration>
       </plugin>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>

+ 0 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java

@@ -175,7 +175,6 @@ public class ClientServiceDelegate {
                 + ":" + addr.getPort()));
             newUgi.addToken(clientToken);
           }
-          LOG.info("The url to track the job: " + application.getTrackingUrl());
           LOG.debug("Connecting to " + serviceAddr);
           final String tempStr = serviceAddr;
           realProxy = newUgi.doAs(new PrivilegedExceptionAction<MRClientProtocol>() {

+ 1 - 1
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java

@@ -406,7 +406,7 @@ public class YARNRunner implements ClientProtocol {
     // Setup the CLASSPATH in environment
     // i.e. add { job jar, CWD, Hadoop jars} to classpath.
     Map<String, String> environment = new HashMap<String, String>();
-    MRApps.setClasspath(environment);
+    MRApps.setClasspath(environment, conf);
 
     // Parse distributed cache
     MRApps.setupDistributedCache(jobConf, localResources);

+ 1 - 1
hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/GenericMRLoadGenerator.java → hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/GenericMRLoadGenerator.java

@@ -29,7 +29,6 @@ import java.util.Stack;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.examples.RandomTextWriter;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -40,6 +39,7 @@ import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.hadoop.mapred.lib.NullOutputFormat;
+import org.apache.hadoop.mapreduce.RandomTextWriter;
 import org.apache.hadoop.util.GenericOptionsParser;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.Tool;

+ 101 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFileInputFormat.java

@@ -17,6 +17,10 @@
  */
 package org.apache.hadoop.mapred;
 
+import static org.mockito.Matchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
 import java.io.DataOutputStream;
 import java.io.IOException;
 
@@ -32,6 +36,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.io.Text;
 
+@SuppressWarnings("deprecation")
 public class TestFileInputFormat extends TestCase {
 
   Configuration conf = new Configuration();
@@ -186,6 +191,102 @@ public class TestFileInputFormat extends TestCase {
     assertEquals(splits.length, 2);
   }
 
+  @SuppressWarnings("rawtypes")
+  public void testLastInputSplitAtSplitBoundary() throws Exception {
+    FileInputFormat fif = new FileInputFormatForTest(1024l * 1024 * 1024,
+        128l * 1024 * 1024);
+    JobConf job = new JobConf();
+    InputSplit[] splits = fif.getSplits(job, 8);
+    assertEquals(8, splits.length);
+    for (int i = 0; i < splits.length; i++) {
+      InputSplit split = splits[i];
+      assertEquals(("host" + i), split.getLocations()[0]);
+    }
+  }
+
+  @SuppressWarnings("rawtypes")
+  public void testLastInputSplitExceedingSplitBoundary() throws Exception {
+    FileInputFormat fif = new FileInputFormatForTest(1027l * 1024 * 1024,
+        128l * 1024 * 1024);
+    JobConf job = new JobConf();
+    InputSplit[] splits = fif.getSplits(job, 8);
+    assertEquals(8, splits.length);
+    for (int i = 0; i < splits.length; i++) {
+      InputSplit split = splits[i];
+      assertEquals(("host" + i), split.getLocations()[0]);
+    }
+  }
+
+  @SuppressWarnings("rawtypes")
+  public void testLastInputSplitSingleSplit() throws Exception {
+    FileInputFormat fif = new FileInputFormatForTest(100l * 1024 * 1024,
+        128l * 1024 * 1024);
+    JobConf job = new JobConf();
+    InputSplit[] splits = fif.getSplits(job, 1);
+    assertEquals(1, splits.length);
+    for (int i = 0; i < splits.length; i++) {
+      InputSplit split = splits[i];
+      assertEquals(("host" + i), split.getLocations()[0]);
+    }
+  }
+
+  private class FileInputFormatForTest<K, V> extends FileInputFormat<K, V> {
+
+    long splitSize;
+    long length;
+
+    FileInputFormatForTest(long length, long splitSize) {
+      this.length = length;
+      this.splitSize = splitSize;
+    }
+
+    @Override
+    public RecordReader<K, V> getRecordReader(InputSplit split, JobConf job,
+        Reporter reporter) throws IOException {
+      return null;
+    }
+
+    @Override
+    protected FileStatus[] listStatus(JobConf job) throws IOException {
+      FileStatus mockFileStatus = mock(FileStatus.class);
+      when(mockFileStatus.getBlockSize()).thenReturn(splitSize);
+      when(mockFileStatus.isDirectory()).thenReturn(false);
+      Path mockPath = mock(Path.class);
+      FileSystem mockFs = mock(FileSystem.class);
+
+      BlockLocation[] blockLocations = mockBlockLocations(length, splitSize);
+      when(mockFs.getFileBlockLocations(mockFileStatus, 0, length)).thenReturn(
+          blockLocations);
+      when(mockPath.getFileSystem(any(Configuration.class))).thenReturn(mockFs);
+
+      when(mockFileStatus.getPath()).thenReturn(mockPath);
+      when(mockFileStatus.getLen()).thenReturn(length);
+
+      FileStatus[] fs = new FileStatus[1];
+      fs[0] = mockFileStatus;
+      return fs;
+    }
+
+    @Override
+    protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
+      return splitSize;
+    }
+
+    private BlockLocation[] mockBlockLocations(long size, long splitSize) {
+      int numLocations = (int) (size / splitSize);
+      if (size % splitSize != 0)
+        numLocations++;
+      BlockLocation[] blockLocations = new BlockLocation[numLocations];
+      for (int i = 0; i < numLocations; i++) {
+        String[] names = new String[] { "b" + i };
+        String[] hosts = new String[] { "host" + i };
+        blockLocations[i] = new BlockLocation(names, hosts, i * splitSize,
+            Math.min(splitSize, size - (splitSize * i)));
+      }
+      return blockLocations;
+    }
+  }
+
   static void writeFile(Configuration conf, Path name,
       short replication, int numBlocks) throws IOException {
     FileSystem fileSys = FileSystem.get(conf);

+ 0 - 1
hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/ThreadedMapBenchmark.java → hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/ThreadedMapBenchmark.java

@@ -25,7 +25,6 @@ import java.util.Random;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.examples.RandomWriter;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.BytesWritable;

+ 0 - 1
hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/GenericMRLoadGenerator.java → hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/GenericMRLoadGenerator.java

@@ -29,7 +29,6 @@ import java.util.Stack;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.examples.RandomTextWriter;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;

+ 757 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java

@@ -0,0 +1,757 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This program uses map/reduce to just run a distributed job where there is
+ * no interaction between the tasks and each task writes a large unsorted
+ * random sequence of words.
+ * In order for this program to generate data for terasort with 5-10 words
+ * per key and 20-100 words per value, have the following config:
+ * <xmp>
+ * <?xml version="1.0"?>
+ * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ * <configuration>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.minwordskey</name>
+ *     <value>5</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.maxwordskey</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.minwordsvalue</name>
+ *     <value>20</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.maxwordsvalue</name>
+ *     <value>100</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.totalbytes</name>
+ *     <value>1099511627776</value>
+ *   </property>
+ * </configuration></xmp>
+ * 
+ * Equivalently, {@link RandomTextWriter} also supports all the above options
+ * and ones supported by {@link Tool} via the command-line.
+ * 
+ * To run: bin/hadoop jar hadoop-${version}-examples.jar randomtextwriter
+ *            [-outFormat <i>output format class</i>] <i>output</i> 
+ */
+public class RandomTextWriter extends Configured implements Tool {
+  public static final String TOTAL_BYTES = 
+    "mapreduce.randomtextwriter.totalbytes";
+  public static final String BYTES_PER_MAP = 
+    "mapreduce.randomtextwriter.bytespermap";
+  public static final String MAPS_PER_HOST = 
+    "mapreduce.randomtextwriter.mapsperhost";
+  public static final String MAX_VALUE = "mapreduce.randomtextwriter.maxwordsvalue";
+  public static final String MIN_VALUE = "mapreduce.randomtextwriter.minwordsvalue";
+  public static final String MIN_KEY = "mapreduce.randomtextwriter.minwordskey";
+  public static final String MAX_KEY = "mapreduce.randomtextwriter.maxwordskey";
+  
+  static int printUsage() {
+    System.out.println("randomtextwriter " +
+                       "[-outFormat <output format class>] " + 
+                       "<output>");
+    ToolRunner.printGenericCommandUsage(System.out);
+    return 2;
+  }
+  
+  /**
+   * User counters
+   */
+  static enum Counters { RECORDS_WRITTEN, BYTES_WRITTEN }
+
+  static class RandomTextMapper extends Mapper<Text, Text, Text, Text> {
+    
+    private long numBytesToWrite;
+    private int minWordsInKey;
+    private int wordsInKeyRange;
+    private int minWordsInValue;
+    private int wordsInValueRange;
+    private Random random = new Random();
+    
+    /**
+     * Save the configuration value that we need to write the data.
+     */
+    public void setup(Context context) {
+      Configuration conf = context.getConfiguration();
+      numBytesToWrite = conf.getLong(BYTES_PER_MAP,
+                                    1*1024*1024*1024);
+      minWordsInKey = conf.getInt(MIN_KEY, 5);
+      wordsInKeyRange = (conf.getInt(MAX_KEY, 10) - minWordsInKey);
+      minWordsInValue = conf.getInt(MIN_VALUE, 10);
+      wordsInValueRange = (conf.getInt(MAX_VALUE, 100) - minWordsInValue);
+    }
+    
+    /**
+     * Given an output filename, write a bunch of random records to it.
+     */
+    public void map(Text key, Text value,
+                    Context context) throws IOException,InterruptedException {
+      int itemCount = 0;
+      while (numBytesToWrite > 0) {
+        // Generate the key/value 
+        int noWordsKey = minWordsInKey + 
+          (wordsInKeyRange != 0 ? random.nextInt(wordsInKeyRange) : 0);
+        int noWordsValue = minWordsInValue + 
+          (wordsInValueRange != 0 ? random.nextInt(wordsInValueRange) : 0);
+        Text keyWords = generateSentence(noWordsKey);
+        Text valueWords = generateSentence(noWordsValue);
+        
+        // Write the sentence 
+        context.write(keyWords, valueWords);
+        
+        numBytesToWrite -= (keyWords.getLength() + valueWords.getLength());
+        
+        // Update counters, progress etc.
+        context.getCounter(Counters.BYTES_WRITTEN).increment(
+                  keyWords.getLength() + valueWords.getLength());
+        context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
+        if (++itemCount % 200 == 0) {
+          context.setStatus("wrote record " + itemCount + ". " + 
+                             numBytesToWrite + " bytes left.");
+        }
+      }
+      context.setStatus("done with " + itemCount + " records.");
+    }
+    
+    private Text generateSentence(int noWords) {
+      StringBuffer sentence = new StringBuffer();
+      String space = " ";
+      for (int i=0; i < noWords; ++i) {
+        sentence.append(words[random.nextInt(words.length)]);
+        sentence.append(space);
+      }
+      return new Text(sentence.toString());
+    }
+  }
+  
+  /**
+   * This is the main routine for launching a distributed random write job.
+   * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
+   * The reduce doesn't do anything.
+   * 
+   * @throws IOException 
+   */
+  public int run(String[] args) throws Exception {    
+    if (args.length == 0) {
+      return printUsage();    
+    }
+    
+    Configuration conf = getConf();
+    JobClient client = new JobClient(conf);
+    ClusterStatus cluster = client.getClusterStatus();
+    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
+    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
+                                             1*1024*1024*1024);
+    if (numBytesToWritePerMap == 0) {
+      System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
+      return -2;
+    }
+    long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
+         numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
+    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
+    if (numMaps == 0 && totalBytesToWrite > 0) {
+      numMaps = 1;
+      conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
+    }
+    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
+    
+    Job job = new Job(conf);
+    
+    job.setJarByClass(RandomTextWriter.class);
+    job.setJobName("random-text-writer");
+    
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(Text.class);
+    
+    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
+    job.setMapperClass(RandomTextMapper.class);        
+    
+    Class<? extends OutputFormat> outputFormatClass = 
+      SequenceFileOutputFormat.class;
+    List<String> otherArgs = new ArrayList<String>();
+    for(int i=0; i < args.length; ++i) {
+      try {
+        if ("-outFormat".equals(args[i])) {
+          outputFormatClass = 
+            Class.forName(args[++i]).asSubclass(OutputFormat.class);
+        } else {
+          otherArgs.add(args[i]);
+        }
+      } catch (ArrayIndexOutOfBoundsException except) {
+        System.out.println("ERROR: Required parameter missing from " +
+            args[i-1]);
+        return printUsage(); // exits
+      }
+    }
+
+    job.setOutputFormatClass(outputFormatClass);
+    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
+    
+    System.out.println("Running " + numMaps + " maps.");
+    
+    // reducer NONE
+    job.setNumReduceTasks(0);
+    
+    Date startTime = new Date();
+    System.out.println("Job started: " + startTime);
+    int ret = job.waitForCompletion(true) ? 0 : 1;
+    Date endTime = new Date();
+    System.out.println("Job ended: " + endTime);
+    System.out.println("The job took " + 
+                       (endTime.getTime() - startTime.getTime()) /1000 + 
+                       " seconds.");
+    
+    return ret;
+  }
+  
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(), new RandomTextWriter(), args);
+    System.exit(res);
+  }
+
+  /**
+   * A random list of 100 words from /usr/share/dict/words
+   */
+  private static String[] words = {
+                                   "diurnalness", "Homoiousian",
+                                   "spiranthic", "tetragynian",
+                                   "silverhead", "ungreat",
+                                   "lithograph", "exploiter",
+                                   "physiologian", "by",
+                                   "hellbender", "Filipendula",
+                                   "undeterring", "antiscolic",
+                                   "pentagamist", "hypoid",
+                                   "cacuminal", "sertularian",
+                                   "schoolmasterism", "nonuple",
+                                   "gallybeggar", "phytonic",
+                                   "swearingly", "nebular",
+                                   "Confervales", "thermochemically",
+                                   "characinoid", "cocksuredom",
+                                   "fallacious", "feasibleness",
+                                   "debromination", "playfellowship",
+                                   "tramplike", "testa",
+                                   "participatingly", "unaccessible",
+                                   "bromate", "experientialist",
+                                   "roughcast", "docimastical",
+                                   "choralcelo", "blightbird",
+                                   "peptonate", "sombreroed",
+                                   "unschematized", "antiabolitionist",
+                                   "besagne", "mastication",
+                                   "bromic", "sviatonosite",
+                                   "cattimandoo", "metaphrastical",
+                                   "endotheliomyoma", "hysterolysis",
+                                   "unfulminated", "Hester",
+                                   "oblongly", "blurredness",
+                                   "authorling", "chasmy",
+                                   "Scorpaenidae", "toxihaemia",
+                                   "Dictograph", "Quakerishly",
+                                   "deaf", "timbermonger",
+                                   "strammel", "Thraupidae",
+                                   "seditious", "plerome",
+                                   "Arneb", "eristically",
+                                   "serpentinic", "glaumrie",
+                                   "socioromantic", "apocalypst",
+                                   "tartrous", "Bassaris",
+                                   "angiolymphoma", "horsefly",
+                                   "kenno", "astronomize",
+                                   "euphemious", "arsenide",
+                                   "untongued", "parabolicness",
+                                   "uvanite", "helpless",
+                                   "gemmeous", "stormy",
+                                   "templar", "erythrodextrin",
+                                   "comism", "interfraternal",
+                                   "preparative", "parastas",
+                                   "frontoorbital", "Ophiosaurus",
+                                   "diopside", "serosanguineous",
+                                   "ununiformly", "karyological",
+                                   "collegian", "allotropic",
+                                   "depravity", "amylogenesis",
+                                   "reformatory", "epidymides",
+                                   "pleurotropous", "trillium",
+                                   "dastardliness", "coadvice",
+                                   "embryotic", "benthonic",
+                                   "pomiferous", "figureheadship",
+                                   "Megaluridae", "Harpa",
+                                   "frenal", "commotion",
+                                   "abthainry", "cobeliever",
+                                   "manilla", "spiciferous",
+                                   "nativeness", "obispo",
+                                   "monilioid", "biopsic",
+                                   "valvula", "enterostomy",
+                                   "planosubulate", "pterostigma",
+                                   "lifter", "triradiated",
+                                   "venialness", "tum",
+                                   "archistome", "tautness",
+                                   "unswanlike", "antivenin",
+                                   "Lentibulariaceae", "Triphora",
+                                   "angiopathy", "anta",
+                                   "Dawsonia", "becomma",
+                                   "Yannigan", "winterproof",
+                                   "antalgol", "harr",
+                                   "underogating", "ineunt",
+                                   "cornberry", "flippantness",
+                                   "scyphostoma", "approbation",
+                                   "Ghent", "Macraucheniidae",
+                                   "scabbiness", "unanatomized",
+                                   "photoelasticity", "eurythermal",
+                                   "enation", "prepavement",
+                                   "flushgate", "subsequentially",
+                                   "Edo", "antihero",
+                                   "Isokontae", "unforkedness",
+                                   "porriginous", "daytime",
+                                   "nonexecutive", "trisilicic",
+                                   "morphiomania", "paranephros",
+                                   "botchedly", "impugnation",
+                                   "Dodecatheon", "obolus",
+                                   "unburnt", "provedore",
+                                   "Aktistetae", "superindifference",
+                                   "Alethea", "Joachimite",
+                                   "cyanophilous", "chorograph",
+                                   "brooky", "figured",
+                                   "periclitation", "quintette",
+                                   "hondo", "ornithodelphous",
+                                   "unefficient", "pondside",
+                                   "bogydom", "laurinoxylon",
+                                   "Shiah", "unharmed",
+                                   "cartful", "noncrystallized",
+                                   "abusiveness", "cromlech",
+                                   "japanned", "rizzomed",
+                                   "underskin", "adscendent",
+                                   "allectory", "gelatinousness",
+                                   "volcano", "uncompromisingly",
+                                   "cubit", "idiotize",
+                                   "unfurbelowed", "undinted",
+                                   "magnetooptics", "Savitar",
+                                   "diwata", "ramosopalmate",
+                                   "Pishquow", "tomorn",
+                                   "apopenptic", "Haversian",
+                                   "Hysterocarpus", "ten",
+                                   "outhue", "Bertat",
+                                   "mechanist", "asparaginic",
+                                   "velaric", "tonsure",
+                                   "bubble", "Pyrales",
+                                   "regardful", "glyphography",
+                                   "calabazilla", "shellworker",
+                                   "stradametrical", "havoc",
+                                   "theologicopolitical", "sawdust",
+                                   "diatomaceous", "jajman",
+                                   "temporomastoid", "Serrifera",
+                                   "Ochnaceae", "aspersor",
+                                   "trailmaking", "Bishareen",
+                                   "digitule", "octogynous",
+                                   "epididymitis", "smokefarthings",
+                                   "bacillite", "overcrown",
+                                   "mangonism", "sirrah",
+                                   "undecorated", "psychofugal",
+                                   "bismuthiferous", "rechar",
+                                   "Lemuridae", "frameable",
+                                   "thiodiazole", "Scanic",
+                                   "sportswomanship", "interruptedness",
+                                   "admissory", "osteopaedion",
+                                   "tingly", "tomorrowness",
+                                   "ethnocracy", "trabecular",
+                                   "vitally", "fossilism",
+                                   "adz", "metopon",
+                                   "prefatorial", "expiscate",
+                                   "diathermacy", "chronist",
+                                   "nigh", "generalizable",
+                                   "hysterogen", "aurothiosulphuric",
+                                   "whitlowwort", "downthrust",
+                                   "Protestantize", "monander",
+                                   "Itea", "chronographic",
+                                   "silicize", "Dunlop",
+                                   "eer", "componental",
+                                   "spot", "pamphlet",
+                                   "antineuritic", "paradisean",
+                                   "interruptor", "debellator",
+                                   "overcultured", "Florissant",
+                                   "hyocholic", "pneumatotherapy",
+                                   "tailoress", "rave",
+                                   "unpeople", "Sebastian",
+                                   "thermanesthesia", "Coniferae",
+                                   "swacking", "posterishness",
+                                   "ethmopalatal", "whittle",
+                                   "analgize", "scabbardless",
+                                   "naught", "symbiogenetically",
+                                   "trip", "parodist",
+                                   "columniform", "trunnel",
+                                   "yawler", "goodwill",
+                                   "pseudohalogen", "swangy",
+                                   "cervisial", "mediateness",
+                                   "genii", "imprescribable",
+                                   "pony", "consumptional",
+                                   "carposporangial", "poleax",
+                                   "bestill", "subfebrile",
+                                   "sapphiric", "arrowworm",
+                                   "qualminess", "ultraobscure",
+                                   "thorite", "Fouquieria",
+                                   "Bermudian", "prescriber",
+                                   "elemicin", "warlike",
+                                   "semiangle", "rotular",
+                                   "misthread", "returnability",
+                                   "seraphism", "precostal",
+                                   "quarried", "Babylonism",
+                                   "sangaree", "seelful",
+                                   "placatory", "pachydermous",
+                                   "bozal", "galbulus",
+                                   "spermaphyte", "cumbrousness",
+                                   "pope", "signifier",
+                                   "Endomycetaceae", "shallowish",
+                                   "sequacity", "periarthritis",
+                                   "bathysphere", "pentosuria",
+                                   "Dadaism", "spookdom",
+                                   "Consolamentum", "afterpressure",
+                                   "mutter", "louse",
+                                   "ovoviviparous", "corbel",
+                                   "metastoma", "biventer",
+                                   "Hydrangea", "hogmace",
+                                   "seizing", "nonsuppressed",
+                                   "oratorize", "uncarefully",
+                                   "benzothiofuran", "penult",
+                                   "balanocele", "macropterous",
+                                   "dishpan", "marten",
+                                   "absvolt", "jirble",
+                                   "parmelioid", "airfreighter",
+                                   "acocotl", "archesporial",
+                                   "hypoplastral", "preoral",
+                                   "quailberry", "cinque",
+                                   "terrestrially", "stroking",
+                                   "limpet", "moodishness",
+                                   "canicule", "archididascalian",
+                                   "pompiloid", "overstaid",
+                                   "introducer", "Italical",
+                                   "Christianopaganism", "prescriptible",
+                                   "subofficer", "danseuse",
+                                   "cloy", "saguran",
+                                   "frictionlessly", "deindividualization",
+                                   "Bulanda", "ventricous",
+                                   "subfoliar", "basto",
+                                   "scapuloradial", "suspend",
+                                   "stiffish", "Sphenodontidae",
+                                   "eternal", "verbid",
+                                   "mammonish", "upcushion",
+                                   "barkometer", "concretion",
+                                   "preagitate", "incomprehensible",
+                                   "tristich", "visceral",
+                                   "hemimelus", "patroller",
+                                   "stentorophonic", "pinulus",
+                                   "kerykeion", "brutism",
+                                   "monstership", "merciful",
+                                   "overinstruct", "defensibly",
+                                   "bettermost", "splenauxe",
+                                   "Mormyrus", "unreprimanded",
+                                   "taver", "ell",
+                                   "proacquittal", "infestation",
+                                   "overwoven", "Lincolnlike",
+                                   "chacona", "Tamil",
+                                   "classificational", "lebensraum",
+                                   "reeveland", "intuition",
+                                   "Whilkut", "focaloid",
+                                   "Eleusinian", "micromembrane",
+                                   "byroad", "nonrepetition",
+                                   "bacterioblast", "brag",
+                                   "ribaldrous", "phytoma",
+                                   "counteralliance", "pelvimetry",
+                                   "pelf", "relaster",
+                                   "thermoresistant", "aneurism",
+                                   "molossic", "euphonym",
+                                   "upswell", "ladhood",
+                                   "phallaceous", "inertly",
+                                   "gunshop", "stereotypography",
+                                   "laryngic", "refasten",
+                                   "twinling", "oflete",
+                                   "hepatorrhaphy", "electrotechnics",
+                                   "cockal", "guitarist",
+                                   "topsail", "Cimmerianism",
+                                   "larklike", "Llandovery",
+                                   "pyrocatechol", "immatchable",
+                                   "chooser", "metrocratic",
+                                   "craglike", "quadrennial",
+                                   "nonpoisonous", "undercolored",
+                                   "knob", "ultratense",
+                                   "balladmonger", "slait",
+                                   "sialadenitis", "bucketer",
+                                   "magnificently", "unstipulated",
+                                   "unscourged", "unsupercilious",
+                                   "packsack", "pansophism",
+                                   "soorkee", "percent",
+                                   "subirrigate", "champer",
+                                   "metapolitics", "spherulitic",
+                                   "involatile", "metaphonical",
+                                   "stachyuraceous", "speckedness",
+                                   "bespin", "proboscidiform",
+                                   "gul", "squit",
+                                   "yeelaman", "peristeropode",
+                                   "opacousness", "shibuichi",
+                                   "retinize", "yote",
+                                   "misexposition", "devilwise",
+                                   "pumpkinification", "vinny",
+                                   "bonze", "glossing",
+                                   "decardinalize", "transcortical",
+                                   "serphoid", "deepmost",
+                                   "guanajuatite", "wemless",
+                                   "arval", "lammy",
+                                   "Effie", "Saponaria",
+                                   "tetrahedral", "prolificy",
+                                   "excerpt", "dunkadoo",
+                                   "Spencerism", "insatiately",
+                                   "Gilaki", "oratorship",
+                                   "arduousness", "unbashfulness",
+                                   "Pithecolobium", "unisexuality",
+                                   "veterinarian", "detractive",
+                                   "liquidity", "acidophile",
+                                   "proauction", "sural",
+                                   "totaquina", "Vichyite",
+                                   "uninhabitedness", "allegedly",
+                                   "Gothish", "manny",
+                                   "Inger", "flutist",
+                                   "ticktick", "Ludgatian",
+                                   "homotransplant", "orthopedical",
+                                   "diminutively", "monogoneutic",
+                                   "Kenipsim", "sarcologist",
+                                   "drome", "stronghearted",
+                                   "Fameuse", "Swaziland",
+                                   "alen", "chilblain",
+                                   "beatable", "agglomeratic",
+                                   "constitutor", "tendomucoid",
+                                   "porencephalous", "arteriasis",
+                                   "boser", "tantivy",
+                                   "rede", "lineamental",
+                                   "uncontradictableness", "homeotypical",
+                                   "masa", "folious",
+                                   "dosseret", "neurodegenerative",
+                                   "subtransverse", "Chiasmodontidae",
+                                   "palaeotheriodont", "unstressedly",
+                                   "chalcites", "piquantness",
+                                   "lampyrine", "Aplacentalia",
+                                   "projecting", "elastivity",
+                                   "isopelletierin", "bladderwort",
+                                   "strander", "almud",
+                                   "iniquitously", "theologal",
+                                   "bugre", "chargeably",
+                                   "imperceptivity", "meriquinoidal",
+                                   "mesophyte", "divinator",
+                                   "perfunctory", "counterappellant",
+                                   "synovial", "charioteer",
+                                   "crystallographical", "comprovincial",
+                                   "infrastapedial", "pleasurehood",
+                                   "inventurous", "ultrasystematic",
+                                   "subangulated", "supraoesophageal",
+                                   "Vaishnavism", "transude",
+                                   "chrysochrous", "ungrave",
+                                   "reconciliable", "uninterpleaded",
+                                   "erlking", "wherefrom",
+                                   "aprosopia", "antiadiaphorist",
+                                   "metoxazine", "incalculable",
+                                   "umbellic", "predebit",
+                                   "foursquare", "unimmortal",
+                                   "nonmanufacture", "slangy",
+                                   "predisputant", "familist",
+                                   "preaffiliate", "friarhood",
+                                   "corelysis", "zoonitic",
+                                   "halloo", "paunchy",
+                                   "neuromimesis", "aconitine",
+                                   "hackneyed", "unfeeble",
+                                   "cubby", "autoschediastical",
+                                   "naprapath", "lyrebird",
+                                   "inexistency", "leucophoenicite",
+                                   "ferrogoslarite", "reperuse",
+                                   "uncombable", "tambo",
+                                   "propodiale", "diplomatize",
+                                   "Russifier", "clanned",
+                                   "corona", "michigan",
+                                   "nonutilitarian", "transcorporeal",
+                                   "bought", "Cercosporella",
+                                   "stapedius", "glandularly",
+                                   "pictorially", "weism",
+                                   "disilane", "rainproof",
+                                   "Caphtor", "scrubbed",
+                                   "oinomancy", "pseudoxanthine",
+                                   "nonlustrous", "redesertion",
+                                   "Oryzorictinae", "gala",
+                                   "Mycogone", "reappreciate",
+                                   "cyanoguanidine", "seeingness",
+                                   "breadwinner", "noreast",
+                                   "furacious", "epauliere",
+                                   "omniscribent", "Passiflorales",
+                                   "uninductive", "inductivity",
+                                   "Orbitolina", "Semecarpus",
+                                   "migrainoid", "steprelationship",
+                                   "phlogisticate", "mesymnion",
+                                   "sloped", "edificator",
+                                   "beneficent", "culm",
+                                   "paleornithology", "unurban",
+                                   "throbless", "amplexifoliate",
+                                   "sesquiquintile", "sapience",
+                                   "astucious", "dithery",
+                                   "boor", "ambitus",
+                                   "scotching", "uloid",
+                                   "uncompromisingness", "hoove",
+                                   "waird", "marshiness",
+                                   "Jerusalem", "mericarp",
+                                   "unevoked", "benzoperoxide",
+                                   "outguess", "pyxie",
+                                   "hymnic", "euphemize",
+                                   "mendacity", "erythremia",
+                                   "rosaniline", "unchatteled",
+                                   "lienteria", "Bushongo",
+                                   "dialoguer", "unrepealably",
+                                   "rivethead", "antideflation",
+                                   "vinegarish", "manganosiderite",
+                                   "doubtingness", "ovopyriform",
+                                   "Cephalodiscus", "Muscicapa",
+                                   "Animalivora", "angina",
+                                   "planispheric", "ipomoein",
+                                   "cuproiodargyrite", "sandbox",
+                                   "scrat", "Munnopsidae",
+                                   "shola", "pentafid",
+                                   "overstudiousness", "times",
+                                   "nonprofession", "appetible",
+                                   "valvulotomy", "goladar",
+                                   "uniarticular", "oxyterpene",
+                                   "unlapsing", "omega",
+                                   "trophonema", "seminonflammable",
+                                   "circumzenithal", "starer",
+                                   "depthwise", "liberatress",
+                                   "unleavened", "unrevolting",
+                                   "groundneedle", "topline",
+                                   "wandoo", "umangite",
+                                   "ordinant", "unachievable",
+                                   "oversand", "snare",
+                                   "avengeful", "unexplicit",
+                                   "mustafina", "sonable",
+                                   "rehabilitative", "eulogization",
+                                   "papery", "technopsychology",
+                                   "impressor", "cresylite",
+                                   "entame", "transudatory",
+                                   "scotale", "pachydermatoid",
+                                   "imaginary", "yeat",
+                                   "slipped", "stewardship",
+                                   "adatom", "cockstone",
+                                   "skyshine", "heavenful",
+                                   "comparability", "exprobratory",
+                                   "dermorhynchous", "parquet",
+                                   "cretaceous", "vesperal",
+                                   "raphis", "undangered",
+                                   "Glecoma", "engrain",
+                                   "counteractively", "Zuludom",
+                                   "orchiocatabasis", "Auriculariales",
+                                   "warriorwise", "extraorganismal",
+                                   "overbuilt", "alveolite",
+                                   "tetchy", "terrificness",
+                                   "widdle", "unpremonished",
+                                   "rebilling", "sequestrum",
+                                   "equiconvex", "heliocentricism",
+                                   "catabaptist", "okonite",
+                                   "propheticism", "helminthagogic",
+                                   "calycular", "giantly",
+                                   "wingable", "golem",
+                                   "unprovided", "commandingness",
+                                   "greave", "haply",
+                                   "doina", "depressingly",
+                                   "subdentate", "impairment",
+                                   "decidable", "neurotrophic",
+                                   "unpredict", "bicorporeal",
+                                   "pendulant", "flatman",
+                                   "intrabred", "toplike",
+                                   "Prosobranchiata", "farrantly",
+                                   "toxoplasmosis", "gorilloid",
+                                   "dipsomaniacal", "aquiline",
+                                   "atlantite", "ascitic",
+                                   "perculsive", "prospectiveness",
+                                   "saponaceous", "centrifugalization",
+                                   "dinical", "infravaginal",
+                                   "beadroll", "affaite",
+                                   "Helvidian", "tickleproof",
+                                   "abstractionism", "enhedge",
+                                   "outwealth", "overcontribute",
+                                   "coldfinch", "gymnastic",
+                                   "Pincian", "Munychian",
+                                   "codisjunct", "quad",
+                                   "coracomandibular", "phoenicochroite",
+                                   "amender", "selectivity",
+                                   "putative", "semantician",
+                                   "lophotrichic", "Spatangoidea",
+                                   "saccharogenic", "inferent",
+                                   "Triconodonta", "arrendation",
+                                   "sheepskin", "taurocolla",
+                                   "bunghole", "Machiavel",
+                                   "triakistetrahedral", "dehairer",
+                                   "prezygapophysial", "cylindric",
+                                   "pneumonalgia", "sleigher",
+                                   "emir", "Socraticism",
+                                   "licitness", "massedly",
+                                   "instructiveness", "sturdied",
+                                   "redecrease", "starosta",
+                                   "evictor", "orgiastic",
+                                   "squdge", "meloplasty",
+                                   "Tsonecan", "repealableness",
+                                   "swoony", "myesthesia",
+                                   "molecule", "autobiographist",
+                                   "reciprocation", "refective",
+                                   "unobservantness", "tricae",
+                                   "ungouged", "floatability",
+                                   "Mesua", "fetlocked",
+                                   "chordacentrum", "sedentariness",
+                                   "various", "laubanite",
+                                   "nectopod", "zenick",
+                                   "sequentially", "analgic",
+                                   "biodynamics", "posttraumatic",
+                                   "nummi", "pyroacetic",
+                                   "bot", "redescend",
+                                   "dispermy", "undiffusive",
+                                   "circular", "trillion",
+                                   "Uraniidae", "ploration",
+                                   "discipular", "potentness",
+                                   "sud", "Hu",
+                                   "Eryon", "plugger",
+                                   "subdrainage", "jharal",
+                                   "abscission", "supermarket",
+                                   "countergabion", "glacierist",
+                                   "lithotresis", "minniebush",
+                                   "zanyism", "eucalypteol",
+                                   "sterilely", "unrealize",
+                                   "unpatched", "hypochondriacism",
+                                   "critically", "cheesecutter",
+                                  };
+}

+ 298 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java

@@ -0,0 +1,298 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This program uses map/reduce to just run a distributed job where there is
+ * no interaction between the tasks and each task writes a large unsorted
+ * random binary sequence file of BytesWritable.
+ * In order for this program to generate data for terasort with 10-byte keys
+ * and 90-byte values, use the following configuration:
+ * <xmp>
+ * <?xml version="1.0"?>
+ * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ * <configuration>
+ *   <property>
+ *     <name>mapreduce.randomwriter.minkey</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.maxkey</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.minvalue</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.maxvalue</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.totalbytes</name>
+ *     <value>1099511627776</value>
+ *   </property>
+ * </configuration></xmp>
+ * 
+ * Equivalently, {@link RandomWriter} also supports all the above options
+ * and ones supported by {@link GenericOptionsParser} via the command-line.
+ */
+public class RandomWriter extends Configured implements Tool {
+  public static final String TOTAL_BYTES = "mapreduce.randomwriter.totalbytes";
+  public static final String BYTES_PER_MAP = 
+    "mapreduce.randomwriter.bytespermap";
+  public static final String MAPS_PER_HOST = 
+    "mapreduce.randomwriter.mapsperhost";
+  public static final String MAX_VALUE = "mapreduce.randomwriter.maxvalue";
+  public static final String MIN_VALUE = "mapreduce.randomwriter.minvalue";
+  public static final String MIN_KEY = "mapreduce.randomwriter.minkey";
+  public static final String MAX_KEY = "mapreduce.randomwriter.maxkey";
+  
+  /**
+   * User counters
+   */
+  static enum Counters { RECORDS_WRITTEN, BYTES_WRITTEN }
+  
+  /**
+   * A custom input format that creates virtual inputs of a single string
+   * for each map.
+   */
+  static class RandomInputFormat extends InputFormat<Text, Text> {
+
+    /** 
+     * Generate the requested number of file splits, with the filename
+     * set to the filename of the output file.
+     */
+    public List<InputSplit> getSplits(JobContext job) throws IOException {
+      List<InputSplit> result = new ArrayList<InputSplit>();
+      Path outDir = FileOutputFormat.getOutputPath(job);
+      int numSplits = 
+            job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
+      for(int i=0; i < numSplits; ++i) {
+        result.add(new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, 
+                                  (String[])null));
+      }
+      return result;
+    }
+
+    /**
+     * Return a single record (filename, "") where the filename is taken from
+     * the file split.
+     */
+    static class RandomRecordReader extends RecordReader<Text, Text> {
+      Path name;
+      Text key = null;
+      Text value = new Text();
+      public RandomRecordReader(Path p) {
+        name = p;
+      }
+      
+      public void initialize(InputSplit split,
+                             TaskAttemptContext context)
+      throws IOException, InterruptedException {
+
+      }
+      
+      public boolean nextKeyValue() {
+        if (name != null) {
+          key = new Text();
+          key.set(name.getName());
+          name = null;
+          return true;
+        }
+        return false;
+      }
+      
+      public Text getCurrentKey() {
+        return key;
+      }
+      
+      public Text getCurrentValue() {
+        return value;
+      }
+      
+      public void close() {}
+
+      public float getProgress() {
+        return 0.0f;
+      }
+    }
+
+    public RecordReader<Text, Text> createRecordReader(InputSplit split,
+        TaskAttemptContext context) throws IOException, InterruptedException {
+      return new RandomRecordReader(((FileSplit) split).getPath());
+    }
+  }
+
+  static class RandomMapper extends Mapper<WritableComparable, Writable,
+                      BytesWritable, BytesWritable> {
+    
+    private long numBytesToWrite;
+    private int minKeySize;
+    private int keySizeRange;
+    private int minValueSize;
+    private int valueSizeRange;
+    private Random random = new Random();
+    private BytesWritable randomKey = new BytesWritable();
+    private BytesWritable randomValue = new BytesWritable();
+    
+    private void randomizeBytes(byte[] data, int offset, int length) {
+      for(int i=offset + length - 1; i >= offset; --i) {
+        data[i] = (byte) random.nextInt(256);
+      }
+    }
+    
+    /**
+     * Given an output filename, write a bunch of random records to it.
+     */
+    public void map(WritableComparable key, 
+                    Writable value,
+                    Context context) throws IOException,InterruptedException {
+      int itemCount = 0;
+      while (numBytesToWrite > 0) {
+        int keyLength = minKeySize + 
+          (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
+        randomKey.setSize(keyLength);
+        randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
+        int valueLength = minValueSize +
+          (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
+        randomValue.setSize(valueLength);
+        randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
+        context.write(randomKey, randomValue);
+        numBytesToWrite -= keyLength + valueLength;
+        context.getCounter(Counters.BYTES_WRITTEN).increment(keyLength + valueLength);
+        context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
+        if (++itemCount % 200 == 0) {
+          context.setStatus("wrote record " + itemCount + ". " + 
+                             numBytesToWrite + " bytes left.");
+        }
+      }
+      context.setStatus("done with " + itemCount + " records.");
+    }
+    
+    /**
+     * Save the values out of the configuration that we need to write
+     * the data.
+     */
+    @Override
+    public void setup(Context context) {
+      Configuration conf = context.getConfiguration();
+      numBytesToWrite = conf.getLong(BYTES_PER_MAP,
+                                    1*1024*1024*1024);
+      minKeySize = conf.getInt(MIN_KEY, 10);
+      keySizeRange = 
+        conf.getInt(MAX_KEY, 1000) - minKeySize;
+      minValueSize = conf.getInt(MIN_VALUE, 0);
+      valueSizeRange = 
+        conf.getInt(MAX_VALUE, 20000) - minValueSize;
+    }
+  }
+  
+  /**
+   * This is the main routine for launching a distributed random write job.
+   * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
+   * The reduce doesn't do anything.
+   * 
+   * @throws IOException 
+   */
+  public int run(String[] args) throws Exception {    
+    if (args.length == 0) {
+      System.out.println("Usage: writer <out-dir>");
+      ToolRunner.printGenericCommandUsage(System.out);
+      return 2;
+    }
+    
+    Path outDir = new Path(args[0]);
+    Configuration conf = getConf();
+    JobClient client = new JobClient(conf);
+    ClusterStatus cluster = client.getClusterStatus();
+    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
+    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
+                                             1*1024*1024*1024);
+    if (numBytesToWritePerMap == 0) {
+      System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
+      return -2;
+    }
+    long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
+         numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
+    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
+    if (numMaps == 0 && totalBytesToWrite > 0) {
+      numMaps = 1;
+      conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
+    }
+    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
+
+    Job job = new Job(conf);
+    
+    job.setJarByClass(RandomWriter.class);
+    job.setJobName("random-writer");
+    FileOutputFormat.setOutputPath(job, outDir);
+    job.setOutputKeyClass(BytesWritable.class);
+    job.setOutputValueClass(BytesWritable.class);
+    job.setInputFormatClass(RandomInputFormat.class);
+    job.setMapperClass(RandomMapper.class);        
+    job.setReducerClass(Reducer.class);
+    job.setOutputFormatClass(SequenceFileOutputFormat.class);
+    
+    System.out.println("Running " + numMaps + " maps.");
+    
+    // reducer NONE
+    job.setNumReduceTasks(0);
+    
+    Date startTime = new Date();
+    System.out.println("Job started: " + startTime);
+    int ret = job.waitForCompletion(true) ? 0 : 1;
+    Date endTime = new Date();
+    System.out.println("Job ended: " + endTime);
+    System.out.println("The job took " + 
+                       (endTime.getTime() - startTime.getTime()) /1000 + 
+                       " seconds.");
+    
+    return ret;
+  }
+  
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(), new RandomWriter(), args);
+    System.exit(res);
+  }
+
+}
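
Editor's note: a minimal driver sketch for the class above, showing how the terasort-style settings described in its javadoc could be applied programmatically. The class name RandomWriterDriver, the output path, and the numeric values are illustrative and not part of the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.RandomWriter;
import org.apache.hadoop.util.ToolRunner;

public class RandomWriterDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Fixed 10-byte keys and 90-byte values, ~1 TB of total output,
    // mirroring the configuration block in the RandomWriter javadoc.
    conf.setInt(RandomWriter.MIN_KEY, 10);
    conf.setInt(RandomWriter.MAX_KEY, 10);
    conf.setInt(RandomWriter.MIN_VALUE, 90);
    conf.setInt(RandomWriter.MAX_VALUE, 90);
    conf.setLong(RandomWriter.TOTAL_BYTES, 1099511627776L);
    // RandomWriter takes a single <out-dir> argument; the path is made up.
    System.exit(ToolRunner.run(conf, new RandomWriter(),
        new String[] { "/benchmarks/random-data" }));
  }
}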

+ 111 - 0
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFileInputFormat.java

@@ -19,7 +19,9 @@
 package org.apache.hadoop.mapreduce.lib.input;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 
 import org.junit.Test;
 import static org.junit.Assert.*;
@@ -28,10 +30,15 @@ import static org.mockito.Mockito.*;
 import static org.apache.hadoop.test.MockitoMaker.*;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 
 public class TestFileInputFormat {
 
@@ -80,4 +87,108 @@ public class TestFileInputFormat {
     ispy.getSplits(job);
     verify(conf).setLong(FileInputFormat.NUM_INPUT_FILES, 1);
   }
+  
+  @Test
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  public void testLastInputSplitAtSplitBoundary() throws Exception {
+    FileInputFormat fif = new FileInputFormatForTest(1024l * 1024 * 1024,
+        128l * 1024 * 1024);
+    Configuration conf = new Configuration();
+    JobContext jobContext = mock(JobContext.class);
+    when(jobContext.getConfiguration()).thenReturn(conf);
+    List<InputSplit> splits = fif.getSplits(jobContext);
+    assertEquals(8, splits.size());
+    for (int i = 0 ; i < splits.size() ; i++) {
+      InputSplit split = splits.get(i);
+      assertEquals(("host" + i), split.getLocations()[0]);
+    }
+  }
+  
+  @Test
+  @SuppressWarnings({ "rawtypes", "unchecked" })
+  public void testLastInputSplitExceedingSplitBoundary() throws Exception {
+    FileInputFormat fif = new FileInputFormatForTest(1027l * 1024 * 1024,
+        128l * 1024 * 1024);
+    Configuration conf = new Configuration();
+    JobContext jobContext = mock(JobContext.class);
+    when(jobContext.getConfiguration()).thenReturn(conf);
+    List<InputSplit> splits = fif.getSplits(jobContext);
+    assertEquals(8, splits.size());
+    for (int i = 0; i < splits.size(); i++) {
+      InputSplit split = splits.get(i);
+      assertEquals(("host" + i), split.getLocations()[0]);
+    }
+  }
+
+  @Test
+  @SuppressWarnings({ "rawtypes", "unchecked" })
+  public void testLastInputSplitSingleSplit() throws Exception {
+    FileInputFormat fif = new FileInputFormatForTest(100l * 1024 * 1024,
+        128l * 1024 * 1024);
+    Configuration conf = new Configuration();
+    JobContext jobContext = mock(JobContext.class);
+    when(jobContext.getConfiguration()).thenReturn(conf);
+    List<InputSplit> splits = fif.getSplits(jobContext);
+    assertEquals(1, splits.size());
+    for (int i = 0; i < splits.size(); i++) {
+      InputSplit split = splits.get(i);
+      assertEquals(("host" + i), split.getLocations()[0]);
+    }
+  }
+
+  private class FileInputFormatForTest<K, V> extends FileInputFormat<K, V> {
+
+    long splitSize;
+    long length;
+
+    FileInputFormatForTest(long length, long splitSize) {
+      this.length = length;
+      this.splitSize = splitSize;
+    }
+
+    @Override
+    public RecordReader<K, V> createRecordReader(InputSplit split,
+        TaskAttemptContext context) throws IOException, InterruptedException {
+      return null;
+    }
+
+    @Override
+    protected List<FileStatus> listStatus(JobContext job) throws IOException {
+      FileStatus mockFileStatus = mock(FileStatus.class);
+      when(mockFileStatus.getBlockSize()).thenReturn(splitSize);
+      Path mockPath = mock(Path.class);
+      FileSystem mockFs = mock(FileSystem.class);
+
+      BlockLocation[] blockLocations = mockBlockLocations(length, splitSize);
+      when(mockFs.getFileBlockLocations(mockFileStatus, 0, length)).thenReturn(
+          blockLocations);
+      when(mockPath.getFileSystem(any(Configuration.class))).thenReturn(mockFs);
+
+      when(mockFileStatus.getPath()).thenReturn(mockPath);
+      when(mockFileStatus.getLen()).thenReturn(length);
+
+      List<FileStatus> list = new ArrayList<FileStatus>();
+      list.add(mockFileStatus);
+      return list;
+    }
+
+    @Override
+    protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
+      return splitSize;
+    }
+
+    private BlockLocation[] mockBlockLocations(long size, long splitSize) {
+      int numLocations = (int) (size / splitSize);
+      if (size % splitSize != 0)
+        numLocations++;
+      BlockLocation[] blockLocations = new BlockLocation[numLocations];
+      for (int i = 0; i < numLocations; i++) {
+        String[] names = new String[] { "b" + i };
+        String[] hosts = new String[] { "host" + i };
+        blockLocations[i] = new BlockLocation(names, hosts, i * splitSize,
+            Math.min(splitSize, size - (splitSize * i)));
+      }
+      return blockLocations;
+    }
+  }
 }
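
Editor's note: a back-of-envelope check of why testLastInputSplitExceedingSplitBoundary expects 8 splits rather than 9 for a 1027 MB file with 128 MB blocks. It assumes FileInputFormat's default split-slop factor of 1.1 (a private constant in that class); the snippet simply replays the split loop with plain longs.

// Replays FileInputFormat's split loop for a 1027 MB file, 128 MB split size.
long length    = 1027L * 1024 * 1024;
long splitSize =  128L * 1024 * 1024;
final double SPLIT_SLOP = 1.1;       // assumed FileInputFormat default
int splits = 0;
long remaining = length;
while (((double) remaining) / splitSize > SPLIT_SLOP) {
  remaining -= splitSize;
  splits++;                          // 7 full 128 MB splits
}
if (remaining != 0) {
  splits++;                          // the 131 MB tail becomes one final split
}
System.out.println(splits);          // 8, matching assertEquals(8, splits.size())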

+ 0 - 0
hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/test/MapredTestDriver.java → hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/test/MapredTestDriver.java


+ 2 - 2
hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml

@@ -98,9 +98,9 @@
   <build>
    <plugins>
     <plugin>
-   	 <groupId>org.apache.maven.plugins</groupId>
+    <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-jar-plugin</artifactId>
-  	  <configuration>
+      <configuration>
        <archive>
          <manifest>
            <mainClass>org.apache.hadoop.examples.ExampleDriver</mainClass>

+ 3 - 2
hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraGen.java

@@ -238,7 +238,9 @@ public class TeraGen extends Configured implements Tool {
 
     @Override
     public void cleanup(Context context) {
-      checksumCounter.increment(total.getLow8());
+      if (checksumCounter != null) {
+        checksumCounter.increment(total.getLow8());
+      }
     }
   }
 
@@ -307,5 +309,4 @@ public class TeraGen extends Configured implements Tool {
     int res = ToolRunner.run(new Configuration(), new TeraGen(), args);
     System.exit(res);
   }
-
 }

+ 1 - 15
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java

@@ -84,21 +84,7 @@ public interface ApplicationConstants {
   public static final String STDERR = "stderr";
 
   public static final String STDOUT = "stdout";
-  
-  /**
-   * Classpath for typical applications.
-   */
-  public static final String[] APPLICATION_CLASSPATH =
-      new String[] {
-        "$HADOOP_CONF_DIR",
-        "$HADOOP_COMMON_HOME/share/hadoop/common/*",
-        "$HADOOP_COMMON_HOME/share/hadoop/common/lib/*",
-        "$HADOOP_HDFS_HOME/share/hadoop/hdfs/*",
-        "$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*",
-        "$YARN_HOME/share/hadoop/mapreduce/*",
-        "$YARN_HOME/share/hadoop/mapreduce/lib/*"
-      };
-  
+
   /**
    * Environment for Applications.
    * 

+ 4 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java

@@ -508,6 +508,10 @@ public class YarnConfiguration extends Configuration {
   public static final long DEFAULT_NM_PROCESS_KILL_WAIT_MS =
       2000;
 
+  /** Classpath for typical applications. */
+  public static final String YARN_APPLICATION_CLASSPATH = YARN_PREFIX
+      + "application.classpath";
+
   public YarnConfiguration() {
     super();
   }

+ 3 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java

@@ -36,6 +36,7 @@ import com.google.common.collect.Lists;
 import com.google.inject.Provides;
 import com.google.inject.servlet.GuiceFilter;
 import com.google.inject.servlet.ServletModule;
+import com.sun.jersey.api.container.filter.GZIPContentEncodingFilter;
 import com.sun.jersey.api.core.ResourceConfig;
 import com.sun.jersey.core.util.FeaturesAndProperties;
 import com.sun.jersey.guice.spi.container.servlet.GuiceContainer;
@@ -160,6 +161,8 @@ public abstract class WebApp extends ServletModule {
       params.put(ResourceConfig.FEATURE_IMPLICIT_VIEWABLES, "true");
       params.put(ServletContainer.FEATURE_FILTER_FORWARD_ON_404, "true");
       params.put(FeaturesAndProperties.FEATURE_XMLROOTELEMENT_PROCESSING, "true");
+      params.put(ResourceConfig.PROPERTY_CONTAINER_REQUEST_FILTERS, GZIPContentEncodingFilter.class.getName());
+      params.put(ResourceConfig.PROPERTY_CONTAINER_RESPONSE_FILTERS, GZIPContentEncodingFilter.class.getName());
       filter("/*").through(GuiceContainer.class, params);
     }
 

+ 14 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml

@@ -482,4 +482,18 @@
      <name>yarn.web-proxy.address</name>
      <value/>
   </property>
+
+  <property>
+    <description>Classpath for typical applications.</description>
+     <name>yarn.application.classpath</name>
+     <value>
+        $HADOOP_CONF_DIR,
+        $HADOOP_COMMON_HOME/share/hadoop/common/*,
+        $HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
+        $HADOOP_HDFS_HOME/share/hadoop/hdfs/*,
+        $HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
+        $YARN_HOME/share/hadoop/mapreduce/*,
+        $YARN_HOME/share/hadoop/mapreduce/lib/*
+     </value>
+  </property>
 </configuration>
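
Editor's note: a small sketch of how the new yarn.application.classpath property (exposed as YarnConfiguration.YARN_APPLICATION_CLASSPATH in the change above) might be consumed when assembling a container's CLASSPATH. The joining logic is illustrative, not part of the patch.

import org.apache.hadoop.yarn.conf.YarnConfiguration;

// Read the comma-separated entries and join them with ':' for a container
// launch environment. The XML value above spans several lines, so each
// entry is trimmed defensively.
YarnConfiguration conf = new YarnConfiguration();
StringBuilder classpath = new StringBuilder();
String[] entries = conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
if (entries != null) {
  for (String entry : entries) {
    if (classpath.length() > 0) {
      classpath.append(':');
    }
    classpath.append(entry.trim());
  }
}
System.out.println(classpath);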

+ 7 - 10
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java

@@ -295,10 +295,6 @@ public class SchedulerApp {
     }
   }
 
-  public synchronized void setAvailableResourceLimit(Resource globalLimit) {
-    this.resourceLimit = globalLimit; 
-  }
-
   public synchronized RMContainer getRMContainer(ContainerId id) {
     return liveContainers.get(id);
   }
@@ -446,20 +442,21 @@ public class SchedulerApp {
     return reservedContainers;
   }
   
+  public synchronized void setHeadroom(Resource globalLimit) {
+    this.resourceLimit = globalLimit; 
+  }
+
   /**
    * Get available headroom in terms of resources for the application's user.
    * @return available resource headroom
    */
   public synchronized Resource getHeadroom() {
-    Resource limit = Resources.subtract(resourceLimit, currentConsumption);
-    Resources.subtractFrom(limit, currentReservation);
-
     // Corner case to deal with applications being slightly over-limit
-    if (limit.getMemory() < 0) {
-      limit.setMemory(0);
+    if (resourceLimit.getMemory() < 0) {
+      resourceLimit.setMemory(0);
     }
     
-    return limit;
+    return resourceLimit;
   }
 
   public Queue getQueue() {

+ 29 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java

@@ -17,12 +17,19 @@
 */
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
 
+import org.apache.hadoop.yarn.api.records.Resource;
+
 class CSQueueUtils {
   
   public static void checkMaxCapacity(String queueName, 
       float capacity, float maximumCapacity) {
-    if (Math.round(100 * maximumCapacity) != CapacitySchedulerConfiguration.UNDEFINED && 
+    if (maximumCapacity < 0.0f || maximumCapacity > 1.0f || 
         maximumCapacity < capacity) {
+      throw new IllegalArgumentException(
+          "Illegal value  of maximumCapacity " + maximumCapacity + 
+          " used in call to setMaxCapacity for queue " + queueName);
+    }
+    if (maximumCapacity < capacity) {
       throw new IllegalArgumentException(
           "Illegal call to setMaxCapacity. " +
           "Queue '" + queueName + "' has " +
@@ -30,5 +37,26 @@ class CSQueueUtils {
           "maximumCapacity (" + maximumCapacity + ")" );
     }
   }
+
+  public static float computeAbsoluteMaximumCapacity(
+      float maximumCapacity, CSQueue parent) {
+    float parentAbsMaxCapacity = 
+        (parent == null) ? 1.0f : parent.getAbsoluteMaximumCapacity();
+    return (parentAbsMaxCapacity * maximumCapacity);
+  }
+
+  public static int computeMaxActiveApplications(Resource clusterResource,
+      float maxAMResourcePercent, float absoluteCapacity) {
+    return 
+        Math.max(
+            (int)((clusterResource.getMemory() / (float)LeafQueue.DEFAULT_AM_RESOURCE) * 
+                   maxAMResourcePercent * absoluteCapacity), 
+            1);
+  }
+
+  public static int computeMaxActiveApplicationsPerUser(
+      int maxActiveApplications, int userLimit, float userLimitFactor) {
+    return (int)(maxActiveApplications * (userLimit / 100.0f) * userLimitFactor);
+  }
   
 }
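
Editor's note: a worked example of the two limits computed by the new CSQueueUtils helpers. The numbers, and the 2 GB value assumed for LeafQueue.DEFAULT_AM_RESOURCE, are illustrative; the arithmetic only restates the formulas above.

// Illustrative numbers only.
int   clusterMemoryMb      = 100 * 1024;  // 100 GB of cluster memory
int   amResourceMb         = 2 * 1024;    // assumed LeafQueue.DEFAULT_AM_RESOURCE
float maxAMResourcePercent = 0.1f;
float absoluteCapacity     = 0.5f;
int maxActiveApplications = Math.max(
    (int) ((clusterMemoryMb / (float) amResourceMb)
        * maxAMResourcePercent * absoluteCapacity), 1);    // 50 * 0.1 * 0.5 -> 2

int   userLimit       = 100;              // percent
float userLimitFactor = 1.0f;
int maxActiveApplicationsPerUser =
    (int) (maxActiveApplications * (userLimit / 100.0f) * userLimitFactor);  // -> 2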

+ 6 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java

@@ -149,7 +149,7 @@ public class CapacitySchedulerConfiguration extends Configuration {
       throw new IllegalArgumentException("Illegal " +
       		"capacity of " + capacity + " for queue " + queue);
     }
-    LOG.debug("CSConf - setCapacity: queuePrefix=" + getQueuePrefix(queue) + 
+    LOG.debug("CSConf - getCapacity: queuePrefix=" + getQueuePrefix(queue) + 
         ", capacity=" + capacity);
     return capacity;
   }
@@ -162,11 +162,15 @@ public class CapacitySchedulerConfiguration extends Configuration {
 
   public int getMaximumCapacity(String queue) {
     int maxCapacity = 
-      getInt(getQueuePrefix(queue) + MAXIMUM_CAPACITY, UNDEFINED);
+      getInt(getQueuePrefix(queue) + MAXIMUM_CAPACITY, MAXIMUM_CAPACITY_VALUE);
     return maxCapacity;
   }
   
   public void setMaximumCapacity(String queue, int maxCapacity) {
+    if (maxCapacity > MAXIMUM_CAPACITY_VALUE) {
+      throw new IllegalArgumentException("Illegal " +
+          "maximum-capacity of " + maxCapacity + " for queue " + queue);
+    }
     setInt(getQueuePrefix(queue) + MAXIMUM_CAPACITY, maxCapacity);
     LOG.debug("CSConf - setMaxCapacity: queuePrefix=" + getQueuePrefix(queue) + 
         ", maxCapacity=" + maxCapacity);

+ 59 - 61
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

@@ -144,10 +144,10 @@ public class LeafQueue implements CSQueue {
       (float)cs.getConfiguration().getCapacity(getQueuePath()) / 100;
     float absoluteCapacity = parent.getAbsoluteCapacity() * capacity;
 
-    float maximumCapacity = (float)cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
+    float maximumCapacity = 
+        (float)cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
     float absoluteMaxCapacity = 
-      (Math.round(maximumCapacity * 100) == CapacitySchedulerConfiguration.UNDEFINED) ? 
-          Float.MAX_VALUE : (parent.getAbsoluteCapacity() * maximumCapacity);
+        CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
 
     int userLimit = cs.getConfiguration().getUserLimit(getQueuePath());
     float userLimitFactor = 
@@ -161,10 +161,10 @@ public class LeafQueue implements CSQueue {
     this.maxAMResourcePercent = 
         cs.getConfiguration().getMaximumApplicationMasterResourcePercent();
     int maxActiveApplications = 
-        computeMaxActiveApplications(cs.getClusterResources(), 
+        CSQueueUtils.computeMaxActiveApplications(cs.getClusterResources(), 
             maxAMResourcePercent, absoluteCapacity);
     int maxActiveApplicationsPerUser = 
-        computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit, 
+        CSQueueUtils.computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit, 
             userLimitFactor);
 
     this.queueInfo = recordFactory.newRecordInstance(QueueInfo.class);
@@ -193,20 +193,6 @@ public class LeafQueue implements CSQueue {
     this.activeApplications = new TreeSet<SchedulerApp>(applicationComparator);
   }
 
-  private int computeMaxActiveApplications(Resource clusterResource,
-      float maxAMResourcePercent, float absoluteCapacity) {
-    return 
-        Math.max(
-            (int)((clusterResource.getMemory() / (float)DEFAULT_AM_RESOURCE) * 
-                   maxAMResourcePercent * absoluteCapacity), 
-            1);
-  }
-  
-  private int computeMaxActiveApplicationsPerUser(int maxActiveApplications, 
-      int userLimit, float userLimitFactor) {
-    return (int)(maxActiveApplications * (userLimit / 100.0f) * userLimitFactor);
-  }
-  
   private synchronized void setupQueueConfigs(
       float capacity, float absoluteCapacity, 
       float maximumCapacity, float absoluteMaxCapacity,
@@ -254,8 +240,8 @@ public class LeafQueue implements CSQueue {
         "maxCapacity = " + maximumCapacity +
         " [= configuredMaxCapacity ]" + "\n" +
         "absoluteMaxCapacity = " + absoluteMaxCapacity +
-        " [= Float.MAX_VALUE if maximumCapacity undefined, " +
-        "(parentAbsoluteCapacity * maximumCapacity) / 100 otherwise ]" + "\n" +
+        " [= 1.0 maximumCapacity undefined, " +
+        "(parentAbsoluteMaxCapacity * maximumCapacity) / 100 otherwise ]" + "\n" +
         "userLimit = " + userLimit +
         " [= configuredUserLimit ]" + "\n" +
         "userLimitFactor = " + userLimitFactor +
@@ -272,9 +258,9 @@ public class LeafQueue implements CSQueue {
         "maxActiveApplicationsPerUser = " + maxActiveApplicationsPerUser +
         " [= (int)(maxActiveApplications * (userLimit / 100.0f) * userLimitFactor) ]" + "\n" +
         "utilization = " + utilization +
-        " [= usedResourcesMemory / queueLimit ]" + "\n" +
+        " [= usedResourcesMemory /  (clusterResourceMemory * absoluteCapacity)]" + "\n" +
         "usedCapacity = " + usedCapacity +
-        " [= usedResourcesMemory / (clusterResourceMemory * capacity) ]" + "\n" +
+        " [= usedResourcesMemory / (clusterResourceMemory * parent.absoluteCapacity)]" + "\n" +
         "maxAMResourcePercent = " + maxAMResourcePercent +
         " [= configuredMaximumAMResourcePercent ]" + "\n" +
         "minimumAllocationFactor = " + minimumAllocationFactor +
@@ -400,9 +386,7 @@ public class LeafQueue implements CSQueue {
     
     this.maximumCapacity = maximumCapacity;
     this.absoluteMaxCapacity = 
-      (Math.round(maximumCapacity * 100) == CapacitySchedulerConfiguration.UNDEFINED) ? 
-          Float.MAX_VALUE : 
-          (parent.getAbsoluteCapacity() * maximumCapacity);
+        CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
   }
   
   /**
@@ -502,9 +486,14 @@ public class LeafQueue implements CSQueue {
   }
 
   public String toString() {
-    return queueName + ":" + capacity + ":" + absoluteCapacity + ":" + 
-    getUsedCapacity() + ":" + getUtilization() + ":" + 
-    getNumApplications() + ":" + getNumContainers();
+    return queueName + ": " + 
+        "capacity=" + capacity + ", " + 
+        "absoluteCapacity=" + absoluteCapacity + ", " + 
+        "usedResources=" + usedResources.getMemory() + "MB, " + 
+        "usedCapacity=" + getUsedCapacity() + ", " + 
+        "utilization=" + getUtilization() + ", " + 
+        "numApps=" + getNumApplications() + ", " + 
+        "numContainers=" + getNumContainers();  
   }
 
   private synchronized User getUser(String userName) {
@@ -731,12 +720,11 @@ public class LeafQueue implements CSQueue {
       if(LOG.isDebugEnabled()) {
         LOG.debug("pre-assignContainers for application "
         + application.getApplicationId());
+        application.showRequests();
       }
-      application.showRequests();
 
       synchronized (application) {
-        computeAndSetUserResourceLimit(application, clusterResource);
-        
+        // Schedule in priority order
         for (Priority priority : application.getPriorities()) {
           // Required resource
           Resource required = 
@@ -747,15 +735,21 @@ public class LeafQueue implements CSQueue {
             continue;
           }
 
-          // Are we going over limits by allocating to this application?
-          // Maximum Capacity of the queue
+          // Compute & set headroom
+          // Note: We set the headroom with the highest priority request 
+          //       as the target. 
+          //       This works since we never assign lower priority requests
+          //       before all higher priority ones are serviced.
+          Resource userLimit = 
+              computeAndSetUserResourceLimit(application, clusterResource, 
+                  required);
+
+          // Check queue max-capacity limit
           if (!assignToQueue(clusterResource, required)) {
             return NULL_ASSIGNMENT;
           }
 
-          // User limits
-          Resource userLimit = 
-            computeUserLimit(application, clusterResource, required); 
+          // Check user limit
           if (!assignToUser(application.getUser(), userLimit)) {
             break; 
           }
@@ -769,7 +763,7 @@ public class LeafQueue implements CSQueue {
                 null);
           
           Resource assigned = assignment.getResource();
-            
+          
           // Did we schedule or reserve a container?
           if (Resources.greaterThan(assigned, Resources.none())) {
 
@@ -830,25 +824,28 @@ public class LeafQueue implements CSQueue {
     float potentialNewCapacity = 
       (float)(usedResources.getMemory() + required.getMemory()) / 
         clusterResource.getMemory();
-    LOG.info(getQueueName() + 
-        " usedResources: " + usedResources.getMemory() + 
-        " currentCapacity " + ((float)usedResources.getMemory())/clusterResource.getMemory() + 
-        " required " + required.getMemory() +
-        " potentialNewCapacity: " + potentialNewCapacity + " ( " +
-        " max-capacity: " + absoluteMaxCapacity + ")");
     if (potentialNewCapacity > absoluteMaxCapacity) {
+      LOG.info(getQueueName() + 
+          " usedResources: " + usedResources.getMemory() +
+          " clusterResources: " + clusterResource.getMemory() +
+          " currentCapacity " + ((float)usedResources.getMemory())/clusterResource.getMemory() + 
+          " required " + required.getMemory() +
+          " potentialNewCapacity: " + potentialNewCapacity + " ( " +
+          " max-capacity: " + absoluteMaxCapacity + ")");
       return false;
     }
     return true;
   }
 
-  private void computeAndSetUserResourceLimit(SchedulerApp application, 
-      Resource clusterResource) {
-    Resource userLimit = 
-        computeUserLimit(application, clusterResource, Resources.none());
-    application.setAvailableResourceLimit(userLimit);
-    metrics.setAvailableResourcesToUser(application.getUser(), 
-        application.getHeadroom());
+  private Resource computeAndSetUserResourceLimit(SchedulerApp application, 
+      Resource clusterResource, Resource required) {
+    String user = application.getUser();
+    Resource limit = computeUserLimit(application, clusterResource, required);
+    Resource headroom = 
+        Resources.subtract(limit, getUser(user).getConsumedResources());
+    application.setHeadroom(headroom);
+    metrics.setAvailableResourcesToUser(user, headroom);
+    return limit;
   }
   
   private int roundUp(int memory) {
@@ -919,7 +916,7 @@ public class LeafQueue implements CSQueue {
     User user = getUser(userName);
     
     // Note: We aren't considering the current request since there is a fixed
-    // overhead of the AM, but it's a >= check, so... 
+    // overhead of the AM, but it's a > check, not a >= check, so... 
     if ((user.getConsumedResources().getMemory()) > limit.getMemory()) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("User " + userName + " in queue " + getQueueName() + 
@@ -1237,8 +1234,8 @@ public class LeafQueue implements CSQueue {
         // happen under scheduler's lock... 
         // So, this is, in effect, a transaction across application & node
         if (rmContainer.getState() == RMContainerState.RESERVED) {
-          application.unreserve(node, rmContainer.getReservedPriority());
-          node.unreserveResource(application);
+          unreserve(application, rmContainer.getReservedPriority(), 
+              node, rmContainer);
         } else {
           application.containerCompleted(rmContainer, containerStatus, event);
           node.releaseContainer(container);
@@ -1303,24 +1300,25 @@ public class LeafQueue implements CSQueue {
   public synchronized void updateClusterResource(Resource clusterResource) {
     // Update queue properties
     maxActiveApplications = 
-        computeMaxActiveApplications(clusterResource, maxAMResourcePercent, 
+        CSQueueUtils.computeMaxActiveApplications(clusterResource, maxAMResourcePercent, 
             absoluteCapacity);
     maxActiveApplicationsPerUser = 
-        computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit, 
+        CSQueueUtils.computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit, 
             userLimitFactor);
     
     // Update application properties
     for (SchedulerApp application : activeApplications) {
-      computeAndSetUserResourceLimit(application, clusterResource);
+      computeAndSetUserResourceLimit(
+          application, clusterResource, Resources.none());
     }
   }
   
   private synchronized void updateResource(Resource clusterResource) {
-    float queueLimit = clusterResource.getMemory() * absoluteCapacity; 
+    float queueLimit = clusterResource.getMemory() * absoluteCapacity;
     setUtilization(usedResources.getMemory() / queueLimit);
-    setUsedCapacity(
-        usedResources.getMemory() / (clusterResource.getMemory() * capacity));
-    
+    setUsedCapacity(usedResources.getMemory()
+        / (clusterResource.getMemory() * parent.getAbsoluteCapacity()));
+
     Resource resourceLimit = 
       Resources.createResource(roundUp((int)queueLimit));
     metrics.setAvailableResourcesToQueue(
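
Editor's note: a sketch of the headroom semantics introduced above. Headroom is now the computed user limit minus what that user already consumes across all of their applications, recomputed against the highest outstanding priority request, rather than limit minus the single application's consumption and reservation. The values and the import path for the Resources helper are assumptions (the helper is the one already referenced in this diff).

import org.apache.hadoop.yarn.api.records.Resource;
// Package as used on this branch; the Resources helper moved in later releases.
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;

// Illustrative values; in the patch these come from computeUserLimit() and
// getUser(user).getConsumedResources().
Resource userLimit    = Resources.createResource(8 * 1024);  // 8 GB user limit
Resource userConsumed = Resources.createResource(5 * 1024);  // 5 GB already in use
Resource headroom     = Resources.subtract(userLimit, userConsumed);  // 3 GB left
// application.setHeadroom(headroom) and
// metrics.setAvailableResourcesToUser(user, headroom) then publish this value.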

+ 19 - 18
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java

@@ -118,16 +118,14 @@ public class ParentQueue implements CSQueue {
     }
 
     float capacity = (float) rawCapacity / 100;
-
     float parentAbsoluteCapacity = 
-      (parent == null) ? 1.0f : parent.getAbsoluteCapacity();
+      (rootQueue) ? 1.0f : parent.getAbsoluteCapacity();
     float absoluteCapacity = parentAbsoluteCapacity * capacity; 
 
-    float maximumCapacity = 
+    float  maximumCapacity =
       (float) cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
     float absoluteMaxCapacity = 
-      (Math.round(maximumCapacity * 100) == CapacitySchedulerConfiguration.UNDEFINED) ? 
-          Float.MAX_VALUE :  (parentAbsoluteCapacity * maximumCapacity);
+          CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
     
     QueueState state = cs.getConfiguration().getState(getQueuePath());
 
@@ -333,10 +331,15 @@ public class ParentQueue implements CSQueue {
   }
 
   public String toString() {
-    return queueName + ":" + capacity + ":" + absoluteCapacity + ":" + 
-      getUsedCapacity() + ":" + getUtilization() + ":" + 
-      getNumApplications() + ":" + getNumContainers() + ":" + 
-      childQueues.size() + " child-queues";
+    return queueName + ": " +
+        "numChildQueue= " + childQueues.size() + ", " + 
+        "capacity=" + capacity + ", " +  
+        "absoluteCapacity=" + absoluteCapacity + ", " +
+        "usedResources=" + usedResources.getMemory() + "MB, " + 
+        "usedCapacity=" + getUsedCapacity() + ", " + 
+        "utilization=" + getUtilization() + ", " +
+        "numApps=" + getNumApplications() + ", " + 
+        "numContainers=" + getNumContainers();
   }
   
   @Override
@@ -492,12 +495,8 @@ public class ParentQueue implements CSQueue {
     CSQueueUtils.checkMaxCapacity(getQueueName(), capacity, maximumCapacity);
     
     this.maximumCapacity = maximumCapacity;
-    float parentAbsoluteCapacity = 
-        (rootQueue) ? 100.0f : parent.getAbsoluteCapacity();
     this.absoluteMaxCapacity = 
-      (maximumCapacity == CapacitySchedulerConfiguration.UNDEFINED) ? 
-          Float.MAX_VALUE : 
-          (parentAbsoluteCapacity * maximumCapacity);
+        CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
   }
 
   @Override
@@ -688,11 +687,13 @@ public class ParentQueue implements CSQueue {
   }
   
   private synchronized void updateResource(Resource clusterResource) {
-    float queueLimit = clusterResource.getMemory() * absoluteCapacity; 
+    float queueLimit = clusterResource.getMemory() * absoluteCapacity;
+    float parentAbsoluteCapacity = 
+        (rootQueue) ? 1.0f : parent.getAbsoluteCapacity();
     setUtilization(usedResources.getMemory() / queueLimit);
-    setUsedCapacity(
-        usedResources.getMemory() / (clusterResource.getMemory() * capacity));
-    
+    setUsedCapacity(usedResources.getMemory() 
+        / (clusterResource.getMemory() * parentAbsoluteCapacity));
+  
     Resource resourceLimit = 
       Resources.createResource((int)queueLimit);
     metrics.setAvailableResourcesToQueue(
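
Note: in the updateResource() hunks above, used-capacity is now computed against the parent's absolute share of the cluster, while utilization stays relative to the queue's own absolute capacity. A small self-contained sketch of that arithmetic (floats and example figures are assumptions for illustration):

// Illustrative arithmetic only, mirroring the two formulas in the diff above.
public class QueueCapacitySketch {
  public static void main(String[] args) {
    float clusterMB = 102400f;              // assumed 100 GB cluster
    float absoluteCapacity = 0.10f;         // this queue's absolute share: 10%
    float parentAbsoluteCapacity = 0.20f;   // its parent's absolute share: 20%
    float usedMB = 5120f;                   // 5 GB currently used

    float queueLimit   = clusterMB * absoluteCapacity;
    float utilization  = usedMB / queueLimit;                           // 0.5
    float usedCapacity = usedMB / (clusterMB * parentAbsoluteCapacity); // 0.25

    System.out.println(utilization + " " + usedCapacity);
  }
}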

+ 1 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java

@@ -358,7 +358,7 @@ public class FifoScheduler implements ResourceScheduler {
         }
       }
       
-      application.setAvailableResourceLimit(clusterResource);
+      application.setHeadroom(clusterResource);
       
       LOG.debug("post-assignContainers");
       application.showRequests();

+ 62 - 16
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java

@@ -21,16 +21,24 @@ import static org.junit.Assert.*;
 import static org.mockito.Mockito.*;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.QueueACL;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
 import org.junit.After;
@@ -283,38 +291,76 @@ public class TestApplicationLimits {
     final String user_0 = "user_0";
     final String user_1 = "user_1";
     
-    int APPLICATION_ID = 0;
+    RecordFactory recordFactory = 
+        RecordFactoryProvider.getRecordFactory(null);
+    RMContext rmContext = TestUtils.getMockRMContext();
+
+    Priority priority_1 = TestUtils.createMockPriority(1);
 
-    // Submit first application from user_0, check headroom
-    SchedulerApp app_0_0 = getMockApplication(APPLICATION_ID++, user_0);
+    // Submit first application with some resource-requests from user_0, 
+    // and check headroom
+    final ApplicationAttemptId appAttemptId_0_0 = 
+        TestUtils.getMockApplicationAttemptId(0, 0); 
+    SchedulerApp app_0_0 = 
+        spy(new SchedulerApp(appAttemptId_0_0, user_0, queue, rmContext, null));
     queue.submitApplication(app_0_0, user_0, A);
-    queue.assignContainers(clusterResource, node_0); // Schedule to compute
+
+    List<ResourceRequest> app_0_0_requests = new ArrayList<ResourceRequest>();
+    app_0_0_requests.add(
+        TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2, 
+            priority_1, recordFactory));
+    app_0_0.updateResourceRequests(app_0_0_requests);
+
+    // Schedule to compute 
+    queue.assignContainers(clusterResource, node_0);
     Resource expectedHeadroom = Resources.createResource(10*16*GB);
-    verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
+    verify(app_0_0).setHeadroom(eq(expectedHeadroom));
 
     // Submit second application from user_0, check headroom
-    SchedulerApp app_0_1 = getMockApplication(APPLICATION_ID++, user_0);
+    final ApplicationAttemptId appAttemptId_0_1 = 
+        TestUtils.getMockApplicationAttemptId(1, 0); 
+    SchedulerApp app_0_1 = 
+        spy(new SchedulerApp(appAttemptId_0_1, user_0, queue, rmContext, null));
     queue.submitApplication(app_0_1, user_0, A);
+    
+    List<ResourceRequest> app_0_1_requests = new ArrayList<ResourceRequest>();
+    app_0_1_requests.add(
+        TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2, 
+            priority_1, recordFactory));
+    app_0_1.updateResourceRequests(app_0_1_requests);
+
+    // Schedule to compute 
     queue.assignContainers(clusterResource, node_0); // Schedule to compute
-    verify(app_0_0, times(2)).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));// no change
+    verify(app_0_0, times(2)).setHeadroom(eq(expectedHeadroom));
+    verify(app_0_1).setHeadroom(eq(expectedHeadroom));// no change
     
     // Submit first application from user_1, check  for new headroom
-    SchedulerApp app_1_0 = getMockApplication(APPLICATION_ID++, user_1);
+    final ApplicationAttemptId appAttemptId_1_0 = 
+        TestUtils.getMockApplicationAttemptId(2, 0); 
+    SchedulerApp app_1_0 = 
+        spy(new SchedulerApp(appAttemptId_1_0, user_1, queue, rmContext, null));
     queue.submitApplication(app_1_0, user_1, A);
+
+    List<ResourceRequest> app_1_0_requests = new ArrayList<ResourceRequest>();
+    app_1_0_requests.add(
+        TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2, 
+            priority_1, recordFactory));
+    app_1_0.updateResourceRequests(app_1_0_requests);
+    
+    // Schedule to compute 
     queue.assignContainers(clusterResource, node_0); // Schedule to compute
     expectedHeadroom = Resources.createResource(10*16*GB / 2); // changes
-    verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_1_0).setAvailableResourceLimit(eq(expectedHeadroom));
-    
+    verify(app_0_0).setHeadroom(eq(expectedHeadroom));
+    verify(app_0_1).setHeadroom(eq(expectedHeadroom));
+    verify(app_1_0).setHeadroom(eq(expectedHeadroom));
+
     // Now reduce cluster size and check for the smaller headroom
     clusterResource = Resources.createResource(90*16*GB);
     queue.assignContainers(clusterResource, node_0); // Schedule to compute
     expectedHeadroom = Resources.createResource(9*16*GB / 2); // changes
-    verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_1_0).setAvailableResourceLimit(eq(expectedHeadroom));
+    verify(app_0_0).setHeadroom(eq(expectedHeadroom));
+    verify(app_0_1).setHeadroom(eq(expectedHeadroom));
+    verify(app_1_0).setHeadroom(eq(expectedHeadroom));
   }
   
 

+ 7 - 7
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java

@@ -255,7 +255,7 @@ public class TestLeafQueue {
     // Manipulate queue 'a'
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     //unset maxCapacity
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
 
     // Users
     final String user_0 = "user_0";
@@ -377,7 +377,7 @@ public class TestLeafQueue {
     // Mock the queue
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     //unset maxCapacity
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
     
     // Users
     final String user_0 = "user_0";
@@ -491,7 +491,7 @@ public class TestLeafQueue {
     
     // Revert max-capacity and user-limit-factor
     // Now, allocations should goto app_3 since it's under user-limit 
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
     a.setUserLimitFactor(1);
     a.assignContainers(clusterResource, node_0);
     assertEquals(7*GB, a.getUsedResources().getMemory()); 
@@ -548,7 +548,7 @@ public class TestLeafQueue {
     // Manipulate queue 'a'
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     //unset maxCapacity
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
 
     // Users
     final String user_0 = "user_0";
@@ -571,7 +571,7 @@ public class TestLeafQueue {
     String host_0 = "host_0";
     SchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 4*GB);
     
-    final int numNodes = 1;
+    final int numNodes = 2;
     Resource clusterResource = Resources.createResource(numNodes * (4*GB));
     when(csContext.getNumClusterNodes()).thenReturn(numNodes);
     
@@ -646,7 +646,7 @@ public class TestLeafQueue {
     // Manipulate queue 'a'
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     //unset maxCapacity
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
     a.setUserLimitFactor(10);
 
     // Users
@@ -673,7 +673,7 @@ public class TestLeafQueue {
     String host_1 = "host_1";
     SchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 4*GB);
     
-    final int numNodes = 2;
+    final int numNodes = 3;
     Resource clusterResource = Resources.createResource(numNodes * (4*GB));
     when(csContext.getNumClusterNodes()).thenReturn(numNodes);
     when(csContext.getMaximumResourceCapability()).thenReturn(

+ 52 - 62
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java

@@ -138,12 +138,34 @@ public class TestParentQueue {
     when(queue).assignContainers(eq(clusterResource), eq(node));
   }
   
+  private float computeQueueUsedCapacity(CSQueue queue, 
+      int expectedMemory, Resource clusterResource) {
+    return (
+        ((float)expectedMemory / clusterResource.getMemory()) *
+        queue.getParent().getAbsoluteCapacity()
+      );
+  }
+  
   private float computeQueueUtilization(CSQueue queue, 
       int expectedMemory, Resource clusterResource) {
     return (expectedMemory / 
         (clusterResource.getMemory() * queue.getAbsoluteCapacity()));
   }
   
+  final static float DELTA = 0.0001f;
+  private void verifyQueueMetrics(CSQueue queue, 
+      int expectedMemory, Resource clusterResource) {
+    assertEquals(
+        computeQueueUtilization(queue, expectedMemory, clusterResource), 
+        queue.getUtilization(), 
+        DELTA);
+    assertEquals(
+        computeQueueUsedCapacity(queue, expectedMemory, clusterResource), 
+        queue.getUsedCapacity(), 
+        DELTA);
+
+  }
+  
   @Test
   public void testSingleLevelQueues() throws Exception {
     // Setup queue configs
@@ -173,15 +195,13 @@ public class TestParentQueue {
     // Start testing
     LeafQueue a = (LeafQueue)queues.get(A);
     LeafQueue b = (LeafQueue)queues.get(B);
-    final float delta = 0.0001f;
     
     // Simulate B returning a container on node_0
     stubQueueAllocation(a, clusterResource, node_0, 0*GB);
     stubQueueAllocation(b, clusterResource, node_0, 1*GB);
     root.assignContainers(clusterResource, node_0);
-    assertEquals(0.0f, a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 1*GB, clusterResource), 
-        b.getUtilization(), delta);
+    verifyQueueMetrics(a, 0*GB, clusterResource);
+    verifyQueueMetrics(b, 1*GB, clusterResource);
     
     // Now, A should get the scheduling opportunity since A=0G/6G, B=1G/14G
     stubQueueAllocation(a, clusterResource, node_1, 2*GB);
@@ -192,10 +212,8 @@ public class TestParentQueue {
         any(SchedulerNode.class));
     allocationOrder.verify(b).assignContainers(eq(clusterResource), 
         any(SchedulerNode.class));
-    assertEquals(computeQueueUtilization(a, 2*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 2*GB, clusterResource), 
-        b.getUtilization(), delta);
+    verifyQueueMetrics(a, 2*GB, clusterResource);
+    verifyQueueMetrics(b, 2*GB, clusterResource);
 
     // Now, B should get the scheduling opportunity 
     // since A has 2/6G while B has 2/14G
@@ -207,10 +225,8 @@ public class TestParentQueue {
         any(SchedulerNode.class));
     allocationOrder.verify(a).assignContainers(eq(clusterResource), 
         any(SchedulerNode.class));
-    assertEquals(computeQueueUtilization(a, 3*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 4*GB, clusterResource), 
-        b.getUtilization(), delta);
+    verifyQueueMetrics(a, 3*GB, clusterResource);
+    verifyQueueMetrics(b, 4*GB, clusterResource);
 
     // Now, B should still get the scheduling opportunity 
     // since A has 3/6G while B has 4/14G
@@ -222,10 +238,8 @@ public class TestParentQueue {
         any(SchedulerNode.class));
     allocationOrder.verify(a).assignContainers(eq(clusterResource), 
         any(SchedulerNode.class));
-    assertEquals(computeQueueUtilization(a, 3*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 8*GB, clusterResource), 
-        b.getUtilization(), delta);
+    verifyQueueMetrics(a, 3*GB, clusterResource);
+    verifyQueueMetrics(b, 8*GB, clusterResource);
 
     // Now, A should get the scheduling opportunity 
     // since A has 3/6G while B has 8/14G
@@ -237,10 +251,8 @@ public class TestParentQueue {
         any(SchedulerNode.class));
     allocationOrder.verify(a).assignContainers(eq(clusterResource), 
         any(SchedulerNode.class));
-    assertEquals(computeQueueUtilization(a, 4*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 9*GB, clusterResource), 
-        b.getUtilization(), delta);
+    verifyQueueMetrics(a, 4*GB, clusterResource);
+    verifyQueueMetrics(b, 9*GB, clusterResource);
   }
 
   private static final String C = "c";
@@ -323,22 +335,16 @@ public class TestParentQueue {
     CSQueue b2 = queues.get(B2);
     CSQueue b3 = queues.get(B3);
 
-    final float delta = 0.0001f;
-    
     // Simulate C returning a container on node_0
     stubQueueAllocation(a, clusterResource, node_0, 0*GB);
     stubQueueAllocation(b, clusterResource, node_0, 0*GB);
     stubQueueAllocation(c, clusterResource, node_0, 1*GB);
     stubQueueAllocation(d, clusterResource, node_0, 0*GB);
     root.assignContainers(clusterResource, node_0);
-    assertEquals(computeQueueUtilization(a, 0*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 0*GB, clusterResource), 
-        b.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(c, 1*GB, clusterResource), 
-        c.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(d, 0*GB, clusterResource), 
-        d.getUtilization(), delta);
+    verifyQueueMetrics(a, 0*GB, clusterResource);
+    verifyQueueMetrics(b, 0*GB, clusterResource);
+    verifyQueueMetrics(c, 1*GB, clusterResource);
+    verifyQueueMetrics(d, 0*GB, clusterResource);
     reset(a); reset(b); reset(c);
 
     // Now get B2 to allocate
@@ -347,12 +353,9 @@ public class TestParentQueue {
     stubQueueAllocation(b2, clusterResource, node_1, 4*GB);
     stubQueueAllocation(c, clusterResource, node_1, 0*GB);
     root.assignContainers(clusterResource, node_1);
-    assertEquals(computeQueueUtilization(a, 0*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 4*GB, clusterResource), 
-        b.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(c, 1*GB, clusterResource), 
-        c.getUtilization(), delta);
+    verifyQueueMetrics(a, 0*GB, clusterResource);
+    verifyQueueMetrics(b, 4*GB, clusterResource);
+    verifyQueueMetrics(c, 1*GB, clusterResource);
     reset(a); reset(b); reset(c);
     
     // Now get both A1, C & B3 to allocate in right order
@@ -368,12 +371,9 @@ public class TestParentQueue {
         any(SchedulerNode.class));
     allocationOrder.verify(b).assignContainers(eq(clusterResource), 
         any(SchedulerNode.class));
-    assertEquals(computeQueueUtilization(a, 1*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 6*GB, clusterResource), 
-        b.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(c, 3*GB, clusterResource), 
-        c.getUtilization(), delta);
+    verifyQueueMetrics(a, 1*GB, clusterResource);
+    verifyQueueMetrics(b, 6*GB, clusterResource);
+    verifyQueueMetrics(c, 3*GB, clusterResource);
     reset(a); reset(b); reset(c);
     
     // Now verify max-capacity
@@ -399,16 +399,12 @@ public class TestParentQueue {
         any(SchedulerNode.class));
     allocationOrder.verify(c).assignContainers(eq(clusterResource), 
         any(SchedulerNode.class));
-    assertEquals(computeQueueUtilization(a, 3*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 8*GB, clusterResource), 
-        b.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(c, 4*GB, clusterResource), 
-        c.getUtilization(), delta);
+    verifyQueueMetrics(a, 3*GB, clusterResource);
+    verifyQueueMetrics(b, 8*GB, clusterResource);
+    verifyQueueMetrics(c, 4*GB, clusterResource);
     reset(a); reset(b); reset(c);
-    
   }
-  
+
   @Test
   public void testOffSwitchScheduling() throws Exception {
     // Setup queue configs
@@ -438,15 +434,13 @@ public class TestParentQueue {
     // Start testing
     LeafQueue a = (LeafQueue)queues.get(A);
     LeafQueue b = (LeafQueue)queues.get(B);
-    final float delta = 0.0001f;
     
     // Simulate B returning a container on node_0
     stubQueueAllocation(a, clusterResource, node_0, 0*GB, NodeType.OFF_SWITCH);
     stubQueueAllocation(b, clusterResource, node_0, 1*GB, NodeType.OFF_SWITCH);
     root.assignContainers(clusterResource, node_0);
-    assertEquals(0.0f, a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 1*GB, clusterResource), 
-        b.getUtilization(), delta);
+    verifyQueueMetrics(a, 0*GB, clusterResource);
+    verifyQueueMetrics(b, 1*GB, clusterResource);
     
     // Now, A should get the scheduling opportunity since A=0G/6G, B=1G/14G
     // also, B gets a scheduling opportunity since A allocates RACK_LOCAL
@@ -458,10 +452,8 @@ public class TestParentQueue {
         any(SchedulerNode.class));
     allocationOrder.verify(b).assignContainers(eq(clusterResource), 
         any(SchedulerNode.class));
-    assertEquals(computeQueueUtilization(a, 2*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 2*GB, clusterResource), 
-        b.getUtilization(), delta);
+    verifyQueueMetrics(a, 2*GB, clusterResource);
+    verifyQueueMetrics(b, 2*GB, clusterResource);
     
     // Now, B should get the scheduling opportunity 
     // since A has 2/6G while B has 2/14G, 
@@ -474,10 +466,8 @@ public class TestParentQueue {
         any(SchedulerNode.class));
     allocationOrder.verify(a).assignContainers(eq(clusterResource), 
         any(SchedulerNode.class));
-    assertEquals(computeQueueUtilization(a, 2*GB, clusterResource), 
-        a.getUtilization(), delta);
-    assertEquals(computeQueueUtilization(b, 4*GB, clusterResource), 
-        b.getUtilization(), delta);
+    verifyQueueMetrics(a, 2*GB, clusterResource);
+    verifyQueueMetrics(b, 4*GB, clusterResource);
 
   }
   

+ 31 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java

@@ -30,6 +30,8 @@ public class TestQueueParsing {
 
   private static final Log LOG = LogFactory.getLog(TestQueueParsing.class);
   
+  private static final double DELTA = 0.000001;
+  
   @Test
   public void testQueueParsing() throws Exception {
     CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
@@ -37,6 +39,20 @@ public class TestQueueParsing {
 
     CapacityScheduler capacityScheduler = new CapacityScheduler();
     capacityScheduler.reinitialize(conf, null, null);
+    
+    CSQueue a = capacityScheduler.getQueue("a");
+    Assert.assertEquals(0.10, a.getAbsoluteCapacity(), DELTA);
+    Assert.assertEquals(0.15, a.getAbsoluteMaximumCapacity(), DELTA);
+    
+    CSQueue b1 = capacityScheduler.getQueue("b1");
+    Assert.assertEquals(0.2 * 0.5, b1.getAbsoluteCapacity(), DELTA);
+    Assert.assertEquals("Parent B has no MAX_CAP", 
+        0.85, b1.getAbsoluteMaximumCapacity(), DELTA);
+    
+    CSQueue c12 = capacityScheduler.getQueue("c12");
+    Assert.assertEquals(0.7 * 0.5 * 0.45, c12.getAbsoluteCapacity(), DELTA);
+    Assert.assertEquals(0.7 * 0.55 * 0.7, 
+        c12.getAbsoluteMaximumCapacity(), DELTA);
   }
   
   private void setupQueueConfiguration(CapacitySchedulerConfiguration conf) {
@@ -47,12 +63,14 @@ public class TestQueueParsing {
     
     final String A = CapacitySchedulerConfiguration.ROOT + ".a";
     conf.setCapacity(A, 10);
+    conf.setMaximumCapacity(A, 15);
     
     final String B = CapacitySchedulerConfiguration.ROOT + ".b";
     conf.setCapacity(B, 20);
-
+    
     final String C = CapacitySchedulerConfiguration.ROOT + ".c";
     conf.setCapacity(C, 70);
+    conf.setMaximumCapacity(C, 70);
 
     LOG.info("Setup top-level queues");
     
@@ -61,15 +79,20 @@ public class TestQueueParsing {
     final String A2 = A + ".a2";
     conf.setQueues(A, new String[] {"a1", "a2"});
     conf.setCapacity(A1, 30);
+    conf.setMaximumCapacity(A1, 45);
     conf.setCapacity(A2, 70);
+    conf.setMaximumCapacity(A2, 85);
     
     final String B1 = B + ".b1";
     final String B2 = B + ".b2";
     final String B3 = B + ".b3";
     conf.setQueues(B, new String[] {"b1", "b2", "b3"});
     conf.setCapacity(B1, 50);
+    conf.setMaximumCapacity(B1, 85);
     conf.setCapacity(B2, 30);
+    conf.setMaximumCapacity(B2, 35);
     conf.setCapacity(B3, 20);
+    conf.setMaximumCapacity(B3, 35);
 
     final String C1 = C + ".c1";
     final String C2 = C + ".c2";
@@ -77,9 +100,13 @@ public class TestQueueParsing {
     final String C4 = C + ".c4";
     conf.setQueues(C, new String[] {"c1", "c2", "c3", "c4"});
     conf.setCapacity(C1, 50);
+    conf.setMaximumCapacity(C1, 55);
     conf.setCapacity(C2, 10);
+    conf.setMaximumCapacity(C2, 25);
     conf.setCapacity(C3, 35);
+    conf.setMaximumCapacity(C3, 38);
     conf.setCapacity(C4, 5);
+    conf.setMaximumCapacity(C4, 5);
     
     LOG.info("Setup 2nd-level queues");
     
@@ -89,8 +116,11 @@ public class TestQueueParsing {
     final String C13 = C1 + ".c13";
     conf.setQueues(C1, new String[] {"c11", "c12", "c13"});
     conf.setCapacity(C11, 15);
+    conf.setMaximumCapacity(C11, 30);
     conf.setCapacity(C12, 45);
+    conf.setMaximumCapacity(C12, 70);
     conf.setCapacity(C13, 40);
+    conf.setMaximumCapacity(C13, 40);
     
     LOG.info("Setup 3rd-level queues");
   }
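
Note: the new assertions above encode the hierarchy math: a queue's absolute (maximum) capacity is the product of the per-level fractions down its path, and an undefined maximum capacity is treated as 100% (the "Parent B has no MAX_CAP" case). A worked sketch of the c12 case, where the helper name is an assumption for the example:

public class AbsoluteCapacitySketch {
  // Multiply a parent's absolute share by this queue's configured fraction.
  static float absolute(float parentAbsolute, float fraction) {
    return parentAbsolute * fraction;
  }

  public static void main(String[] args) {
    // root -> c (capacity 70%, max 70%) -> c1 (50%, max 55%) -> c12 (45%, max 70%)
    float c12AbsoluteCapacity    = absolute(absolute(absolute(1.0f, 0.70f), 0.50f), 0.45f);
    float c12AbsoluteMaxCapacity = absolute(absolute(absolute(1.0f, 0.70f), 0.55f), 0.70f);
    System.out.println(c12AbsoluteCapacity);     // ~0.1575, as asserted above
    System.out.println(c12AbsoluteMaxCapacity);  // ~0.2695
  }
}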

+ 13 - 10
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java

@@ -235,12 +235,13 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
         Element qElem = (Element) queues.item(j);
         String qName = WebServicesTestUtils.getXmlString(qElem, "queueName");
         String q = CapacitySchedulerConfiguration.ROOT + "." + qName;
-        verifySubQueueXML(qElem, q, 100);
+        verifySubQueueXML(qElem, q, 100, 100);
       }
     }
   }
 
-  public void verifySubQueueXML(Element qElem, String q, float parentAbsCapacity)
+  public void verifySubQueueXML(Element qElem, String q, 
+      float parentAbsCapacity, float parentAbsMaxCapacity)
       throws Exception {
     NodeList queues = qElem.getElementsByTagName("subQueues");
     QueueInfo qi = (queues != null) ? new QueueInfo() : new LeafQueueInfo();
@@ -258,14 +259,15 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
         WebServicesTestUtils.getXmlString(qElem, "usedResources");
     qi.queueName = WebServicesTestUtils.getXmlString(qElem, "queueName");
     qi.state = WebServicesTestUtils.getXmlString(qElem, "state");
-    verifySubQueueGeneric(q, qi, parentAbsCapacity);
+    verifySubQueueGeneric(q, qi, parentAbsCapacity, parentAbsMaxCapacity);
 
     if (queues != null) {
       for (int j = 0; j < queues.getLength(); j++) {
         Element subqElem = (Element) queues.item(j);
         String qName = WebServicesTestUtils.getXmlString(subqElem, "queueName");
         String q2 = q + "." + qName;
-        verifySubQueueXML(subqElem, q2, qi.absoluteCapacity);
+        verifySubQueueXML(subqElem, q2, 
+            qi.absoluteCapacity, qi.absoluteMaxCapacity);
       }
     } else {
       LeafQueueInfo lqi = (LeafQueueInfo) qi;
@@ -309,7 +311,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
     for (int i = 0; i < arr.length(); i++) {
       JSONObject obj = arr.getJSONObject(i);
       String q = CapacitySchedulerConfiguration.ROOT + "." + obj.getString("queueName");
-      verifySubQueue(obj, q, 100);
+      verifySubQueue(obj, q, 100, 100);
     }
   }
 
@@ -323,7 +325,8 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
     assertTrue("queueName doesn't match", "root".matches(queueName));
   }
 
-  private void verifySubQueue(JSONObject info, String q, float parentAbsCapacity)
+  private void verifySubQueue(JSONObject info, String q, 
+      float parentAbsCapacity, float parentAbsMaxCapacity)
       throws JSONException, Exception {
     int numExpectedElements = 11;
     boolean isParentQueue = true;
@@ -345,7 +348,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
     qi.queueName = info.getString("queueName");
     qi.state = info.getString("state");
 
-    verifySubQueueGeneric(q, qi, parentAbsCapacity);
+    verifySubQueueGeneric(q, qi, parentAbsCapacity, parentAbsMaxCapacity);
 
     if (isParentQueue) {
       JSONArray arr = info.getJSONArray("subQueues");
@@ -353,7 +356,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
       for (int i = 0; i < arr.length(); i++) {
         JSONObject obj = arr.getJSONObject(i);
         String q2 = q + "." + obj.getString("queueName");
-        verifySubQueue(obj, q2, qi.absoluteCapacity);
+        verifySubQueue(obj, q2, qi.absoluteCapacity, qi.absoluteMaxCapacity);
       }
     } else {
       LeafQueueInfo lqi = (LeafQueueInfo) qi;
@@ -371,7 +374,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
   }
 
   private void verifySubQueueGeneric(String q, QueueInfo info,
-      float parentAbsCapacity) throws Exception {
+      float parentAbsCapacity, float parentAbsMaxCapacity) throws Exception {
     String[] qArr = q.split("\\.");
     assertTrue("q name invalid: " + q, qArr.length > 1);
     String qshortName = qArr[qArr.length - 1];
@@ -380,7 +383,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
     assertEquals("capacity doesn't match", csConf.getCapacity(q),
         info.capacity, 1e-3f);
     float expectCapacity = csConf.getMaximumCapacity(q);
-    float expectAbsMaxCapacity = parentAbsCapacity * (info.maxCapacity/100);
+    float expectAbsMaxCapacity = parentAbsMaxCapacity * (info.maxCapacity/100);
     if (CapacitySchedulerConfiguration.UNDEFINED == expectCapacity) {
       expectCapacity = 100;
       expectAbsMaxCapacity = 100;

+ 8 - 5
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmIpFilter.java

@@ -57,7 +57,7 @@ public class AmIpFilter implements Filter {
     proxyUriBase = conf.getInitParameter(PROXY_URI_BASE);
   }
   
-  private Set<String> getProxyAddresses() throws ServletException {
+  protected Set<String> getProxyAddresses() throws ServletException {
     long now = System.currentTimeMillis();
     synchronized(this) {
       if(proxyAddresses == null || (lastUpdate + updateInterval) >= now) {
@@ -97,10 +97,13 @@ public class AmIpFilter implements Filter {
     }
     
     String user = null;
-    for(Cookie c: httpReq.getCookies()) {
-      if(WebAppProxyServlet.PROXY_USER_COOKIE_NAME.equals(c.getName())){
-        user = c.getValue();
-        break;
+    
+    if (httpReq.getCookies() != null) {
+      for(Cookie c: httpReq.getCookies()) {
+        if(WebAppProxyServlet.PROXY_USER_COOKIE_NAME.equals(c.getName())){
+          user = c.getValue();
+          break;
+        }
       }
     }
     if(user == null) {

+ 121 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/amfilter/TestAmFilter.java

@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.webproxy.amfilter;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import javax.servlet.Filter;
+import javax.servlet.FilterChain;
+import javax.servlet.FilterConfig;
+import javax.servlet.ServletContext;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+import org.mockito.Mockito;
+
+
+public class TestAmFilter  {
+
+  private String proxyHost = "bogushost.com";
+  private String proxyUri = "http://bogus";
+
+  private class TestAmIpFilter extends AmIpFilter {
+
+    private Set<String> proxyAddresses = null;
+
+    protected Set<String> getProxyAddresses() {
+      if(proxyAddresses == null) {
+        proxyAddresses = new HashSet<String>();
+      }
+      proxyAddresses.add(proxyHost);
+      return proxyAddresses;
+    }
+  }
+
+
+  private static class DummyFilterConfig implements FilterConfig {
+    final Map<String, String> map;
+
+
+    DummyFilterConfig(Map<String,String> map) {
+      this.map = map;
+    }
+
+    @Override
+    public String getFilterName() {
+      return "dummy";
+    }
+    @Override
+    public String getInitParameter(String arg0) {
+      return map.get(arg0);
+    }
+    @Override
+    public Enumeration<String> getInitParameterNames() {
+      return Collections.enumeration(map.keySet());
+    }
+    @Override
+    public ServletContext getServletContext() {
+      return null;
+    }
+  }
+
+
+  @Test
+  public void filterNullCookies() throws Exception {
+    HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
+
+    Mockito.when(request.getCookies()).thenReturn(null);
+    Mockito.when(request.getRemoteAddr()).thenReturn(proxyHost);
+
+    HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
+
+    final AtomicBoolean invoked = new AtomicBoolean();
+
+    FilterChain chain = new FilterChain() {
+      @Override
+      public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse)
+        throws IOException, ServletException {
+        invoked.set(true);
+      }
+    };
+
+    Map<String, String> params = new HashMap<String, String>();
+    params.put(AmIpFilter.PROXY_HOST, proxyHost);
+    params.put(AmIpFilter.PROXY_URI_BASE, proxyUri);
+    FilterConfig conf = new DummyFilterConfig(params);
+    Filter filter = new TestAmIpFilter();
+    filter.init(conf);
+    filter.doFilter(request, response, chain);
+    Assert.assertTrue(invoked.get());
+    filter.destroy();
+  }
+}

+ 10 - 10
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm

@@ -95,7 +95,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
 *--------------------------------------+--------------------------------------+
 | DataNode                             | HADOOP_DATANODE_OPTS                 |
 *--------------------------------------+--------------------------------------+
-| Backup NameNode                      | HADOOP_SECONDARYNAMENODE_OPTS        |
+| Secondary NameNode                   | HADOOP_SECONDARYNAMENODE_OPTS        |
 *--------------------------------------+--------------------------------------+
 | ResourceManager                      | YARN_RESOURCEMANAGER_OPTS            |
 *--------------------------------------+--------------------------------------+
@@ -537,15 +537,15 @@ Hadoop MapReduce Next Generation - Cluster Setup
       
   It's recommended to have them share a Unix group, for e.g. <<<hadoop>>>.
       
-*--------------------------------------+--------------------------------------+
-|| User:Group                          || Daemons                             |
-*--------------------------------------+--------------------------------------+
-| hdfs:hadoop                          | NameNode, Backup NameNode, DataNode  |
-*--------------------------------------+--------------------------------------+
-| yarn:hadoop                          | ResourceManager, NodeManager         |
-*--------------------------------------+--------------------------------------+
-| mapred:hadoop                        | MapReduce JobHistory Server          |  
-*--------------------------------------+--------------------------------------+
+*--------------------------------------+----------------------------------------------------------------------+
+|| User:Group                          || Daemons                                                             |
+*--------------------------------------+----------------------------------------------------------------------+
+| hdfs:hadoop                          | NameNode, Secondary NameNode, Checkpoint Node, Backup Node, DataNode |
+*--------------------------------------+----------------------------------------------------------------------+
+| yarn:hadoop                          | ResourceManager, NodeManager                                         |
+*--------------------------------------+----------------------------------------------------------------------+
+| mapred:hadoop                        | MapReduce JobHistory Server                                          |
+*--------------------------------------+----------------------------------------------------------------------+
       
   * <<<Permissions for both HDFS and local fileSystem paths>>>
      

+ 49 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/WebApplicationProxy.apt.vm

@@ -0,0 +1,49 @@
+~~ Licensed under the Apache License, Version 2.0 (the "License");
+~~ you may not use this file except in compliance with the License.
+~~ You may obtain a copy of the License at
+~~
+~~   http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License. See accompanying LICENSE file.
+
+  ---
+  YARN
+  ---
+  ---
+  ${maven.build.timestamp}
+
+Web Application Proxy
+
+  The Web Application Proxy is part of YARN.  By default it will run as part of
+  the Resource Manager (RM), but can be configured to run in stand-alone mode.
+  The reason for the proxy is to reduce the possibility of web based attacks
+  through YARN.
+
+  In YARN the Application Master (AM) has the responsibility to provide a web UI
+  and to send that link to the RM.  This opens up a number of potential
+  issues.  The RM runs as a trusted user, and people visiting that web
+  address will treat it, and links it provides to them as trusted, when in
+  reality the AM is running as a non-trusted user, and the links it gives to
+  the RM could point to anything malicious or otherwise.  The Web Application
+  Proxy mitigates this risk by warning users that do not own the given
+  application that they are connecting to an untrusted site.
+
+  In addition to this the proxy also tries to reduce the impact that a malicious
+  AM could have on a user.  It primarily does this by stripping out cookies from
+  the user, and replacing them with a single cookie providing the user name of
+  the logged in user.  This is because most web based authentication systems will
+  identify a user based off of a cookie.  By providing this cookie to an
+  untrusted application it opens up the potential for an exploit.  If the cookie
+  is designed properly that potential should be fairly minimal, but this is just
+  to reduce that potential attack vector.  The current proxy implementation does
+  nothing to prevent the AM from providing links to malicious external sites,
+  nor does it do anything to prevent malicious javascript code from running as
+  well.  In fact javascript can be used to get the cookies, so stripping the
+  cookies from the request has minimal benefit at this time.
+
+  In the future we hope to address the attack vectors described above and make
+  attaching to an AM's web UI safer.

+ 2 - 0
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/index.apt.vm

@@ -47,4 +47,6 @@ MapReduce NextGen aka YARN aka MRv2
 
   * {{{./CapacityScheduler.html}Capacity Scheduler}}
 
+  * {{{./WebApplicationProxy.html}Web Application Proxy}}
+
 

+ 23 - 0
hadoop-project/pom.xml

@@ -223,6 +223,11 @@
         <artifactId>hadoop-archives</artifactId>
         <version>${project.version}</version>
       </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-distcp</artifactId>
+        <version>${project.version}</version>
+      </dependency>
       <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-rumen</artifactId>
@@ -709,11 +714,21 @@
           <artifactId>maven-project-info-reports-plugin</artifactId>
           <version>2.4</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-resources-plugin</artifactId>
+          <version>2.2</version>
+        </plugin>
         <plugin>
           <groupId>org.codehaus.mojo</groupId>
           <artifactId>exec-maven-plugin</artifactId>
           <version>1.2</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-pdf-plugin</artifactId>
+          <version>1.1</version>
+        </plugin>
       </plugins>
     </pluginManagement>
 
@@ -811,6 +826,14 @@
           </excludes>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-pdf-plugin</artifactId>
+        <configuration>
+          <outputDirectory>${project.reporting.outputDirectory}</outputDirectory>
+          <includeReports>false</includeReports>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 

+ 1 - 0
hadoop-project/src/site/site.xml

@@ -61,6 +61,7 @@
       <item name="YARN Architecture" href="hadoop-yarn/hadoop-yarn-site/YARN.html"/>
       <item name="Writing Yarn Applications" href="hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html"/>
       <item name="Capacity Scheduler" href="hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html"/>
+      <item name="Web Application Proxy" href="hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html"/>
     </menu>
 
     <menu name="YARN REST API's" inherit="top">

+ 7 - 0
hadoop-tools/hadoop-distcp/README

@@ -0,0 +1,7 @@
+DistCp (distributed copy) is a tool used for large inter/intra-cluster copying. 
+It uses Map/Reduce to effect its distribution, error handling and recovery, 
+and reporting. It expands a list of files and directories into input to map tasks, 
+each of which will copy a partition of the files specified in the source list.
+
+Version 0.1 (2010/08/02 sriksun)
+ - Initial Version

+ 198 - 0
hadoop-tools/hadoop-distcp/pom.xml

@@ -0,0 +1,198 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<project>
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hadoop</groupId>
+    <artifactId>hadoop-project</artifactId>
+    <version>0.23.1-SNAPSHOT</version>
+    <relativePath>../../hadoop-project</relativePath>
+  </parent>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-distcp</artifactId>
+  <version>0.23.1-SNAPSHOT</version>
+  <description>Apache Hadoop Distributed Copy</description>
+  <name>Apache Hadoop Distributed Copy</name>
+  <packaging>jar</packaging>
+
+  <properties>
+    <file.encoding>UTF-8</file.encoding>
+    <downloadSources>true</downloadSources>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-app</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-hs</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-core</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+        <filtering>true</filtering>
+      </resource>
+    </resources>
+    <testResources>
+      <testResource>
+        <directory>src/test/resources</directory>
+        <filtering>true</filtering>
+      </testResource>
+    </testResources>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <forkMode>always</forkMode>
+          <forkedProcessTimeoutInSeconds>600</forkedProcessTimeoutInSeconds>
+          <argLine>-Xmx1024m</argLine>
+          <includes>
+            <include>**/Test*.java</include>
+          </includes>
+          <redirectTestOutputToFile>true</redirectTestOutputToFile>
+          <systemProperties>
+            <property>
+              <name>test.build.data</name>
+              <value>${basedir}/target/test/data</value>
+            </property>
+            <property>
+              <name>hadoop.log.dir</name>
+              <value>target/test/logs</value>
+            </property>
+            <property>
+              <name>org.apache.commons.logging.Log</name>
+              <value>org.apache.commons.logging.impl.SimpleLog</value>
+            </property>
+            <property>
+              <name>org.apache.commons.logging.simplelog.defaultlog</name>
+              <value>warn</value>
+            </property>
+          </systemProperties>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${project.build.directory}/lib</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <configuration>
+          <enableRulesSummary>true</enableRulesSummary>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <archive>
+            <manifest>
+              <mainClass>org.apache.hadoop.tools.DistCp</mainClass>
+            </manifest>
+          </archive>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+        <configuration>
+          <attach>true</attach>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-pdf-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>pdf</id>
+            <phase>package</phase>
+            <goals>
+              <goal>pdf</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>

+ 218 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java

@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.IOException;
+
+/**
+ * The CopyListing abstraction is responsible for how the list of
+ * sources and targets is constructed, for DistCp's copy function.
+ * The copy-listing should be a SequenceFile<Text, FileStatus>,
+ * located at the path specified to buildListing(),
+ * each entry being a pair of (Source relative path, source file status),
+ * all the paths being fully qualified.
+ */
+public abstract class CopyListing extends Configured {
+
+  private Credentials credentials;
+
+  /**
+   * Build listing function creates the input listing that distcp uses to
+   * perform the copy.
+   *
+   * The build listing is a sequence file that has relative path of a file in the key
+   * and the file status information of the source file in the value
+   *
+   * For instance if the source path is /tmp/data and the traversed path is
+   * /tmp/data/dir1/dir2/file1, then the sequence file would contain
+   *
+   * key: /dir1/dir2/file1 and value: FileStatus(/tmp/data/dir1/dir2/file1)
+   *
+   * File would also contain directory entries. Meaning, if /tmp/data/dir1/dir2/file1
+   * is the only file under /tmp/data, the resulting sequence file would contain the
+   * following entries
+   *
+   * key: /dir1 and value: FileStatus(/tmp/data/dir1)
+   * key: /dir1/dir2 and value: FileStatus(/tmp/data/dir1/dir2)
+   * key: /dir1/dir2/file1 and value: FileStatus(/tmp/data/dir1/dir2/file1)
+   *
+   * Cases requiring special handling:
+   * If source path is a file (/tmp/file1), contents of the file will be as follows
+   *
+   * TARGET DOES NOT EXIST: Key-"", Value-FileStatus(/tmp/file1)
+   * TARGET IS FILE       : Key-"", Value-FileStatus(/tmp/file1)
+   * TARGET IS DIR        : Key-"/file1", Value-FileStatus(/tmp/file1)  
+   *
+   * @param pathToListFile - Output file where the listing would be stored
+   * @param options - Input options to distcp
+   * @throws IOException - Exception if any
+   */
+  public final void buildListing(Path pathToListFile,
+                                 DistCpOptions options) throws IOException {
+    validatePaths(options);
+    doBuildListing(pathToListFile, options);
+    Configuration config = getConf();
+
+    config.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, pathToListFile.toString());
+    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, getBytesToCopy());
+    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, getNumberOfPaths());
+
+    checkForDuplicates(pathToListFile);
+  }
+
+  /**
+   * Validate input and output paths
+   *
+   * @param options - Input options
+   * @throws InvalidInputException: If inputs are invalid
+   * @throws IOException: any Exception with FS 
+   */
+  protected abstract void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException;
+
+  /**
+   * The interface to be implemented by sub-classes, to create the source/target file listing.
+   * @param pathToListFile Path on HDFS where the listing file is written.
+   * @param options Input Options for DistCp (indicating source/target paths.)
+   * @throws IOException: Thrown on failure to create the listing file.
+   */
+  protected abstract void doBuildListing(Path pathToListFile,
+                                         DistCpOptions options) throws IOException;
+
+  /**
+   * Return the total bytes that distCp should copy for the source paths
+   * This doesn't consider whether the file is the same and should be skipped during copy
+   *
+   * @return total bytes to copy
+   */
+  protected abstract long getBytesToCopy();
+
+  /**
+   * Return the total number of paths to distcp, includes directories as well
+   * This doesn't consider whether file/dir is already present and should be skipped during copy
+   *
+   * @return Total number of paths to distcp
+   */
+  protected abstract long getNumberOfPaths();
+
+  /**
+   * Validate the final resulting path listing to see if there are any duplicate entries
+   *
+   * @param pathToListFile - path listing build by doBuildListing
+   * @throws IOException - Any issues while checking for duplicates and throws
+   * @throws DuplicateFileException - if there are duplicates
+   */
+  private void checkForDuplicates(Path pathToListFile)
+      throws DuplicateFileException, IOException {
+
+    Configuration config = getConf();
+    FileSystem fs = pathToListFile.getFileSystem(config);
+
+    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);
+
+    SequenceFile.Reader reader = new SequenceFile.Reader(
+                          config, SequenceFile.Reader.file(sortedList));
+    try {
+      Text lastKey = new Text("*"); //source relative path can never hold *
+      FileStatus lastFileStatus = new FileStatus();
+
+      Text currentKey = new Text();
+      while (reader.next(currentKey)) {
+        if (currentKey.equals(lastKey)) {
+          FileStatus currentFileStatus = new FileStatus();
+          reader.getCurrentValue(currentFileStatus);
+          throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and " +
+              currentFileStatus.getPath() + " would cause duplicates. Aborting");
+        }
+        reader.getCurrentValue(lastFileStatus);
+        lastKey.set(currentKey);
+      }
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+  }
+
+  /**
+   * Protected constructor, to initialize configuration.
+   * @param configuration The input configuration,
+   *                        with which the source/target FileSystems may be accessed.
+   * @param credentials - Credentials object on which the FS delegation tokens are cached. If null,
+   * delegation token caching is skipped
+   */
+  protected CopyListing(Configuration configuration, Credentials credentials) {
+    setConf(configuration);
+    setCredentials(credentials);
+  }
+
+  /**
+   * Set the Credentials store, on which FS delegation tokens will be cached
+   * @param credentials - Credentials object
+   */
+  protected void setCredentials(Credentials credentials) {
+    this.credentials = credentials;
+  }
+
+  /**
+   * Get the credentials used to update the delegation tokens for accessed FS objects
+   * @return Credentials object
+   */
+  protected Credentials getCredentials() {
+    return credentials;
+  }
+
+  /**
+   * Public Factory method with which the appropriate CopyListing implementation may be retrieved.
+   * @param configuration The input configuration.
+   * @param credentials Credentials object on which the FS delegation tokens are cached
+   * @param options The input Options, to help choose the appropriate CopyListing Implementation.
+   * @return An instance of the appropriate CopyListing implementation.
+   */
+  public static CopyListing getCopyListing(Configuration configuration,
+                                           Credentials credentials,
+                                           DistCpOptions options) {
+    if (options.getSourceFileListing() == null) {
+      return new GlobbedCopyListing(configuration, credentials);
+    } else {
+      return new FileBasedCopyListing(configuration, credentials);
+    }
+  }
+
+  static class DuplicateFileException extends RuntimeException {
+    public DuplicateFileException(String message) {
+      super(message);
+    }
+  }
+
+  static class InvalidInputException extends RuntimeException {
+    public InvalidInputException(String message) {
+      super(message);
+    }
+  }
+}
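As a rough illustration of how this contract is exercised, the sketch below obtains a listing through the factory method and builds it into a SequenceFile of <relative-path, FileStatus> records. The namenode address, source/target paths, listing location and class name are placeholders, not part of this patch.

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.security.Credentials;
    import org.apache.hadoop.tools.CopyListing;
    import org.apache.hadoop.tools.DistCpOptions;

    public class CopyListingSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical source/target locations.
        DistCpOptions options = new DistCpOptions(
            Arrays.asList(new Path("hdfs://nn:8020/data/src")),
            new Path("hdfs://nn:8020/data/target"));
        // With no -f listing set, the factory returns a GlobbedCopyListing.
        CopyListing listing =
            CopyListing.getCopyListing(conf, new Credentials(), options);
        // Writes the listing, records the totals in conf, and fails on duplicate entries.
        listing.buildListing(new Path("hdfs://nn:8020/tmp/fileList.seq"), options);
      }
    }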

+ 405 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java

@@ -0,0 +1,405 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobSubmissionFiles;
+import org.apache.hadoop.mapreduce.Cluster;
+import org.apache.hadoop.tools.CopyListing.*;
+import org.apache.hadoop.tools.mapred.CopyMapper;
+import org.apache.hadoop.tools.mapred.CopyOutputFormat;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import java.io.IOException;
+import java.util.Random;
+
+/**
+ * DistCp is the main driver-class for DistCpV2.
+ * For command-line use, DistCp::main() orchestrates the parsing of command-line
+ * parameters and the launch of the DistCp job.
+ * For programmatic use, a DistCp object can be constructed by specifying
+ * options (in a DistCpOptions object), and DistCp::execute() may be used to
+ * launch the copy-job. DistCp may alternatively be sub-classed to fine-tune
+ * behaviour.
+ */
+public class DistCp extends Configured implements Tool {
+  private static final Log LOG = LogFactory.getLog(DistCp.class);
+
+  private DistCpOptions inputOptions;
+  private Path metaFolder;
+
+  private static final String PREFIX = "_distcp";
+  private static final String WIP_PREFIX = "._WIP_";
+  private static final String DISTCP_DEFAULT_XML = "distcp-default.xml";
+  public static final Random rand = new Random();
+
+  private boolean submitted;
+  private FileSystem jobFS;
+
+  /**
+   * Public Constructor. Creates DistCp object with specified input-parameters.
+   * (E.g. source-paths, target-location, etc.)
+   * @param inputOptions Options (indicating source-paths, target-location.)
+   * @param configuration The Hadoop configuration against which the Copy-mapper must run.
+   * @throws Exception on failure.
+   */
+  public DistCp(Configuration configuration, DistCpOptions inputOptions) throws Exception {
+    Configuration config = new Configuration(configuration);
+    config.addResource(DISTCP_DEFAULT_XML);
+    setConf(config);
+    this.inputOptions = inputOptions;
+    this.metaFolder   = createMetaFolderPath();
+  }
+
+  /**
+   * To be used with the ToolRunner. Not for public consumption.
+   */
+  private DistCp() {}
+
+  /**
+   * Implementation of Tool::run(). Orchestrates the copy of source file(s)
+   * to target location, by:
+   *  1. Creating a list of files to be copied to target.
+   *  2. Launching a Map-only job to copy the files. (Delegates to execute().)
+   * @param argv List of arguments passed to DistCp, from the ToolRunner.
+   * @return On success, DistCpConstants.SUCCESS (0). Otherwise, a negative DistCpConstants error code.
+   */
+  public int run(String[] argv) {
+    try {
+      inputOptions = (OptionsParser.parse(argv));
+
+      LOG.info("Input Options: " + inputOptions);
+    } catch (Throwable e) {
+      LOG.error("Invalid arguments: ", e);
+      System.err.println("Invalid arguments: " + e.getMessage());
+      OptionsParser.usage();      
+      return DistCpConstants.INVALID_ARGUMENT;
+    }
+    
+    try {
+      execute();
+    } catch (InvalidInputException e) {
+      LOG.error("Invalid input: ", e);
+      return DistCpConstants.INVALID_ARGUMENT;
+    } catch (DuplicateFileException e) {
+      LOG.error("Duplicate files in input path: ", e);
+      return DistCpConstants.DUPLICATE_INPUT;
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      return DistCpConstants.UNKNOWN_ERROR;
+    }
+    return DistCpConstants.SUCCESS;
+  }
+
+  /**
+   * Implements the core-execution. Creates the file-list for copy,
+   * and launches the Hadoop-job, to do the copy.
+   * @return Job handle
+   * @throws Exception on failure.
+   */
+  public Job execute() throws Exception {
+    assert inputOptions != null;
+    assert getConf() != null;
+
+    Job job = null;
+    try {
+      metaFolder = createMetaFolderPath();
+      jobFS = metaFolder.getFileSystem(getConf());
+
+      job = createJob();
+      createInputFileListing(job);
+
+      job.submit();
+      submitted = true;
+    } finally {
+      if (!submitted) {
+        cleanup();
+      }
+    }
+
+    String jobID = job.getJobID().toString();
+    job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);
+    
+    LOG.info("DistCp job-id: " + jobID);
+    if (inputOptions.shouldBlock()) {
+      job.waitForCompletion(true);
+    }
+    return job;
+  }
+
+  /**
+   * Create the Job object to be submitted, with all required configuration
+   *
+   * @return Reference to job object.
+   * @throws IOException - Exception if any
+   */
+  private Job createJob() throws IOException {
+    String jobName = "distcp";
+    String userChosenName = getConf().get(JobContext.JOB_NAME);
+    if (userChosenName != null)
+      jobName += ": " + userChosenName;
+    Job job = Job.getInstance(getConf());
+    job.setJobName(jobName);
+    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
+    job.setJarByClass(CopyMapper.class);
+    configureOutputFormat(job);
+
+    job.setMapperClass(CopyMapper.class);
+    job.setNumReduceTasks(0);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(Text.class);
+    job.setOutputFormatClass(CopyOutputFormat.class);
+    job.getConfiguration().set(JobContext.MAP_SPECULATIVE, "false");
+    job.getConfiguration().set(JobContext.NUM_MAPS,
+                  String.valueOf(inputOptions.getMaxMaps()));
+
+    if (inputOptions.getSslConfigurationFile() != null) {
+      setupSSLConfig(job);
+    }
+
+    inputOptions.appendToConf(job.getConfiguration());
+    return job;
+  }
+
+  /**
+   * Set up SSL configuration on the job configuration to enable hsftp access
+   * from the map tasks. Also copies the SSL configuration file to the distributed cache
+   *
+   * @param job - Reference to job's handle
+   * @throws java.io.IOException - Exception if unable to locate ssl config file
+   */
+  private void setupSSLConfig(Job job) throws IOException  {
+    Configuration configuration = job.getConfiguration();
+    Path sslConfigPath = new Path(configuration.
+        getResource(inputOptions.getSslConfigurationFile()).toString());
+
+    addSSLFilesToDistCache(job, sslConfigPath);
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
+  }
+
+  /**
+   * Add SSL files to distributed cache. Trust store, key store and ssl config xml
+   *
+   * @param job - Job handle
+   * @param sslConfigPath - ssl Configuration file specified through options
+   * @throws IOException - If any
+   */
+  private void addSSLFilesToDistCache(Job job,
+                                      Path sslConfigPath) throws IOException {
+    Configuration configuration = job.getConfiguration();
+    FileSystem localFS = FileSystem.getLocal(configuration);
+
+    Configuration sslConf = new Configuration(false);
+    sslConf.addResource(sslConfigPath);
+
+    Path localStorePath = getLocalStorePath(sslConf,
+                            DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION);
+    job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
+                                      localFS.getWorkingDirectory()).toUri());
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION,
+                      localStorePath.getName());
+
+    localStorePath = getLocalStorePath(sslConf,
+                             DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION);
+    job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
+                                      localFS.getWorkingDirectory()).toUri());
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION,
+                                      localStorePath.getName());
+
+    job.addCacheFile(sslConfigPath.makeQualified(localFS.getUri(),
+                                      localFS.getWorkingDirectory()).toUri());
+
+  }
+
+  /**
+   * Get Local Trust store/key store path
+   *
+   * @param sslConf - Config from SSL Client xml
+   * @param storeKey - Key for either trust store or key store
+   * @return - Path where the store is present
+   * @throws IOException - If any
+   */
+  private Path getLocalStorePath(Configuration sslConf, String storeKey) throws IOException {
+    if (sslConf.get(storeKey) != null) {
+      return new Path(sslConf.get(storeKey));
+    } else {
+      throw new IOException("Store for " + storeKey + " is not set in " +
+          inputOptions.getSslConfigurationFile());
+    }
+  }
+
+  /**
+   * Set up the output format appropriately
+   *
+   * @param job - Job handle
+   * @throws IOException - Exception if any
+   */
+  private void configureOutputFormat(Job job) throws IOException {
+    final Configuration configuration = job.getConfiguration();
+    Path targetPath = inputOptions.getTargetPath();
+    FileSystem targetFS = targetPath.getFileSystem(configuration);
+    targetPath = targetPath.makeQualified(targetFS.getUri(),
+                                          targetFS.getWorkingDirectory());
+
+    if (inputOptions.shouldAtomicCommit()) {
+      Path workDir = inputOptions.getAtomicWorkPath();
+      if (workDir == null) {
+        workDir = targetPath.getParent();
+      }
+      workDir = new Path(workDir, WIP_PREFIX + targetPath.getName()
+                                + rand.nextInt());
+      FileSystem workFS = workDir.getFileSystem(configuration);
+      if (!DistCpUtils.compareFs(targetFS, workFS)) {
+        throw new IllegalArgumentException("Work path " + workDir +
+            " and target path " + targetPath + " are in different file system");
+      }
+      CopyOutputFormat.setWorkingDirectory(job, workDir);
+    } else {
+      CopyOutputFormat.setWorkingDirectory(job, targetPath);
+    }
+    CopyOutputFormat.setCommitDirectory(job, targetPath);
+
+    Path logPath = inputOptions.getLogPath();
+    if (logPath == null) {
+      logPath = new Path(metaFolder, "_logs");
+    } else {
+      LOG.info("DistCp job log path: " + logPath);
+    }
+    CopyOutputFormat.setOutputPath(job, logPath);
+  }
+
+  /**
+   * Create input listing by invoking an appropriate copy listing
+   * implementation. Also add delegation tokens for each path
+   * to job's credential store
+   *
+   * @param job - Handle to job
+   * @return Returns the path where the copy listing is created
+   * @throws IOException - If any
+   */
+  private Path createInputFileListing(Job job) throws IOException {
+    Path fileListingPath = getFileListingPath();
+    CopyListing copyListing = CopyListing.getCopyListing(job.getConfiguration(),
+        job.getCredentials(), inputOptions);
+    copyListing.buildListing(fileListingPath, inputOptions);
+    return fileListingPath;
+  }
+
+  /**
+   * Get default name of the copy listing file. Use the meta folder
+   * to create the copy listing file
+   *
+   * @return - Path where the copy listing file has to be saved
+   * @throws IOException - Exception if any
+   */
+  private Path getFileListingPath() throws IOException {
+    String fileListPathStr = metaFolder + "/fileList.seq";
+    Path path = new Path(fileListPathStr);
+    return new Path(path.toUri().normalize().toString());
+  }
+
+  /**
+   * Create a default working folder for the job, under the
+   * job staging directory
+   *
+   * @return Returns the working folder information
+   * @throws Exception - Exception if any
+   */
+  private Path createMetaFolderPath() throws Exception {
+    Configuration configuration = getConf();
+    Path stagingDir = JobSubmissionFiles.getStagingDir(
+            new Cluster(configuration), configuration);
+    Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
+    if (LOG.isDebugEnabled())
+      LOG.debug("Meta folder location: " + metaFolderPath);
+    configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());    
+    return metaFolderPath;
+  }
+
+  /**
+   * Main function of the DistCp program. Parses the input arguments (via OptionsParser),
+   * and invokes the DistCp::run() method, via the ToolRunner.
+   * @param argv Command-line arguments sent to DistCp.
+   */
+  public static void main(String argv[]) {
+    try {
+      DistCp distCp = new DistCp();
+      Cleanup CLEANUP = new Cleanup(distCp);
+
+      Runtime.getRuntime().addShutdownHook(CLEANUP);
+      System.exit(ToolRunner.run(getDefaultConf(), distCp, argv));
+    }
+    catch (Exception e) {
+      LOG.error("Couldn't complete DistCp operation: ", e);
+      System.exit(DistCpConstants.UNKNOWN_ERROR);
+    }
+  }
+
+  /**
+   * Loads properties from distcp-default.xml into configuration
+   * object
+   * @return Configuration which includes properties from distcp-default.xml
+   */
+  private static Configuration getDefaultConf() {
+    Configuration config = new Configuration();
+    config.addResource(DISTCP_DEFAULT_XML);
+    return config;
+  }
+
+  private synchronized void cleanup() {
+    try {
+      if (metaFolder == null) return;
+
+      jobFS.delete(metaFolder, true);
+      metaFolder = null;
+    } catch (IOException e) {
+      LOG.error("Unable to cleanup meta folder: " + metaFolder, e);
+    }
+  }
+
+  private boolean isSubmitted() {
+    return submitted;
+  }
+
+  private static class Cleanup extends Thread {
+    private final DistCp distCp;
+
+    public Cleanup(DistCp distCp) {
+      this.distCp = distCp;
+    }
+
+    @Override
+    public void run() {
+      if (distCp.isSubmitted()) return;
+
+      distCp.cleanup();
+    }
+  }
+}
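To ground the class-level javadoc above, here is a minimal, hedged sketch of programmatic use; the paths, namenode address and driver class name are invented for illustration only.

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.tools.DistCp;
    import org.apache.hadoop.tools.DistCpOptions;

    public class DistCpDriverSketch {
      public static void main(String[] args) throws Exception {
        // Hypothetical source/target paths; any resolvable Hadoop paths would do.
        DistCpOptions options = new DistCpOptions(
            Arrays.asList(new Path("hdfs://nn:8020/data/in")),
            new Path("hdfs://nn:8020/data/out"));
        options.setSyncFolder(true);   // equivalent to -update
        options.setMaxMaps(10);        // equivalent to -m 10
        DistCp distCp = new DistCp(new Configuration(), options);
        Job job = distCp.execute();    // submits and, by default, blocks for completion
        System.out.println("DistCp succeeded: " + job.isSuccessful());
      }
    }

When shouldBlock() is left at its default (true), execute() waits for the job to finish; with -async on the command line, or setBlocking(false), it returns right after submission and the caller can poll the returned Job handle.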

+ 104 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java

@@ -0,0 +1,104 @@
+package org.apache.hadoop.tools;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utility class to hold commonly used constants.
+ */
+public class DistCpConstants {
+
+  /* Default number of maps to use for DistCp */
+  public static final int DEFAULT_MAPS = 20;
+
+  /* Default bandwidth if none specified */
+  public static final int DEFAULT_BANDWIDTH_MB = 100;
+
+  /* Default strategy for copying. Implementation looked up
+     from distcp-default.xml
+   */
+  public static final String UNIFORMSIZE = "uniformsize";
+
+  /**
+   *  Constants mapping to command line switches/input options
+   */
+  public static final String CONF_LABEL_ATOMIC_COPY = "distcp.atomic.copy";
+  public static final String CONF_LABEL_WORK_PATH = "distcp.work.path";
+  public static final String CONF_LABEL_LOG_PATH = "distcp.log.path";
+  public static final String CONF_LABEL_IGNORE_FAILURES = "distcp.ignore.failures";
+  public static final String CONF_LABEL_PRESERVE_STATUS = "distcp.preserve.status";
+  public static final String CONF_LABEL_SYNC_FOLDERS = "distcp.sync.folders";
+  public static final String CONF_LABEL_DELETE_MISSING = "distcp.delete.missing.source";
+  public static final String CONF_LABEL_SSL_CONF = "distcp.keystore.resource";
+  public static final String CONF_LABEL_MAX_MAPS = "distcp.max.maps";
+  public static final String CONF_LABEL_SOURCE_LISTING = "distcp.source.listing";
+  public static final String CONF_LABEL_COPY_STRATEGY = "distcp.copy.strategy";
+  public static final String CONF_LABEL_SKIP_CRC = "distcp.skip.crc";
+  public static final String CONF_LABEL_OVERWRITE = "distcp.copy.overwrite";
+  public static final String CONF_LABEL_BANDWIDTH_MB = "distcp.map.bandwidth.mb";
+
+  /* Total bytes to be copied. Updated by copylisting. Unfiltered count */
+  public static final String CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED = "mapred.total.bytes.expected";
+
+  /* Total number of paths to copy, includes directories. Unfiltered count */
+  public static final String CONF_LABEL_TOTAL_NUMBER_OF_RECORDS = "mapred.number.of.records";
+
+  /* SSL keystore resource */
+  public static final String CONF_LABEL_SSL_KEYSTORE = "dfs.https.client.keystore.resource";
+
+  /* If input is specified via -f <<source listing>>, the file containing the src paths */
+  public static final String CONF_LABEL_LISTING_FILE_PATH = "distcp.listing.file.path";
+
+  /* Directory where the mapreduce job will write to. If not atomic commit, then same
+    as CONF_LABEL_TARGET_FINAL_PATH
+   */
+  public static final String CONF_LABEL_TARGET_WORK_PATH = "distcp.target.work.path";
+
+  /* Directory where the final data will be committed to. If not atomic commit, then same
+    as CONF_LABEL_TARGET_WORK_PATH
+   */
+  public static final String CONF_LABEL_TARGET_FINAL_PATH = "distcp.target.final.path";
+
+  /**
+   * DistCp job id for consumers of DistCp
+   */
+  public static final String CONF_LABEL_DISTCP_JOB_ID = "distcp.job.id";
+
+  /* Meta folder where the job's intermediate data is kept */
+  public static final String CONF_LABEL_META_FOLDER = "distcp.meta.folder";
+
+  /**
+   * Conf label for SSL Trust-store location.
+   */
+  public static final String CONF_LABEL_SSL_TRUST_STORE_LOCATION
+      = "ssl.client.truststore.location";
+
+  /**
+   * Conf label for SSL Key-store location.
+   */
+  public static final String CONF_LABEL_SSL_KEY_STORE_LOCATION
+      = "ssl.client.keystore.location";
+
+  /**
+   * Constants for DistCp return code to shell / consumer of ToolRunner's run
+   */
+  public static final int SUCCESS = 0;
+  public static final int INVALID_ARGUMENT = -1;
+  public static final int DUPLICATE_INPUT = -2;
+  public static final int UNKNOWN_ERROR = -999;
+}
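A small hedged sketch of how a downstream consumer might read some of these labels back from a job configuration. The values would normally be populated by DistCp and CopyListing at submission time; the class name and default values below are illustrative only.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.tools.DistCpConstants;

    public class DistCpConfSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Populated by DistCp.execute() and CopyListing.buildListing() on a real job;
        // the defaults here only keep this stand-alone sketch from printing nulls.
        String jobId = conf.get(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, "<not-submitted>");
        long bytes = conf.getLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 0L);
        long paths = conf.getLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, 0L);
        System.out.println(jobId + ": " + paths + " paths, " + bytes + " bytes to copy");
      }
    }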

+ 218 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java

@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.cli.Option;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Enumeration mapping configuration keys to distcp command line
+ * options.
+ */
+public enum DistCpOptionSwitch {
+
+  /**
+   * Ignores any failures during copy, and continues with rest.
+   * Logs failures in a file
+   */
+  IGNORE_FAILURES(DistCpConstants.CONF_LABEL_IGNORE_FAILURES,
+      new Option("i", false, "Ignore failures during copy")),
+
+  /**
+   * Preserves status of file/path in the target.
+   * Default behavior with -p is to preserve replication,
+   * block size, user, group and permission on the target file.
+   *
+   * If any of the optional flags among r, b, u, g, p are specified, then
+   * only the corresponding file attributes are preserved.
+   *
+   */
+  PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
+      new Option("p", true, "preserve status (rbugp)" +
+          "(replication, block-size, user, group, permission)")),
+
+  /**
+   * Update target location by copying only files that are missing
+   * in the target. This can be used to periodically sync two folders
+   * across source and target. Typically used with DELETE_MISSING
+   * Incompatible with ATOMIC_COMMIT
+   */
+  SYNC_FOLDERS(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, 
+      new Option("update", false, "Update target, copying only missing" +
+          "files or directories")),
+
+  /**
+   * Deletes files in the target that are missing from the source.
+   * This allows the target to be in sync with the source contents
+   * Typically used in conjunction with SYNC_FOLDERS
+   * Incompatible with ATOMIC_COMMIT
+   */
+  DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING,
+      new Option("delete", false, "Delete from target, " +
+          "files missing in source")),
+
+  /**
+   * Configuration file to use with hftps:// for securely copying
+   * files across clusters. Typically the configuration file contains
+   * truststore/keystore information such as location, password and type
+   */
+  SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF,
+      new Option("mapredSslConf", true, "Configuration for ssl config file" +
+          ", to use with hftps://")),
+
+  /**
+   * Max number of maps to use during copy. DistCp will split work
+   * as equally as possible among these maps
+   */
+  MAX_MAPS(DistCpConstants.CONF_LABEL_MAX_MAPS, 
+      new Option("m", true, "Max number of concurrent maps to use for copy")),
+
+  /**
+   * Source file listing can be provided to DistCp in a file.
+   * This allows DistCp to copy an arbitrary list of files from
+   * the source to the target
+   */
+  SOURCE_FILE_LISTING(DistCpConstants.CONF_LABEL_SOURCE_LISTING,
+      new Option("f", true, "List of files that need to be copied")),
+
+  /**
+   * Copy all the source files and commit them atomically to the target.
+   * This is typically useful in cases where there is a process
+   * polling for availability of a file/dir. This option is incompatible
+   * with SYNC_FOLDERS & DELETE_MISSING
+   */
+  ATOMIC_COMMIT(DistCpConstants.CONF_LABEL_ATOMIC_COPY,
+      new Option("atomic", false, "Commit all changes or none")),
+
+  /**
+   * Work path to be used only in conjunction with atomic commit
+   */
+  WORK_PATH(DistCpConstants.CONF_LABEL_WORK_PATH,
+      new Option("tmp", true, "Intermediate work path to be used for atomic commit")),
+
+  /**
+   * Log path where distcp output logs are written to
+   */
+  LOG_PATH(DistCpConstants.CONF_LABEL_LOG_PATH,
+      new Option("log", true, "Folder on DFS where distcp execution logs are saved")),
+
+  /**
+   * Copy strategy to use. This could be dynamic, uniform size, etc.
+   * DistCp would use an appropriate input format based on this.
+   */
+  COPY_STRATEGY(DistCpConstants.CONF_LABEL_COPY_STRATEGY,
+      new Option("strategy", true, "Copy strategy to use. Default is " +
+          "dividing work based on file sizes")),
+
+  /**
+   * Skip CRC checks between source and target, when determining what
+   * files need to be copied.
+   */
+  SKIP_CRC(DistCpConstants.CONF_LABEL_SKIP_CRC,
+      new Option("skipcrccheck", false, "Whether to skip CRC checks between " +
+          "source and target paths.")),
+
+  /**
+   * Overwrite target-files unconditionally.
+   */
+  OVERWRITE(DistCpConstants.CONF_LABEL_OVERWRITE,
+      new Option("overwrite", false, "Choose to overwrite target files " +
+          "unconditionally, even if they exist.")),
+
+  /**
+   * Should DistCp execution be blocking
+   */
+  BLOCKING("",
+      new Option("async", false, "Should distcp execution be blocking")),
+
+  FILE_LIMIT("",
+      new Option("filelimit", true, "(Deprecated!) Limit number of files " +
+              "copied to <= n")),
+
+  SIZE_LIMIT("",
+      new Option("sizelimit", true, "(Deprecated!) Limit number of files " +
+              "copied to <= n bytes")),
+
+  /**
+   * Specify bandwidth per map in MB
+   */
+  BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
+      new Option("bandwidth", true, "Specify bandwidth per map in MB"));
+
+  private final String confLabel;
+  private final Option option;
+
+  DistCpOptionSwitch(String confLabel, Option option) {
+    this.confLabel = confLabel;
+    this.option = option;
+  }
+
+  /**
+   * Get Configuration label for the option
+   * @return configuration label name
+   */
+  public String getConfigLabel() {
+    return confLabel;
+  }
+
+  /**
+   * Get CLI Option corresponding to the distcp option
+   * @return option
+   */
+  public Option getOption() {
+    return option;
+  }
+
+  /**
+   * Get Switch symbol
+   * @return switch symbol string
+   */
+  public String getSwitch() {
+    return option.getOpt();
+  }
+
+  @Override
+  public String toString() {
+    return  super.name() + " {" +
+        "confLabel='" + confLabel + '\'' +
+        ", option=" + option + '}';
+  }
+
+  /**
+   * Helper function to add an option to hadoop configuration object
+   * @param conf - Configuration object to include the option
+   * @param option - Option to add
+   * @param value - Value
+   */
+  public static void addToConf(Configuration conf,
+                               DistCpOptionSwitch option,
+                               String value) {
+    conf.set(option.getConfigLabel(), value);
+  }
+
+  /**
+   * Helper function to set an option to hadoop configuration object
+   * @param conf - Configuration object to include the option
+   * @param option - Option to add
+   */
+  public static void addToConf(Configuration conf,
+                               DistCpOptionSwitch option) {
+    conf.set(option.getConfigLabel(), "true");
+  }
+}
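For illustration, a short sketch of the addToConf() helpers, mirroring what DistCpOptions.appendToConf() does before submission. The wrapper class name is hypothetical.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.tools.DistCpOptionSwitch;

    public class OptionSwitchSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Record the -update and -bandwidth switches under their configuration labels.
        DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SYNC_FOLDERS);
        DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.BANDWIDTH, "50");
        System.out.println(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel() + " = "
            + conf.get(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel()));
        System.out.println(DistCpOptionSwitch.BANDWIDTH.getConfigLabel() + " = "
            + conf.get(DistCpOptionSwitch.BANDWIDTH.getConfigLabel()));
      }
    }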

+ 525 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java

@@ -0,0 +1,525 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.util.DistCpUtils;
+
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * The Options class encapsulates all DistCp options.
+ * These may be set from command-line (via the OptionsParser)
+ * or may be set manually.
+ */
+public class DistCpOptions {
+
+  private boolean atomicCommit = false;
+  private boolean syncFolder = false;
+  private boolean deleteMissing = false;
+  private boolean ignoreFailures = false;
+  private boolean overwrite = false;
+  private boolean skipCRC = false;
+  private boolean blocking = true;
+
+  private int maxMaps = DistCpConstants.DEFAULT_MAPS;
+  private int mapBandwidth = DistCpConstants.DEFAULT_BANDWIDTH_MB;
+
+  private String sslConfigurationFile;
+
+  private String copyStrategy = DistCpConstants.UNIFORMSIZE;
+
+  private EnumSet<FileAttribute> preserveStatus = EnumSet.noneOf(FileAttribute.class);
+
+  private Path atomicWorkPath;
+
+  private Path logPath;
+
+  private Path sourceFileListing;
+  private List<Path> sourcePaths;
+
+  private Path targetPath;
+
+  public static enum FileAttribute{
+    REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION;
+
+    public static FileAttribute getAttribute(char symbol) {
+      for (FileAttribute attribute : values()) {
+        if (attribute.name().charAt(0) == Character.toUpperCase(symbol)) {
+          return attribute;
+        }
+      }
+      throw new NoSuchElementException("No attribute for " + symbol);
+    }
+  }
+
+  /**
+   * Constructor, to initialize source/target paths.
+   * @param sourcePaths List of source-paths (including wildcards)
+   *                     to be copied to target.
+   * @param targetPath Destination path for the dist-copy.
+   */
+  public DistCpOptions(List<Path> sourcePaths, Path targetPath) {
+    assert sourcePaths != null && !sourcePaths.isEmpty() : "Invalid source paths";
+    assert targetPath != null : "Invalid Target path";
+
+    this.sourcePaths = sourcePaths;
+    this.targetPath = targetPath;
+  }
+
+  /**
+   * Constructor, to initialize source/target paths.
+   * @param sourceFileListing File containing list of source paths
+   * @param targetPath Destination path for the dist-copy.
+   */
+  public DistCpOptions(Path sourceFileListing, Path targetPath) {
+    assert sourceFileListing != null : "Invalid source paths";
+    assert targetPath != null : "Invalid Target path";
+
+    this.sourceFileListing = sourceFileListing;
+    this.targetPath = targetPath;
+  }
+
+  /**
+   * Copy constructor.
+   * @param that DistCpOptions being copied from.
+   */
+  public DistCpOptions(DistCpOptions that) {
+    if (this != that && that != null) {
+      this.atomicCommit = that.atomicCommit;
+      this.syncFolder = that.syncFolder;
+      this.deleteMissing = that.deleteMissing;
+      this.ignoreFailures = that.ignoreFailures;
+      this.overwrite = that.overwrite;
+      this.skipCRC = that.skipCRC;
+      this.blocking = that.blocking;
+      this.maxMaps = that.maxMaps;
+      this.mapBandwidth = that.mapBandwidth;
+      this.sslConfigurationFile = that.getSslConfigurationFile();
+      this.copyStrategy = that.copyStrategy;
+      this.preserveStatus = that.preserveStatus;
+      this.atomicWorkPath = that.getAtomicWorkPath();
+      this.logPath = that.getLogPath();
+      this.sourceFileListing = that.getSourceFileListing();
+      this.sourcePaths = that.getSourcePaths();
+      this.targetPath = that.getTargetPath();
+    }
+  }
+
+  /**
+   * Should the data be committed atomically?
+   *
+   * @return true if data should be committed atomically. false otherwise
+   */
+  public boolean shouldAtomicCommit() {
+    return atomicCommit;
+  }
+
+  /**
+   * Set if data needs to be committed atomically
+   *
+   * @param atomicCommit - boolean switch
+   */
+  public void setAtomicCommit(boolean atomicCommit) {
+    validate(DistCpOptionSwitch.ATOMIC_COMMIT, atomicCommit);
+    this.atomicCommit = atomicCommit;
+  }
+
+  /**
+   * Should the data be sync'ed between source and target paths?
+   *
+   * @return true if data should be sync'ed up. false otherwise
+   */
+  public boolean shouldSyncFolder() {
+    return syncFolder;
+  }
+
+  /**
+   * Set if source and target folder contents should be sync'ed up
+   *
+   * @param syncFolder - boolean switch
+   */
+  public void setSyncFolder(boolean syncFolder) {
+    validate(DistCpOptionSwitch.SYNC_FOLDERS, syncFolder);
+    this.syncFolder = syncFolder;
+  }
+
+  /**
+   * Should target files missing in the source be deleted?
+   *
+   * @return true if zombie target files are to be removed. false otherwise
+   */
+  public boolean shouldDeleteMissing() {
+    return deleteMissing;
+  }
+
+  /**
+   * Set if files only present in target should be deleted
+   *
+   * @param deleteMissing - boolean switch
+   */
+  public void setDeleteMissing(boolean deleteMissing) {
+    validate(DistCpOptionSwitch.DELETE_MISSING, deleteMissing);
+    this.deleteMissing = deleteMissing;
+  }
+
+  /**
+   * Should failures be logged and ignored during copy?
+   *
+   * @return true if failures are to be logged and ignored. false otherwise
+   */
+  public boolean shouldIgnoreFailures() {
+    return ignoreFailures;
+  }
+
+  /**
+   * Set if failures during copy should be ignored
+   *
+   * @param ignoreFailures - boolean switch
+   */
+  public void setIgnoreFailures(boolean ignoreFailures) {
+    this.ignoreFailures = ignoreFailures;
+  }
+
+  /**
+   * Should DistCp be running in blocking mode
+   *
+   * @return true if it should run in blocking mode, false otherwise
+   */
+  public boolean shouldBlock() {
+    return blocking;
+  }
+
+  /**
+   * Set if DistCp should run in blocking or non-blocking mode
+   *
+   * @param blocking - boolean switch
+   */
+  public void setBlocking(boolean blocking) {
+    this.blocking = blocking;
+  }
+
+  /**
+   * Should files be overwritten always?
+   *
+   * @return true if files that already exist in the target should always
+   *         be overwritten. false otherwise
+   */
+  public boolean shouldOverwrite() {
+    return overwrite;
+  }
+
+  /**
+   * Set if files should always be overwritten on target
+   *
+   * @param overwrite - boolean switch
+   */
+  public void setOverwrite(boolean overwrite) {
+    validate(DistCpOptionSwitch.OVERWRITE, overwrite);
+    this.overwrite = overwrite;
+  }
+
+  /**
+   * Should the CRC/checksum check be skipped while determining whether files are identical?
+   *
+   * @return true if the checksum check should be skipped while determining whether
+   *         files are identical. false otherwise
+   */
+  public boolean shouldSkipCRC() {
+    return skipCRC;
+  }
+
+  /**
+   * Set if checksum comparison should be skipped while determining if
+   * source and destination files are identical
+   *
+   * @param skipCRC - boolean switch
+   */
+  public void setSkipCRC(boolean skipCRC) {
+    validate(DistCpOptionSwitch.SKIP_CRC, skipCRC);
+    this.skipCRC = skipCRC;
+  }
+
+  /** Get the max number of maps to use for this copy
+   *
+   * @return Max number of maps
+   */
+  public int getMaxMaps() {
+    return maxMaps;
+  }
+
+  /**
+   * Set the max number of maps to use for copy
+   *
+   * @param maxMaps - Number of maps
+   */
+  public void setMaxMaps(int maxMaps) {
+    this.maxMaps = maxMaps;
+  }
+
+  /** Get the map bandwidth in MB
+   *
+   * @return Bandwidth in MB
+   */
+  public int getMapBandwidth() {
+    return mapBandwidth;
+  }
+
+  /**
+   * Set per map bandwidth
+   *
+   * @param mapBandwidth - per map bandwidth
+   */
+  public void setMapBandwidth(int mapBandwidth) {
+    assert mapBandwidth > 0 : "Bandwidth " + mapBandwidth + " is invalid (should be > 0)";
+    this.mapBandwidth = mapBandwidth;
+  }
+
+  /**
+   * Get path where the ssl configuration file is present to use for hftps://
+   *
+   * @return Path on local file system
+   */
+  public String getSslConfigurationFile() {
+    return sslConfigurationFile;
+  }
+
+  /**
+   * Set the SSL configuration file path to use with hftps:// (local path)
+   *
+   * @param sslConfigurationFile - Local ssl config file path
+   */
+  public void setSslConfigurationFile(String sslConfigurationFile) {
+    this.sslConfigurationFile = sslConfigurationFile;
+  }
+
+  /**
+   * Returns an iterator with the list of file attributes to preserve
+   *
+   * @return iterator of file attributes to preserve
+   */
+  public Iterator<FileAttribute> preserveAttributes() {
+    return preserveStatus.iterator();
+  }
+
+  /**
+   * Checks if the input attribute should be preserved or not
+   *
+   * @param attribute - Attribute to check
+   * @return True if attribute should be preserved, false otherwise
+   */
+  public boolean shouldPreserve(FileAttribute attribute) {
+    return preserveStatus.contains(attribute);
+  }
+
+  /**
+   * Add file attributes that need to be preserved. This method may be
+   * called multiple times to add attributes.
+   *
+   * @param fileAttribute - Attribute to add, one at a time
+   */
+  public void preserve(FileAttribute fileAttribute) {
+    for (FileAttribute attribute : preserveStatus) {
+      if (attribute.equals(fileAttribute)) {
+        return;
+      }
+    }
+    preserveStatus.add(fileAttribute);
+  }
+
+  /** Get work path for atomic commit. If null, the work
+   * path would be parentOf(targetPath) + "/._WIP_" + nameOf(targetPath)
+   *
+   * @return Atomic work path on the target cluster. Null if not set
+   */
+  public Path getAtomicWorkPath() {
+    return atomicWorkPath;
+  }
+
+  /**
+   * Set the work path for atomic commit
+   *
+   * @param atomicWorkPath - Path on the target cluster
+   */
+  public void setAtomicWorkPath(Path atomicWorkPath) {
+    this.atomicWorkPath = atomicWorkPath;
+  }
+
+  /** Get the output directory for writing distcp logs. If not set, logs
+   * are temporarily written to JobStagingDir/_logs and deleted
+   * upon job completion
+   *
+   * @return Log output path on the cluster where distcp job is run
+   */
+  public Path getLogPath() {
+    return logPath;
+  }
+
+  /**
+   * Set the log path where distcp output logs are stored
+   * Uses JobStagingDir/_logs by default
+   *
+   * @param logPath - Path where logs will be saved
+   */
+  public void setLogPath(Path logPath) {
+    this.logPath = logPath;
+  }
+
+  /**
+   * Get the copy strategy to use. Uses appropriate input format
+   *
+   * @return copy strategy to use
+   */
+  public String getCopyStrategy() {
+    return copyStrategy;
+  }
+
+  /**
+   * Set the copy strategy to use. Should map to a strategy implementation
+   * in distcp-default.xml
+   *
+   * @param copyStrategy - copy Strategy to use
+   */
+  public void setCopyStrategy(String copyStrategy) {
+    this.copyStrategy = copyStrategy;
+  }
+
+  /**
+   * File path (hdfs:// or file://) that contains the list of actual
+   * files to copy
+   *
+   * @return - Source listing file path
+   */
+  public Path getSourceFileListing() {
+    return sourceFileListing;
+  }
+
+  /**
+   * Getter for sourcePaths.
+   * @return List of source-paths.
+   */
+  public List<Path> getSourcePaths() {
+    return sourcePaths;
+  }
+
+  /**
+   * Setter for sourcePaths.
+   * @param sourcePaths The new list of source-paths.
+   */
+  public void setSourcePaths(List<Path> sourcePaths) {
+    assert sourcePaths != null && sourcePaths.size() != 0;
+    this.sourcePaths = sourcePaths;
+  }
+
+  /**
+   * Getter for the targetPath.
+   * @return The target-path.
+   */
+  public Path getTargetPath() {
+    return targetPath;
+  }
+
+  public void validate(DistCpOptionSwitch option, boolean value) {
+
+    boolean syncFolder = (option == DistCpOptionSwitch.SYNC_FOLDERS ?
+        value : this.syncFolder);
+    boolean overwrite = (option == DistCpOptionSwitch.OVERWRITE ?
+        value : this.overwrite);
+    boolean deleteMissing = (option == DistCpOptionSwitch.DELETE_MISSING ?
+        value : this.deleteMissing);
+    boolean atomicCommit = (option == DistCpOptionSwitch.ATOMIC_COMMIT ?
+        value : this.atomicCommit);
+    boolean skipCRC = (option == DistCpOptionSwitch.SKIP_CRC ?
+        value : this.skipCRC);
+
+    if (syncFolder && atomicCommit) {
+      throw new IllegalArgumentException("Atomic commit can't be used with " +
+          "sync folder or overwrite options");
+    }
+
+    if (deleteMissing && !(overwrite || syncFolder)) {
+      throw new IllegalArgumentException("Delete missing is applicable " +
+          "only with update or overwrite options");
+    }
+
+    if (overwrite && syncFolder) {
+      throw new IllegalArgumentException("Overwrite and update options are " +
+          "mutually exclusive");
+    }
+
+    if (!syncFolder && skipCRC) {
+      throw new IllegalArgumentException("Skip CRC is valid only with update options");
+    }
+
+  }
+
+  /**
+   * Add options to configuration. These will be used in the Mapper/committer
+   *
+   * @param conf - Configuration object to which the options need to be added
+   */
+  public void appendToConf(Configuration conf) {
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.ATOMIC_COMMIT,
+        String.valueOf(atomicCommit));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.IGNORE_FAILURES,
+        String.valueOf(ignoreFailures));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SYNC_FOLDERS,
+        String.valueOf(syncFolder));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.DELETE_MISSING,
+        String.valueOf(deleteMissing));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.OVERWRITE,
+        String.valueOf(overwrite));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SKIP_CRC,
+        String.valueOf(skipCRC));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.BANDWIDTH,
+        String.valueOf(mapBandwidth));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.PRESERVE_STATUS,
+        DistCpUtils.packAttributes(preserveStatus));
+  }
+
+  /**
+   * Utility to easily string-ify Options, for logging.
+   *
+   * @return String representation of the Options.
+   */
+  @Override
+  public String toString() {
+    return "DistCpOptions{" +
+        "atomicCommit=" + atomicCommit +
+        ", syncFolder=" + syncFolder +
+        ", deleteMissing=" + deleteMissing +
+        ", ignoreFailures=" + ignoreFailures +
+        ", maxMaps=" + maxMaps +
+        ", sslConfigurationFile='" + sslConfigurationFile + '\'' +
+        ", copyStrategy='" + copyStrategy + '\'' +
+        ", sourceFileListing=" + sourceFileListing +
+        ", sourcePaths=" + sourcePaths +
+        ", targetPath=" + targetPath +
+        '}';
+  }
+
+  @Override
+  protected DistCpOptions clone() throws CloneNotSupportedException {
+    return (DistCpOptions) super.clone();
+  }
+}
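A brief sketch of building options programmatically and of how validate() rejects conflicting switches; the paths and the class name are placeholders.

    import java.util.Arrays;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.tools.DistCpOptions;
    import org.apache.hadoop.tools.DistCpOptions.FileAttribute;

    public class DistCpOptionsSketch {
      public static void main(String[] args) {
        DistCpOptions options = new DistCpOptions(
            Arrays.asList(new Path("hdfs://nn:8020/a")),  // hypothetical source
            new Path("hdfs://nn:8020/b"));                // hypothetical target
        options.setSyncFolder(true);
        options.setDeleteMissing(true);      // legal only with -update or -overwrite
        options.preserve(FileAttribute.REPLICATION);
        options.preserve(FileAttribute.PERMISSION);
        System.out.println(options);

        try {
          options.setAtomicCommit(true);     // conflicts with -update; validate() rejects it
        } catch (IllegalArgumentException expected) {
          System.out.println("Rejected: " + expected.getMessage());
        }
      }
    }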

+ 100 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/FileBasedCopyListing.java

@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * FileBasedCopyListing implements the CopyListing interface,
+ * to create the copy-listing for DistCp,
+ * by iterating over all source paths mentioned in a specified input-file.
+ */
+public class FileBasedCopyListing extends CopyListing {
+
+  private final CopyListing globbedListing;
+  /**
+   * Constructor, to initialize base-class.
+   * @param configuration The input Configuration object.
+   * @param credentials - Credentials object on which the FS delegation tokens are cached. If null
+   * delegation token caching is skipped
+   */
+  public FileBasedCopyListing(Configuration configuration, Credentials credentials) {
+    super(configuration, credentials);
+    globbedListing = new GlobbedCopyListing(getConf(), credentials);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException {
+  }
+
+  /**
+   * Implementation of CopyListing::buildListing().
+   *   Iterates over all source paths mentioned in the input-file.
+   * @param pathToListFile Path on HDFS where the listing file is written.
+   * @param options Input Options for DistCp (indicating source/target paths.)
+   * @throws IOException
+   */
+  @Override
+  public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
+    DistCpOptions newOption = new DistCpOptions(options);
+    newOption.setSourcePaths(fetchFileList(options.getSourceFileListing()));
+    globbedListing.buildListing(pathToListFile, newOption);
+  }
+
+  private List<Path> fetchFileList(Path sourceListing) throws IOException {
+    List<Path> result = new ArrayList<Path>();
+    FileSystem fs = sourceListing.getFileSystem(getConf());
+    BufferedReader input = null;
+    try {
+      input = new BufferedReader(new InputStreamReader(fs.open(sourceListing)));
+      String line = input.readLine();
+      while (line != null) {
+        result.add(new Path(line));
+        line = input.readLine();
+      }
+    } finally {
+      IOUtils.closeStream(input);
+    }
+    return result;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getBytesToCopy() {
+    return globbedListing.getBytesToCopy();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getNumberOfPaths() {
+    return globbedListing.getNumberOfPaths();
+  }
+}
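To show the expected shape of the -f listing consumed above (one source path per line, typically fully qualified), here is a hedged sketch that writes such a file and feeds it through the CopyListing factory; all paths and the class name are placeholders.

    import java.io.OutputStreamWriter;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.security.Credentials;
    import org.apache.hadoop.tools.CopyListing;
    import org.apache.hadoop.tools.DistCpOptions;

    public class FileListingSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path srcListing = new Path("hdfs://nn:8020/tmp/srcfiles.txt");  // hypothetical
        FileSystem fs = srcListing.getFileSystem(conf);

        // The -f listing is plain text: one source path per line.
        OutputStreamWriter out = new OutputStreamWriter(fs.create(srcListing));
        out.write("hdfs://nn:8020/data/2012/01/part-0000\n");
        out.write("hdfs://nn:8020/data/2012/01/part-0001\n");
        out.close();

        // With a source file listing set, the factory returns a FileBasedCopyListing.
        DistCpOptions options =
            new DistCpOptions(srcListing, new Path("hdfs://nn:8020/backup"));
        CopyListing listing = CopyListing.getCopyListing(conf, new Credentials(), options);
        listing.buildListing(new Path("hdfs://nn:8020/tmp/fileList.seq"), options);
      }
    }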

+ 105 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java

@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * GlobbedCopyListing implements the CopyListing interface, to create the copy
+ * listing-file by "globbing" all specified source paths (wild-cards and all.)
+ */
+public class GlobbedCopyListing extends CopyListing {
+  private static final Log LOG = LogFactory.getLog(GlobbedCopyListing.class);
+
+  private final CopyListing simpleListing;
+  /**
+   * Constructor, to initialize the configuration.
+   * @param configuration The input Configuration object.
+   * @param credentials Credentials object on which the FS delegation tokens are cached. If null
+   * delegation token caching is skipped
+   */
+  public GlobbedCopyListing(Configuration configuration, Credentials credentials) {
+    super(configuration, credentials);
+    simpleListing = new SimpleCopyListing(getConf(), credentials);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException {
+  }
+
+  /**
+   * Implementation of CopyListing::buildListing().
+   * Creates the copy listing by "globbing" all source-paths.
+   * @param pathToListingFile The location at which the copy-listing file
+   *                           is to be created.
+   * @param options Input Options for DistCp (indicating source/target paths.)
+   * @throws IOException
+   */
+  @Override
+  public void doBuildListing(Path pathToListingFile,
+                             DistCpOptions options) throws IOException {
+
+    List<Path> globbedPaths = new ArrayList<Path>();
+    if (options.getSourcePaths().isEmpty()) {
+      throw new InvalidInputException("Nothing to process. Source paths::EMPTY");  
+    }
+
+    for (Path p : options.getSourcePaths()) {
+      FileSystem fs = p.getFileSystem(getConf());
+      FileStatus[] inputs = fs.globStatus(p);
+
+      if(inputs != null && inputs.length > 0) {
+        for (FileStatus onePath: inputs) {
+          globbedPaths.add(onePath.getPath());
+        }
+      } else {
+        throw new InvalidInputException(p + " doesn't exist");        
+      }
+    }
+
+    DistCpOptions optionsGlobbed = new DistCpOptions(options);
+    optionsGlobbed.setSourcePaths(globbedPaths);
+    simpleListing.buildListing(pathToListingFile, optionsGlobbed);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getBytesToCopy() {
+    return simpleListing.getBytesToCopy();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getNumberOfPaths() {
+    return simpleListing.getNumberOfPaths();
+  }
+
+}

+ 246 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java

@@ -0,0 +1,246 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.cli.*;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+
+import java.util.*;
+
+/**
+ * The OptionsParser parses out the command-line options passed to DistCp,
+ * and interprets those specific to DistCp, to create an Options object.
+ */
+public class OptionsParser {
+
+  private static final Log LOG = LogFactory.getLog(OptionsParser.class);
+
+  private static final Options cliOptions = new Options();      
+
+  static {
+    for (DistCpOptionSwitch option : DistCpOptionSwitch.values()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Adding option " + option.getOption());
+      }
+      cliOptions.addOption(option.getOption());
+    }
+  }
+
+  private static class CustomParser extends GnuParser {
+    @Override
+    protected String[] flatten(Options options, String[] arguments, boolean stopAtNonOption) {
+      for (int index = 0; index < arguments.length; index++) {
+        if (arguments[index].equals("-" + DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) {
+          arguments[index] = "-prbugp";
+        }
+      }
+      return super.flatten(options, arguments, stopAtNonOption);
+    }
+  }
+
+  /**
+   * The parse method parses the command-line options, and creates
+   * a corresponding Options object.
+   * @param args Command-line arguments (excluding the options consumed
+   *              by the GenericOptionsParser).
+   * @return The Options object, corresponding to the specified command-line.
+   * @throws IllegalArgumentException: Thrown if the parse fails.
+   */
+  public static DistCpOptions parse(String args[]) throws IllegalArgumentException {
+
+    CommandLineParser parser = new CustomParser();
+
+    CommandLine command;
+    try {
+      command = parser.parse(cliOptions, args, true);
+    } catch (ParseException e) {
+      throw new IllegalArgumentException("Unable to parse arguments. " +
+        Arrays.toString(args), e);
+    }
+
+    DistCpOptions option;
+    Path targetPath;
+    List<Path> sourcePaths = new ArrayList<Path>();
+
+    String leftOverArgs[] = command.getArgs();
+    if (leftOverArgs == null || leftOverArgs.length < 1) {
+      throw new IllegalArgumentException("Target path not specified");
+    }
+
+    //Last Argument is the target path
+    targetPath = new Path(leftOverArgs[leftOverArgs.length -1].trim());
+
+    //Copy any source paths in the arguments to the list
+    for (int index = 0; index < leftOverArgs.length - 1; index++) {
+      sourcePaths.add(new Path(leftOverArgs[index].trim()));
+    }
+
+    /* If the command has a source file listing, use it; else, fall back on the source paths in args.
+       If both are present, throw an exception and bail */
+    if (command.hasOption(DistCpOptionSwitch.SOURCE_FILE_LISTING.getSwitch())) {
+      if (!sourcePaths.isEmpty()) {
+        throw new IllegalArgumentException("Both source file listing and source paths present");
+      }
+      option = new DistCpOptions(new Path(getVal(command, DistCpOptionSwitch.
+              SOURCE_FILE_LISTING.getSwitch())), targetPath);
+    } else {
+      if (sourcePaths.isEmpty()) {
+        throw new IllegalArgumentException("Neither source file listing nor source paths present");
+      }
+      option = new DistCpOptions(sourcePaths, targetPath);
+    }
+
+    //Process all the other option switches and set options appropriately
+    if (command.hasOption(DistCpOptionSwitch.IGNORE_FAILURES.getSwitch())) {
+      option.setIgnoreFailures(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.ATOMIC_COMMIT.getSwitch())) {
+      option.setAtomicCommit(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.WORK_PATH.getSwitch()) &&
+        option.shouldAtomicCommit()) {
+      String workPath = getVal(command, DistCpOptionSwitch.WORK_PATH.getSwitch());
+      if (workPath != null && !workPath.isEmpty()) {
+        option.setAtomicWorkPath(new Path(workPath));
+      }
+    } else if (command.hasOption(DistCpOptionSwitch.WORK_PATH.getSwitch())) {
+      throw new IllegalArgumentException("-tmp work-path can only be specified along with -atomic");      
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.LOG_PATH.getSwitch())) {
+      option.setLogPath(new Path(getVal(command, DistCpOptionSwitch.LOG_PATH.getSwitch())));
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SYNC_FOLDERS.getSwitch())) {
+      option.setSyncFolder(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.OVERWRITE.getSwitch())) {
+      option.setOverwrite(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.DELETE_MISSING.getSwitch())) {
+      option.setDeleteMissing(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SKIP_CRC.getSwitch())) {
+      option.setSkipCRC(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.BLOCKING.getSwitch())) {
+      option.setBlocking(false);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.BANDWIDTH.getSwitch())) {
+      try {
+        Integer mapBandwidth = Integer.parseInt(
+            getVal(command, DistCpOptionSwitch.BANDWIDTH.getSwitch()).trim());
+        option.setMapBandwidth(mapBandwidth);
+      } catch (NumberFormatException e) {
+        throw new IllegalArgumentException("Bandwidth specified is invalid: " +
+            getVal(command, DistCpOptionSwitch.BANDWIDTH.getSwitch()), e);
+      }
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SSL_CONF.getSwitch())) {
+      option.setSslConfigurationFile(command.
+          getOptionValue(DistCpOptionSwitch.SSL_CONF.getSwitch()));
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.MAX_MAPS.getSwitch())) {
+      try {
+        Integer maps = Integer.parseInt(
+            getVal(command, DistCpOptionSwitch.MAX_MAPS.getSwitch()).trim());
+        option.setMaxMaps(maps);
+      } catch (NumberFormatException e) {
+        throw new IllegalArgumentException("Number of maps is invalid: " +
+            getVal(command, DistCpOptionSwitch.MAX_MAPS.getSwitch()), e);
+      }
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.COPY_STRATEGY.getSwitch())) {
+      option.setCopyStrategy(
+            getVal(command, DistCpOptionSwitch.COPY_STRATEGY.getSwitch()));
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) {
+      String attributes =
+          getVal(command, DistCpOptionSwitch.PRESERVE_STATUS.getSwitch());
+      if (attributes == null || attributes.isEmpty()) {
+        for (FileAttribute attribute : FileAttribute.values()) {
+          option.preserve(attribute);
+        }
+      } else {
+        for (int index = 0; index < attributes.length(); index++) {
+          option.preserve(FileAttribute.
+              getAttribute(attributes.charAt(index)));
+        }
+      }
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.FILE_LIMIT.getSwitch())) {
+      String fileLimitString = getVal(command,
+                              DistCpOptionSwitch.FILE_LIMIT.getSwitch());
+      try {
+        Integer.parseInt(fileLimitString);
+      }
+      catch (NumberFormatException e) {
+        throw new IllegalArgumentException("File-limit is invalid: "
+                                            + fileLimitString, e);
+      }
+      LOG.warn(DistCpOptionSwitch.FILE_LIMIT.getSwitch() + " is a deprecated" +
+              " option. Ignoring.");
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SIZE_LIMIT.getSwitch())) {
+      String sizeLimitString = getVal(command,
+                              DistCpOptionSwitch.SIZE_LIMIT.getSwitch());
+      try {
+        Long.parseLong(sizeLimitString);
+      }
+      catch (NumberFormatException e) {
+        throw new IllegalArgumentException("Size-limit is invalid: "
+                                            + sizeLimitString, e);
+      }
+      LOG.warn(DistCpOptionSwitch.SIZE_LIMIT.getSwitch() + " is a deprecated" +
+              " option. Ignoring.");
+    }
+
+    return option;
+  }
+
+  private static String getVal(CommandLine command, String swtch) {
+    String optionValue = command.getOptionValue(swtch);
+    if (optionValue == null) {
+      return null;
+    } else {
+      return optionValue.trim();
+    }
+  }
+
+  public static void usage() {
+    HelpFormatter formatter = new HelpFormatter();
+    formatter.printHelp("distcp OPTIONS [source_path...] <target_path>\n\nOPTIONS", cliOptions);
+  }
+}

+ 275 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java

@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.*;
+import java.util.Stack;
+
+/**
+ * The SimpleCopyListing is responsible for making the exhaustive list of
+ * all files/directories under its specified list of input-paths.
+ * These are written into the specified copy-listing file.
+ * Note: The SimpleCopyListing doesn't handle wild-cards in the input-paths.
+ */
+public class SimpleCopyListing extends CopyListing {
+  private static final Log LOG = LogFactory.getLog(SimpleCopyListing.class);
+
+  private long totalPaths = 0;
+  private long totalBytesToCopy = 0;
+
+  /**
+   * Protected constructor, to initialize configuration.
+   *
+   * @param configuration The input configuration, with which the source/target FileSystems may be accessed.
+   * @param credentials Credentials object on which the FS delegation tokens are cached. If null,
+   *                    delegation-token caching is skipped.
+   */
+  protected SimpleCopyListing(Configuration configuration, Credentials credentials) {
+    super(configuration, credentials);
+  }
+
+  @Override
+  protected void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException {
+
+    Path targetPath = options.getTargetPath();
+    FileSystem targetFS = targetPath.getFileSystem(getConf());
+    boolean targetIsFile = targetFS.isFile(targetPath);
+
+    //If target is a file, then source has to be single file
+    if (targetIsFile) {
+      if (options.getSourcePaths().size() > 1) {
+        throw new InvalidInputException("Multiple source being copied to a file: " +
+            targetPath);
+      }
+
+      Path srcPath = options.getSourcePaths().get(0);
+      FileSystem sourceFS = srcPath.getFileSystem(getConf());
+      if (!sourceFS.isFile(srcPath)) {
+        throw new InvalidInputException("Cannot copy " + srcPath +
+            ", which is not a file to " + targetPath);
+      }
+    }
+
+    if (options.shouldAtomicCommit() && targetFS.exists(targetPath)) {
+      throw new InvalidInputException("Target path for atomic-commit already exists: " +
+        targetPath + ". Cannot atomic-commit to pre-existing target-path.");
+    }
+
+    for (Path path: options.getSourcePaths()) {
+      FileSystem fs = path.getFileSystem(getConf());
+      if (!fs.exists(path)) {
+        throw new InvalidInputException(path + " doesn't exist");
+      }
+    }
+
+    /* This is required to allow map tasks to access each of the source
+       clusters. This retrieves the delegation token for each unique
+       file system and adds it to the job's private credential store.
+     */
+    Credentials credentials = getCredentials();
+    if (credentials != null) {
+      Path[] inputPaths = options.getSourcePaths().toArray(new Path[1]);
+      TokenCache.obtainTokensForNamenodes(credentials, inputPaths, getConf());
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
+
+    SequenceFile.Writer fileListWriter = null;
+
+    try {
+      fileListWriter = getWriter(pathToListingFile);
+
+      for (Path path: options.getSourcePaths()) {
+        FileSystem sourceFS = path.getFileSystem(getConf());
+        path = makeQualified(path);
+
+        FileStatus rootStatus = sourceFS.getFileStatus(path);
+        Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
+        boolean localFile = (rootStatus.getClass() != FileStatus.class);
+
+        FileStatus[] sourceFiles = sourceFS.listStatus(path);
+        if (sourceFiles != null && sourceFiles.length > 0) {
+          for (FileStatus sourceStatus: sourceFiles) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
+            }
+            writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+
+            if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
+              }
+              traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+            }
+          }
+        } else {
+          writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile);
+        }
+      }
+    } finally {
+      IOUtils.closeStream(fileListWriter);
+    }
+  }
+
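+  // Determines the "source root" for a given source path, i.e. the path against
+  // which relative paths in the copy-listing are computed. A solitary source
+  // file copied onto a file (or a non-existent target) is its own root; otherwise
+  // its parent is used. For directories, -update/-overwrite (or a single source
+  // copied into a non-existent target) use the directory itself as the root, so
+  // that its contents, rather than the directory, land under the target.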
+  private Path computeSourceRootPath(FileStatus sourceStatus,
+                                     DistCpOptions options) throws IOException {
+
+    Path target = options.getTargetPath();
+    FileSystem targetFS = target.getFileSystem(getConf());
+
+    boolean solitaryFile = options.getSourcePaths().size() == 1
+                                                && !sourceStatus.isDirectory();
+
+    if (solitaryFile) {
+      if (targetFS.isFile(target) || !targetFS.exists(target)) {
+        return sourceStatus.getPath();
+      } else {
+        return sourceStatus.getPath().getParent();
+      }
+    } else {
+      boolean specialHandling = (options.getSourcePaths().size() == 1 && !targetFS.exists(target)) ||
+          options.shouldSyncFolder() || options.shouldOverwrite();
+
+      return specialHandling && sourceStatus.isDirectory() ? sourceStatus.getPath() :
+          sourceStatus.getPath().getParent();
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getBytesToCopy() {
+    return totalBytesToCopy;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getNumberOfPaths() {
+    return totalPaths;
+  }
+
+  private Path makeQualified(Path path) throws IOException {
+    final FileSystem fs = path.getFileSystem(getConf());
+    return path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
+  }
+
+  private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
+    FileSystem fs = pathToListFile.getFileSystem(getConf());
+    if (fs.exists(pathToListFile)) {
+      fs.delete(pathToListFile, false);
+    }
+    return SequenceFile.createWriter(getConf(),
+            SequenceFile.Writer.file(pathToListFile),
+            SequenceFile.Writer.keyClass(Text.class),
+            SequenceFile.Writer.valueClass(FileStatus.class),
+            SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
+  }
+
+  private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
+                                    FileStatus fileStatus) throws IOException {
+    return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
+  }
+
+  private static FileStatus[] getChildren(FileSystem fileSystem,
+                                         FileStatus parent) throws IOException {
+    return fileSystem.listStatus(parent.getPath());
+  }
+
+  private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
+                                         FileStatus sourceStatus,
+                                         Path sourcePathRoot, boolean localFile)
+                                         throws IOException {
+    FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
+    Stack<FileStatus> pathStack = new Stack<FileStatus>();
+    pathStack.push(sourceStatus);
+
+    while (!pathStack.isEmpty()) {
+      for (FileStatus child: getChildren(sourceFS, pathStack.pop())) {
+        if (LOG.isDebugEnabled())
+          LOG.debug("Recording source-path: "
+                    + child.getPath() + " for copy.");
+        writeToFileListing(fileListWriter, child, sourcePathRoot, localFile);
+        if (isDirectoryAndNotEmpty(sourceFS, child)) {
+          if (LOG.isDebugEnabled())
+            LOG.debug("Traversing non-empty source dir: "
+                       + child.getPath());
+          pathStack.push(child);
+        }
+      }
+    }
+  }
+
+  private void writeToFileListing(SequenceFile.Writer fileListWriter,
+                                  FileStatus fileStatus, Path sourcePathRoot,
+                                  boolean localFile) throws IOException {
+    if (fileStatus.getPath().equals(sourcePathRoot) && fileStatus.isDirectory())
+      return; // Skip the root-paths.
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot,
+        fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath());
+    }
+
+    FileStatus status = fileStatus;
+    if (localFile) {
+      status = getFileStatus(fileStatus);
+    }
+
+    fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
+        fileStatus.getPath())), status);
+    fileListWriter.sync();
+
+    if (!fileStatus.isDirectory()) {
+      totalBytesToCopy += fileStatus.getLen();
+    }
+    totalPaths++;
+  }
+
+  private static final ByteArrayOutputStream buffer = new ByteArrayOutputStream(64);
+  private DataInputBuffer in = new DataInputBuffer();
+  
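+  // When the source FileSystem hands back a FileStatus subclass (as local file
+  // systems may), round-trip it through its Writable form to obtain a plain
+  // FileStatus, since the listing SequenceFile is declared with FileStatus.class
+  // as its value class.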
+  private FileStatus getFileStatus(FileStatus fileStatus) throws IOException {
+    FileStatus status = new FileStatus();
+
+    buffer.reset();
+    DataOutputStream out = new DataOutputStream(buffer);
+    fileStatus.write(out);
+
+    in.reset(buffer.toByteArray(), 0, buffer.size());
+    status.readFields(in);
+    return status;
+  }
+}

+ 297 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java

@@ -0,0 +1,297 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
+import org.apache.hadoop.tools.*;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.util.DistCpUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
+/**
+ * The CopyCommitter class is DistCp's OutputCommitter implementation. It is
+ * responsible for handling the completion/cleanup of the DistCp run.
+ * Specifically, it does the following:
+ *  1. Cleanup of the meta-folder (where DistCp maintains its file-list, etc.)
+ *  2. Preservation of user/group/replication-factor on any directories that
+ *     have been copied. (Files are taken care of in their map-tasks.)
+ *  3. Atomic-move of data from the temporary work-folder to the final path
+ *     (if atomic-commit was opted for).
+ *  4. Deletion of files from the target that are missing at source (if opted for).
+ *  5. Cleanup of any partially copied files, from previous, failed attempts.
+ */
+public class CopyCommitter extends FileOutputCommitter {
+  private static final Log LOG = LogFactory.getLog(CopyCommitter.class);
+
+  private final TaskAttemptContext taskAttemptContext;
+
+  /**
+   * Create an output committer.
+   *
+   * @param outputPath the job's output path
+   * @param context    the task's context
+   * @throws IOException - Exception if any
+   */
+  public CopyCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
+    super(outputPath, context);
+    this.taskAttemptContext = context;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void commitJob(JobContext jobContext) throws IOException {
+    Configuration conf = jobContext.getConfiguration();
+    super.commitJob(jobContext);
+
+    cleanupTempFiles(jobContext);
+
+    String attributes = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
+    if (attributes != null && !attributes.isEmpty()) {
+      preserveFileAttributesForDirectories(conf);
+    }
+
+    try {
+      if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)) {
+        deleteMissing(conf);
+      } else if (conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false)) {
+        commitData(conf);
+      }
+      taskAttemptContext.setStatus("Commit Successful");
+    }
+    finally {
+      cleanup(conf);
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void abortJob(JobContext jobContext,
+                       JobStatus.State state) throws IOException {
+    try {
+      super.abortJob(jobContext, state);
+    } finally {
+      cleanupTempFiles(jobContext);
+      cleanup(jobContext.getConfiguration());
+    }
+  }
+
+  private void cleanupTempFiles(JobContext context) {
+    try {
+      Configuration conf = context.getConfiguration();
+
+      Path targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+      FileSystem targetFS = targetWorkPath.getFileSystem(conf);
+
+      String jobId = context.getJobID().toString();
+      deleteAttemptTempFiles(targetWorkPath, targetFS, jobId);
+      deleteAttemptTempFiles(targetWorkPath.getParent(), targetFS, jobId);
+    } catch (Throwable t) {
+      LOG.warn("Unable to cleanup temp files", t);
+    }
+  }
+
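+  // Temp files are named ".distcp.tmp.<task-attempt-id>" (see
+  // RetriableFileCopyCommand#getTmpFile). Replacing "job" with "attempt" in the
+  // job-id yields the common prefix of all task-attempt ids for this job, so the
+  // glob below matches every attempt's leftover temp file.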
+  private void deleteAttemptTempFiles(Path targetWorkPath,
+                                      FileSystem targetFS,
+                                      String jobId) throws IOException {
+
+    FileStatus[] tempFiles = targetFS.globStatus(
+        new Path(targetWorkPath, ".distcp.tmp." + jobId.replaceAll("job","attempt") + "*"));
+
+    if (tempFiles != null && tempFiles.length > 0) {
+      for (FileStatus file : tempFiles) {
+        LOG.info("Cleaning up " + file.getPath());
+        targetFS.delete(file.getPath(), false);
+      }
+    }
+  }
+
+  /**
+   * Cleanup meta folder and other temporary files
+   *
+   * @param conf - Job Configuration
+   */
+  private void cleanup(Configuration conf) {
+    Path metaFolder = new Path(conf.get(DistCpConstants.CONF_LABEL_META_FOLDER));
+    try {
+      FileSystem fs = metaFolder.getFileSystem(conf);
+      LOG.info("Cleaning up temporary work folder: " + metaFolder);
+      fs.delete(metaFolder, true);
+    } catch (IOException ignore) {
+      LOG.error("Exception encountered ", ignore);
+    }
+  }
+
+  // This method changes the target-directories' file-attributes (owner,
+  // user/group permissions, etc.) based on the corresponding source directories.
+  private void preserveFileAttributesForDirectories(Configuration conf) throws IOException {
+    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
+    LOG.info("About to preserve attributes: " + attrSymbols);
+
+    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);
+
+    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
+    FileSystem clusterFS = sourceListing.getFileSystem(conf);
+    SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
+                                      SequenceFile.Reader.file(sourceListing));
+    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();
+
+    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+
+    long preservedEntries = 0;
+    try {
+      FileStatus srcFileStatus = new FileStatus();
+      Text srcRelPath = new Text();
+
+      // Iterate over every source path that was copied.
+      while (sourceReader.next(srcRelPath, srcFileStatus)) {
+        // File-attributes for files are set at the time of copy,
+        // in the map-task.
+        if (! srcFileStatus.isDirectory()) continue;
+
+        Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
+
+        // Skip the root folder.
+        // Status can't be preserved on root-folder. (E.g. multiple paths may
+        // be copied to a single target folder. Which source-attributes to use
+        // on the target is undefined.)
+        if (targetRoot.equals(targetFile)) continue;
+
+        FileSystem targetFS = targetFile.getFileSystem(conf);
+        DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes);
+        preservedEntries++; // Count directories whose attributes were actually preserved.
+
+        taskAttemptContext.progress();
+        taskAttemptContext.setStatus("Preserving status on directory entries. [" +
+            sourceReader.getPosition() * 100 / totalLen + "%]");
+      }
+    } finally {
+      IOUtils.closeStream(sourceReader);
+    }
+    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
+  }
+
+  // This method deletes "extra" files from the target, if they're not
+  // available at the source.
+  private void deleteMissing(Configuration conf) throws IOException {
+    LOG.info("-delete option is enabled. About to remove entries from " +
+        "target that are missing in source");
+
+    // Sort the source-file listing alphabetically.
+    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
+    FileSystem clusterFS = sourceListing.getFileSystem(conf);
+    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);
+
+    // Similarly, create the listing of target-files. Sort alphabetically.
+    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
+    CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);
+
+    List<Path> targets = new ArrayList<Path>(1);
+    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    targets.add(targetFinalPath);
+    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
+
+    target.buildListing(targetListing, options);
+    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
+    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();
+
+    SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
+                                 SequenceFile.Reader.file(sortedSourceListing));
+    SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
+                                 SequenceFile.Reader.file(sortedTargetListing));
+
+    // Walk both source and target file listings.
+    // Delete all from target that doesn't also exist on source.
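+    // Both listings are sorted by relative path, so a single forward pass over
+    // each (a sort-merge style anti-join) suffices.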
+    long deletedEntries = 0;
+    try {
+      FileStatus srcFileStatus = new FileStatus();
+      Text srcRelPath = new Text();
+      FileStatus trgtFileStatus = new FileStatus();
+      Text trgtRelPath = new Text();
+
+      FileSystem targetFS = targetFinalPath.getFileSystem(conf);
+      boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
+      while (targetReader.next(trgtRelPath, trgtFileStatus)) {
+        // Skip sources that don't exist on target.
+        while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
+          srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
+        }
+
+        if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;
+
+        // Target doesn't exist at source. Delete.
+        boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
+            targetFS.delete(trgtFileStatus.getPath(), true));
+        if (result) {
+          LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
+          deletedEntries++;
+        } else {
+          throw new IOException("Unable to delete " + trgtFileStatus.getPath());
+        }
+        taskAttemptContext.progress();
+        taskAttemptContext.setStatus("Deleting missing files from target. [" +
+            targetReader.getPosition() * 100 / totalLen + "%]");
+      }
+    } finally {
+      IOUtils.closeStream(sourceReader);
+      IOUtils.closeStream(targetReader);
+    }
+    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
+  }
+
+  private void commitData(Configuration conf) throws IOException {
+
+    Path workDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+    Path finalDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    FileSystem targetFS = workDir.getFileSystem(conf);
+
+    LOG.info("Atomic commit enabled. Moving " + workDir + " to " + finalDir);
+    if (targetFS.exists(finalDir) && targetFS.exists(workDir)) {
+      LOG.error("Pre-existing final-path found at: " + finalDir);
+      throw new IOException("Target-path can't be committed to because it " +
+          "exists at " + finalDir + ". Copied data is in temp-dir: " + workDir + ". ");
+    }
+
+    boolean result = targetFS.rename(workDir, finalDir);
+    if (!result) {
+      LOG.warn("Rename failed. Perhaps data already moved. Verifying...");
+      result = targetFS.exists(finalDir) && !targetFS.exists(workDir);
+    }
+    if (result) {
+      LOG.info("Data committed successfully to " + finalDir);
+      taskAttemptContext.setStatus("Data committed successfully to " + finalDir);
+    } else {
+      LOG.error("Unable to commit data to " + finalDir);
+      throw new IOException("Atomic commit failed. Temporary data in " + workDir +
+        ", Unable to move to " + finalDir);
+    }
+  }
+}

+ 330 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java

@@ -0,0 +1,330 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptionSwitch;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.util.StringUtils;
+
+import java.io.*;
+import java.util.EnumSet;
+import java.util.Arrays;
+
+/**
+ * Mapper class that executes the DistCp copy operation.
+ * Extends the o.a.h.mapreduce.Mapper<> base class.
+ */
+public class CopyMapper extends Mapper<Text, FileStatus, Text, Text> {
+
+  /**
+   * Hadoop counters for the DistCp CopyMapper.
+   * (These have been kept identical to the old DistCp,
+   * for backward compatibility.)
+   */
+  public static enum Counter {
+    COPY,         // Number of files received by the mapper for copy.
+    SKIP,         // Number of files skipped.
+    FAIL,         // Number of files that failed to be copied.
+    BYTESCOPIED,  // Number of bytes actually copied by the copy-mapper, total.
+    BYTESEXPECTED,// Number of bytes expected to be copied.
+    BYTESFAILED,  // Number of bytes that failed to be copied.
+    BYTESSKIPPED, // Number of bytes that were skipped from copy.
+  }
+
+  private static Log LOG = LogFactory.getLog(CopyMapper.class);
+
+  private Configuration conf;
+
+  private boolean syncFolders = false;
+  private boolean ignoreFailures = false;
+  private boolean skipCrc = false;
+  private boolean overWrite = false;
+  private EnumSet<FileAttribute> preserve = EnumSet.noneOf(FileAttribute.class);
+
+  private FileSystem targetFS = null;
+  private Path    targetWorkPath = null;
+
+  /**
+   * Implementation of the Mapper::setup() method. This extracts the DistCp-
+   * options specified in the Job's configuration, to set up the Job.
+   * @param context Mapper's context.
+   * @throws IOException On IO failure.
+   * @throws InterruptedException If the job is interrupted.
+   */
+  @Override
+  public void setup(Context context) throws IOException, InterruptedException {
+    conf = context.getConfiguration();
+
+    syncFolders = conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false);
+    ignoreFailures = conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false);
+    skipCrc = conf.getBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), false);
+    overWrite = conf.getBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), false);
+    preserve = DistCpUtils.unpackAttributes(conf.get(DistCpOptionSwitch.
+        PRESERVE_STATUS.getConfigLabel()));
+
+    targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+    Path targetFinalPath = new Path(conf.get(
+            DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    targetFS = targetFinalPath.getFileSystem(conf);
+
+    if (targetFS.exists(targetFinalPath) && targetFS.isFile(targetFinalPath)) {
+      overWrite = true; // When target is an existing file, overwrite it.
+    }
+
+    if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) {
+      initializeSSLConf(context);
+    }
+  }
+
+  /**
+   * Initialize the SSL configuration, if one has been specified in the job configuration.
+   *
+   * @throws IOException On failure to localize or write out the SSL configuration.
+   */
+  private void initializeSSLConf(Context context) throws IOException {
+    LOG.info("Initializing SSL configuration");
+    
+    String workDir = conf.get(JobContext.JOB_LOCAL_DIR) + "/work";
+    Path[] cacheFiles = context.getLocalCacheFiles();
+
+    Configuration sslConfig = new Configuration(false);
+    String sslConfFileName = conf.get(DistCpConstants.CONF_LABEL_SSL_CONF);
+    Path sslClient = findCacheFile(cacheFiles, sslConfFileName);
+    if (sslClient == null) {
+      LOG.warn("SSL Client config file not found. Was looking for " + sslConfFileName +
+          " in " + Arrays.toString(cacheFiles));
+      return;
+    }
+    sslConfig.addResource(sslClient);
+
+    String trustStoreFile = conf.get("ssl.client.truststore.location");
+    Path trustStorePath = findCacheFile(cacheFiles, trustStoreFile);
+    sslConfig.set("ssl.client.truststore.location", trustStorePath.toString());
+
+    String keyStoreFile = conf.get("ssl.client.keystore.location");
+    Path keyStorePath = findCacheFile(cacheFiles, keyStoreFile);
+    sslConfig.set("ssl.client.keystore.location", keyStorePath.toString());
+
+    try {
+      OutputStream out = new FileOutputStream(workDir + "/" + sslConfFileName);
+      try {
+        sslConfig.writeXml(out);
+      } finally {
+        out.close();
+      }
+      conf.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfFileName);
+    } catch (IOException e) {
+      LOG.warn("Unable to write out the ssl configuration. " +
+          "Will fall back to default ssl-client.xml in class path, if there is one", e);
+    }
+  }
+
+  /**
+   * Find entry from distributed cache
+   *
+   * @param cacheFiles - All localized cache files
+   * @param fileName - fileName to search
+   * @return Path of the filename if found, else null
+   */
+  private Path findCacheFile(Path[] cacheFiles, String fileName) {
+    if (cacheFiles != null && cacheFiles.length > 0) {
+      for (Path file : cacheFiles) {
+        if (file.getName().equals(fileName)) {
+          return file;
+        }
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Implementation of the Mapper<>::map(). Does the copy.
+   * @param relPath The file's path, relative to the source root (used to derive the target path).
+   * @param sourceFileStatus The FileStatus of the source file.
+   * @throws IOException On failure to copy.
+   */
+  @Override
+  public void map(Text relPath, FileStatus sourceFileStatus, Context context)
+          throws IOException, InterruptedException {
+    Path sourcePath = sourceFileStatus.getPath();
+
+    if (LOG.isDebugEnabled())
+      LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);
+
+    Path target = new Path(targetWorkPath.makeQualified(targetFS.getUri(),
+                          targetFS.getWorkingDirectory()) + relPath.toString());
+
+    EnumSet<DistCpOptions.FileAttribute> fileAttributes
+            = getFileAttributeSettings(context);
+
+    final String description = "Copying " + sourcePath + " to " + target;
+    context.setStatus(description);
+
+    LOG.info(description);
+
+    try {
+      FileStatus sourceCurrStatus;
+      FileSystem sourceFS;
+      try {
+        sourceFS = sourcePath.getFileSystem(conf);
+        sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
+      } catch (FileNotFoundException e) {
+        throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
+      }
+
+      FileStatus targetStatus = null;
+
+      try {
+        targetStatus = targetFS.getFileStatus(target);
+      } catch (FileNotFoundException ignore) {
+        if (LOG.isDebugEnabled())
+          LOG.debug("Path could not be found: " + target, ignore);
+      }
+
+      if (targetStatus != null && (targetStatus.isDirectory() != sourceCurrStatus.isDirectory())) {
+        throw new IOException("Can't replace " + target + ". Target is " +
+            getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus));
+      }
+
+      if (sourceCurrStatus.isDirectory()) {
+        createTargetDirsWithRetry(description, target, context);
+        return;
+      }
+
+      if (skipFile(sourceFS, sourceCurrStatus, target)) {
+        LOG.info("Skipping copy of " + sourceCurrStatus.getPath()
+                 + " to " + target);
+        updateSkipCounters(context, sourceCurrStatus);
+        context.write(null, new Text("SKIP: " + sourceCurrStatus.getPath()));
+      }
+      else {
+        copyFileWithRetry(description, sourceCurrStatus, target, context,
+                          fileAttributes);
+      }
+
+      DistCpUtils.preserve(target.getFileSystem(conf), target,
+                           sourceCurrStatus, fileAttributes);
+
+    } catch (IOException exception) {
+      handleFailures(exception, sourceFileStatus, target, context);
+    }
+  }
+
+  private String getFileType(FileStatus fileStatus) {
+    return fileStatus == null ? "N/A" : (fileStatus.isDirectory() ? "dir" : "file");
+  }
+
+  private static EnumSet<DistCpOptions.FileAttribute>
+          getFileAttributeSettings(Mapper.Context context) {
+    String attributeString = context.getConfiguration().get(
+            DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel());
+    return DistCpUtils.unpackAttributes(attributeString);
+  }
+
+  private void copyFileWithRetry(String description, FileStatus sourceFileStatus,
+               Path target, Context context,
+               EnumSet<DistCpOptions.FileAttribute> fileAttributes) throws IOException {
+
+    long bytesCopied;
+    try {
+      bytesCopied = (Long)new RetriableFileCopyCommand(description)
+                       .execute(sourceFileStatus, target, context, fileAttributes);
+    } catch (Exception e) {
+      context.setStatus("Copy Failure: " + sourceFileStatus.getPath());
+      throw new IOException("File copy failed: " + sourceFileStatus.getPath() +
+          " --> " + target, e);
+    }
+    incrementCounter(context, Counter.BYTESEXPECTED, sourceFileStatus.getLen());
+    incrementCounter(context, Counter.BYTESCOPIED, bytesCopied);
+    incrementCounter(context, Counter.COPY, 1);
+  }
+
+  private void createTargetDirsWithRetry(String description,
+                   Path target, Context context) throws IOException {
+    try {
+      new RetriableDirectoryCreateCommand(description).execute(target, context);
+    } catch (Exception e) {
+      throw new IOException("mkdir failed for " + target, e);
+    }
+    incrementCounter(context, Counter.COPY, 1);
+  }
+
+  private static void updateSkipCounters(Context context,
+                                         FileStatus sourceFile) {
+    incrementCounter(context, Counter.SKIP, 1);
+    incrementCounter(context, Counter.BYTESSKIPPED, sourceFile.getLen());
+
+  }
+
+  private void handleFailures(IOException exception,
+                                     FileStatus sourceFileStatus, Path target,
+                                     Context context) throws IOException, InterruptedException {
+    LOG.error("Failure in copying " + sourceFileStatus.getPath() + " to " +
+                target, exception);
+
+    if (ignoreFailures && exception.getCause() instanceof
+            RetriableFileCopyCommand.CopyReadException) {
+      incrementCounter(context, Counter.FAIL, 1);
+      incrementCounter(context, Counter.BYTESFAILED, sourceFileStatus.getLen());
+      context.write(null, new Text("FAIL: " + sourceFileStatus.getPath() + " - " + 
+          StringUtils.stringifyException(exception)));
+    }
+    else
+      throw exception;
+  }
+
+  private static void incrementCounter(Context context, Counter counter,
+                                       long value) {
+    context.getCounter(counter).increment(value);
+  }
+
+  private boolean skipFile(FileSystem sourceFS, FileStatus source, Path target)
+                                          throws IOException {
+    return     targetFS.exists(target)
+            && !overWrite
+            && !mustUpdate(sourceFS, source, target);
+  }
+
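+  // Under -update, a target file is re-copied only if its length differs, its
+  // checksum differs (unless CRC-checking is skipped), or its block size differs
+  // while block-size preservation has been requested.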
+  private boolean mustUpdate(FileSystem sourceFS, FileStatus source, Path target)
+                                    throws IOException {
+    final FileStatus targetFileStatus = targetFS.getFileStatus(target);
+
+    return     syncFolders
+            && (
+                   targetFileStatus.getLen() != source.getLen()
+                || (!skipCrc &&
+                       !DistCpUtils.checksumsAreEqual(sourceFS,
+                                          source.getPath(), targetFS, target))
+                || (source.getBlockSize() != targetFileStatus.getBlockSize() &&
+                      preserve.contains(FileAttribute.BLOCKSIZE))
+               );
+  }
+}

+ 124 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java

@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.tools.DistCpConstants;
+
+import java.io.IOException;
+
+/**
+ * The CopyOutputFormat is the Hadoop OutputFormat used in DistCp.
+ * It sets up the Job's Configuration (in the Job-Context) with the settings
+ * for the work-directory, final commit-directory, etc. It also sets the right
+ * output-committer.
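+ *
+ * A rough usage sketch (the job and paths below are illustrative only):
+ * <pre>
+ *   Job job = ...;
+ *   CopyOutputFormat.setWorkingDirectory(job, workPath);
+ *   CopyOutputFormat.setCommitDirectory(job, finalPath);
+ *   job.setOutputFormatClass(CopyOutputFormat.class);
+ * </pre>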
+ * @param <K>
+ * @param <V>
+ */
+public class CopyOutputFormat<K, V> extends TextOutputFormat<K, V> {
+
+  /**
+   * Setter for the working directory for DistCp (where files will be copied
+   * before they are moved to the final commit-directory.)
+   * @param job The Job on whose configuration the working-directory is to be set.
+   * @param workingDirectory The path to use as the working directory.
+   */
+  public static void setWorkingDirectory(Job job, Path workingDirectory) {
+    job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
+        workingDirectory.toString());
+  }
+
+  /**
+   * Setter for the final directory for DistCp (where files copied will be
+   * moved, atomically.)
+   * @param job The Job on whose configuration the working-directory is to be set.
+   * @param commitDirectory The path to use for final commit.
+   */
+  public static void setCommitDirectory(Job job, Path commitDirectory) {
+    job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
+        commitDirectory.toString());
+  }
+
+  /**
+   * Getter for the working directory.
+   * @param job The Job from whose configuration the working-directory is to
+   * be retrieved.
+   * @return The working-directory Path.
+   */
+  public static Path getWorkingDirectory(Job job) {
+    return getWorkingDirectory(job.getConfiguration());
+  }
+
+  private static Path getWorkingDirectory(Configuration conf) {
+    String workingDirectory = conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH);
+    if (workingDirectory == null || workingDirectory.isEmpty()) {
+      return null;
+    } else {
+      return new Path(workingDirectory);
+    }
+  }
+
+  /**
+   * Getter for the final commit-directory.
+   * @param job The Job from whose configuration the commit-directory is to be
+   * retrieved.
+   * @return The commit-directory Path.
+   */
+  public static Path getCommitDirectory(Job job) {
+    return getCommitDirectory(job.getConfiguration());
+  }
+
+  private static Path getCommitDirectory(Configuration conf) {
+    String commitDirectory = conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH);
+    if (commitDirectory == null || commitDirectory.isEmpty()) {
+      return null;
+    } else {
+      return new Path(commitDirectory);
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
+    return new CopyCommitter(getOutputPath(context), context);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void checkOutputSpecs(JobContext context) throws IOException {
+    Configuration conf = context.getConfiguration();
+
+    if (getCommitDirectory(conf) == null) {
+      throw new IllegalStateException("Commit directory not configured");
+    }
+
+    Path workingPath = getWorkingDirectory(conf);
+    if (workingPath == null) {
+      throw new IllegalStateException("Working directory not configured");
+    }
+
+    // get delegation token for outDir's file system
+    TokenCache.obtainTokensForNamenodes(context.getCredentials(),
+                                        new Path[] {workingPath}, conf);
+  }
+}

+ 56 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java

@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.tools.util.RetriableCommand;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * This class extends RetriableCommand to implement the creation of directories,
+ * with retries on failure.
+ */
+public class RetriableDirectoryCreateCommand extends RetriableCommand {
+
+  /**
+   * Constructor, taking a description of the action.
+   * @param description Verbose description of the copy operation.
+   */
+  public RetriableDirectoryCreateCommand(String description) {
+    super(description);
+  }
+
+  /**
+   * Implementation of RetriableCommand::doExecute().
+   * This implements the actual mkdirs() functionality.
+   * @param arguments Argument-list to the command.
+   * @return Boolean. True, if the directory could be created successfully.
+   * @throws Exception IOException, on failure to create the directory.
+   */
+  @Override
+  protected Object doExecute(Object... arguments) throws Exception {
+    assert arguments.length == 2 : "Unexpected argument list.";
+    Path target = (Path)arguments[0];
+    Mapper.Context context = (Mapper.Context)arguments[1];
+
+    FileSystem targetFS = target.getFileSystem(context.getConfiguration());
+    return targetFS.mkdirs(target);
+  }
+}

+ 245 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java

@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.tools.util.RetriableCommand;
+import org.apache.hadoop.tools.util.ThrottledInputStream;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.tools.DistCpOptions.*;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import java.io.*;
+import java.util.EnumSet;
+
+/**
+ * This class extends RetriableCommand to implement the copy of files,
+ * with retries on failure.
+ */
+public class RetriableFileCopyCommand extends RetriableCommand {
+
+  private static Log LOG = LogFactory.getLog(RetriableFileCopyCommand.class);
+  private static int BUFFER_SIZE = 8 * 1024;
+
+  /**
+   * Constructor, taking a description of the action.
+   * @param description Verbose description of the copy operation.
+   */
+  public RetriableFileCopyCommand(String description) {
+    super(description);
+  }
+
+  /**
+   * Implementation of RetriableCommand::doExecute().
+   * This is the actual copy-implementation.
+   * @param arguments Argument-list to the command.
+   * @return Number of bytes copied.
+   * @throws Exception CopyReadException, if there are read-failures. All other
+   *         failures are IOExceptions.
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  protected Object doExecute(Object... arguments) throws Exception {
+    assert arguments.length == 4 : "Unexpected argument list.";
+    FileStatus source = (FileStatus)arguments[0];
+    assert !source.isDirectory() : "Unexpected file-status. Expected file.";
+    Path target = (Path)arguments[1];
+    Mapper.Context context = (Mapper.Context)arguments[2];
+    EnumSet<FileAttribute> fileAttributes
+            = (EnumSet<FileAttribute>)arguments[3];
+    return doCopy(source, target, context, fileAttributes);
+  }
+
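+  // The copy is staged through a task-attempt-specific temp file: bytes are
+  // copied to the temp file, the length and checksum are verified against the
+  // source, and only then is the temp file renamed onto the final target, so a
+  // partially written target is never left visible.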
+  private long doCopy(FileStatus sourceFileStatus, Path target,
+                      Mapper.Context context,
+                      EnumSet<FileAttribute> fileAttributes)
+          throws IOException {
+
+    Path tmpTargetPath = getTmpFile(target, context);
+    final Configuration configuration = context.getConfiguration();
+    FileSystem targetFS = target.getFileSystem(configuration);
+
+    try {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Copying " + sourceFileStatus.getPath() + " to " + target);
+        LOG.debug("Tmp-file path: " + tmpTargetPath);
+      }
+      FileSystem sourceFS = sourceFileStatus.getPath().getFileSystem(
+              configuration);
+      long bytesRead = copyToTmpFile(tmpTargetPath, targetFS, sourceFileStatus,
+                                     context, fileAttributes);
+
+      compareFileLengths(sourceFileStatus, tmpTargetPath, configuration, bytesRead);
+      compareCheckSums(sourceFS, sourceFileStatus.getPath(), targetFS, tmpTargetPath);
+      promoteTmpToTarget(tmpTargetPath, target, targetFS);
+      return bytesRead;
+
+    } finally {
+      if (targetFS.exists(tmpTargetPath))
+        targetFS.delete(tmpTargetPath, false);
+    }
+  }
+
+  private long copyToTmpFile(Path tmpTargetPath, FileSystem targetFS,
+                             FileStatus sourceFileStatus, Mapper.Context context,
+                             EnumSet<FileAttribute> fileAttributes)
+                             throws IOException {
+    OutputStream outStream = new BufferedOutputStream(targetFS.create(
+            tmpTargetPath, true, BUFFER_SIZE,
+            getReplicationFactor(fileAttributes, sourceFileStatus, targetFS),
+            getBlockSize(fileAttributes, sourceFileStatus, targetFS), context));
+    return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, true, context);
+  }
+
+  private void compareFileLengths(FileStatus sourceFileStatus, Path target,
+                                  Configuration configuration, long bytesRead)
+                                  throws IOException {
+    final Path sourcePath = sourceFileStatus.getPath();
+    FileSystem fs = sourcePath.getFileSystem(configuration);
+    if (fs.getFileStatus(sourcePath).getLen() != bytesRead)
+      throw new IOException("Mismatch in length of source:" + sourcePath
+                + " and target:" + target);
+  }
+
+  private void compareCheckSums(FileSystem sourceFS, Path source,
+                                FileSystem targetFS, Path target)
+                                throws IOException {
+    if (!DistCpUtils.checksumsAreEqual(sourceFS, source, targetFS, target))
+      throw new IOException("Check-sum mismatch between "
+                              + source + " and " + target);
+
+  }
+
+  //If target file exists and unable to delete target - fail
+  //If target doesn't exist and unable to create parent folder - fail
+  //If target is successfully deleted and parent exists, if rename fails - fail
+  private void promoteTmpToTarget(Path tmpTarget, Path target, FileSystem fs)
+                                  throws IOException {
+    if ((fs.exists(target) && !fs.delete(target, false))
+        || (!fs.exists(target.getParent()) && !fs.mkdirs(target.getParent()))
+        || !fs.rename(tmpTarget, target)) {
+      throw new IOException("Failed to promote tmp-file:" + tmpTarget
+                              + " to: " + target);
+    }
+  }
+
+  private Path getTmpFile(Path target, Mapper.Context context) {
+    Path targetWorkPath = new Path(context.getConfiguration().
+        get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+
+    Path root = target.equals(targetWorkPath)? targetWorkPath.getParent() : targetWorkPath;
+    Path tmpFile = new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString());
+    LOG.info("Creating temp file: " + tmpFile);
+    return tmpFile;
+  }
+
+  private long copyBytes(FileStatus sourceFileStatus, OutputStream outStream,
+                         int bufferSize, boolean mustCloseStream,
+                         Mapper.Context context) throws IOException {
+    Path source = sourceFileStatus.getPath();
+    byte buf[] = new byte[bufferSize];
+    ThrottledInputStream inStream = null;
+    long totalBytesRead = 0;
+
+    try {
+      inStream = getInputStream(source, context.getConfiguration());
+      int bytesRead = readBytes(inStream, buf);
+      while (bytesRead >= 0) {
+        totalBytesRead += bytesRead;
+        outStream.write(buf, 0, bytesRead);
+        updateContextStatus(totalBytesRead, context, sourceFileStatus);
+        bytesRead = readBytes(inStream, buf); // Wrap mid-stream read failures as CopyReadException too.
+      }
+    } finally {
+      if (mustCloseStream)
+        IOUtils.cleanup(LOG, outStream, inStream);
+    }
+
+    return totalBytesRead;
+  }
+
+  private void updateContextStatus(long totalBytesRead, Mapper.Context context,
+                                   FileStatus sourceFileStatus) {
+    StringBuilder message = new StringBuilder(DistCpUtils.getFormatter()
+                .format(totalBytesRead * 100.0f / sourceFileStatus.getLen()));
+    message.append("% ")
+            .append(description).append(" [")
+            .append(DistCpUtils.getStringDescriptionFor(totalBytesRead))
+            .append('/')
+        .append(DistCpUtils.getStringDescriptionFor(sourceFileStatus.getLen()))
+            .append(']');
+    context.setStatus(message.toString());
+  }
+
+  private static int readBytes(InputStream inStream, byte buf[])
+          throws IOException {
+    try {
+      return inStream.read(buf);
+    }
+    catch (IOException e) {
+      throw new CopyReadException(e);
+    }
+  }
+
+  private static ThrottledInputStream getInputStream(Path path, Configuration conf)
+          throws IOException {
+    try {
+      FileSystem fs = path.getFileSystem(conf);
+      long bandwidthMB = conf.getInt(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
+              DistCpConstants.DEFAULT_BANDWIDTH_MB);
+      return new ThrottledInputStream(new BufferedInputStream(fs.open(path)),
+              bandwidthMB * 1024 * 1024);
+    }
+    catch (IOException e) {
+      throw new CopyReadException(e);
+    }
+  }
+
+  private static short getReplicationFactor(
+          EnumSet<FileAttribute> fileAttributes,
+          FileStatus sourceFile, FileSystem targetFS) {
+    return fileAttributes.contains(FileAttribute.REPLICATION)?
+            sourceFile.getReplication() : targetFS.getDefaultReplication();
+  }
+
+  private static long getBlockSize(
+          EnumSet<FileAttribute> fileAttributes,
+          FileStatus sourceFile, FileSystem targetFS) {
+    return fileAttributes.contains(FileAttribute.BLOCKSIZE)?
+            sourceFile.getBlockSize() : targetFS.getDefaultBlockSize();
+  }
+
+  /**
+   * Special subclass of IOException. This is used to distinguish read-operation
+   * failures from other kinds of IOExceptions.
+   * The failure to read from source is dealt with specially, in the CopyMapper.
+   * Such failures may be skipped if the DistCpOptions indicate so.
+   * Write failures are intolerable, and amount to CopyMapper failure.  
+   */
+  public static class CopyReadException extends IOException {
+    public CopyReadException(Throwable rootCause) {
+      super(rootCause);
+    }
+  }
+}
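
CopyReadException lets the mapper treat unreadable sources differently from write failures: read-side errors may be skipped when the job is configured to tolerate them, while any other IOException fails the task. The standalone Java sketch below illustrates that pattern only; the class, interface, and flag names are hypothetical and not part of this patch.

// Standalone sketch (illustrative, not from this patch): read-side failures
// may be logged and skipped, while every other IOException stays fatal.
import java.io.IOException;

public class CopyFailureHandlingSketch {

  /** Stand-in for the copy routine whose failures we want to classify. */
  interface CopyAction {
    long copy() throws IOException;
  }

  /** Mirrors the role of CopyReadException above. */
  static class ReadFailure extends IOException {
    ReadFailure(Throwable cause) { super(cause); }
  }

  static long copyOrSkip(CopyAction action, boolean ignoreReadFailures)
      throws IOException {
    try {
      return action.copy();
    } catch (ReadFailure e) {
      if (ignoreReadFailures) {
        System.err.println("Skipping unreadable source: " + e.getCause());
        return 0L;   // record the skip and move on to the next file
      }
      throw e;       // configured to fail fast on read errors
    }
    // Any other IOException (for example a write failure) is not caught here,
    // so it propagates and fails the calling task.
  }
}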

+ 169 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java

@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * UniformSizeInputFormat extends the InputFormat<> class, to produce
+ * input-splits for DistCp.
+ * It reads the copy-listing and groups its entries into input-splits such
+ * that the total number of bytes to be copied by each input-split is
+ * approximately equal.
+ */
+public class UniformSizeInputFormat extends InputFormat<Text, FileStatus> {
+  private static final Log LOG
+                = LogFactory.getLog(UniformSizeInputFormat.class);
+
+  /**
+   * Implementation of InputFormat::getSplits(). Returns a list of InputSplits,
+   * such that the number of bytes to be copied by each split is
+   * approximately equal.
+   * @param context JobContext for the job.
+   * @return The list of uniformly-distributed input-splits.
+   * @throws IOException On failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public List<InputSplit> getSplits(JobContext context)
+                      throws IOException, InterruptedException {
+    Configuration configuration = context.getConfiguration();
+    int numSplits = DistCpUtils.getInt(configuration,
+                                       JobContext.NUM_MAPS);
+
+    if (numSplits == 0) return new ArrayList<InputSplit>();
+
+    return getSplits(configuration, numSplits,
+                     DistCpUtils.getLong(configuration,
+                          DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED));
+  }
+
+  private List<InputSplit> getSplits(Configuration configuration, int numSplits,
+                                     long totalSizeBytes) throws IOException {
+    List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
+    long nBytesPerSplit = (long) Math.ceil(totalSizeBytes * 1.0 / numSplits);
+
+    FileStatus srcFileStatus = new FileStatus();
+    Text srcRelPath = new Text();
+    long currentSplitSize = 0;
+    long lastSplitStart = 0;
+    long lastPosition = 0;
+
+    final Path listingFilePath = getListingFilePath(configuration);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Average bytes per map: " + nBytesPerSplit +
+          ", Number of maps: " + numSplits + ", total size: " + totalSizeBytes);
+    }
+    SequenceFile.Reader reader = null;
+    try {
+      reader = getListingFileReader(configuration);
+      while (reader.next(srcRelPath, srcFileStatus)) {
+        // If adding the current file would push this split past the
+        // bytes-per-map limit, close the current split and start a new one.
+        if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
+          FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
+              lastPosition - lastSplitStart, null);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug ("Creating split : " + split + ", bytes in split: " + currentSplitSize);
+          }
+          splits.add(split);
+          lastSplitStart = lastPosition;
+          currentSplitSize = 0;
+        }
+        currentSplitSize += srcFileStatus.getLen();
+        lastPosition = reader.getPosition();
+      }
+      if (lastPosition > lastSplitStart) {
+        FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
+            lastPosition - lastSplitStart, null);
+        if (LOG.isDebugEnabled()) {
+          LOG.info ("Creating split : " + split + ", bytes in split: " + currentSplitSize);
+        }
+        splits.add(split);
+      }
+
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+
+    return splits;
+  }
+
+  private static Path getListingFilePath(Configuration configuration) {
+    final String listingFilePathString =
+            configuration.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
+
+    assert !listingFilePathString.equals("")
+              : "Couldn't find listing file. Invalid input.";
+    return new Path(listingFilePathString);
+  }
+
+  private SequenceFile.Reader getListingFileReader(Configuration configuration) {
+
+    final Path listingFilePath = getListingFilePath(configuration);
+    try {
+      final FileSystem fileSystem = listingFilePath.getFileSystem(configuration);
+      if (!fileSystem.exists(listingFilePath))
+        throw new IllegalArgumentException("Listing file doesn't exist at: "
+                                           + listingFilePath);
+
+      return new SequenceFile.Reader(configuration,
+                                     SequenceFile.Reader.file(listingFilePath));
+    }
+    catch (IOException exception) {
+      LOG.error("Couldn't find listing file at: " + listingFilePath, exception);
+      throw new IllegalArgumentException("Couldn't find listing-file at: "
+                                         + listingFilePath, exception);
+    }
+  }
+
+  /**
+   * Implementation of InputFormat::createRecordReader().
+   * @param split The split for which the RecordReader is sought.
+   * @param context The context of the current task-attempt.
+   * @return A SequenceFileRecordReader instance, (since the copy-listing is a
+   * simple sequence-file.)
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Override
+  public RecordReader<Text, FileStatus> createRecordReader(InputSplit split,
+                                                     TaskAttemptContext context)
+                                      throws IOException, InterruptedException {
+    return new SequenceFileRecordReader<Text, FileStatus>();
+  }
+}
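
getSplits() above walks the copy-listing in order and starts a new split whenever the running byte count would exceed ceil(totalBytes / numSplits). The self-contained sketch below reproduces that greedy grouping on plain file sizes (no SequenceFile involved) so the arithmetic is easy to verify; it is an illustration, not code from this patch.

// Illustrative reimplementation of the greedy grouping used by getSplits(),
// operating on raw file sizes instead of a SequenceFile copy-listing.
import java.util.ArrayList;
import java.util.List;

public class UniformGroupingSketch {

  static List<List<Long>> group(long[] fileSizes, int numSplits) {
    long total = 0;
    for (long size : fileSizes) total += size;
    long bytesPerSplit = (long) Math.ceil(total * 1.0 / numSplits);

    List<List<Long>> groups = new ArrayList<List<Long>>();
    List<Long> current = new ArrayList<Long>();
    long currentSize = 0;

    for (long size : fileSizes) {
      // Close the current group once adding this file would exceed the target.
      if (currentSize + size > bytesPerSplit && !current.isEmpty()) {
        groups.add(current);
        current = new ArrayList<Long>();
        currentSize = 0;
      }
      current.add(size);
      currentSize += size;
    }
    if (!current.isEmpty()) groups.add(current);
    return groups;
  }

  public static void main(String[] args) {
    // Five files totalling 100 bytes, split two ways: roughly 50 bytes each.
    long[] sizes = {10, 40, 5, 25, 20};
    System.out.println(group(sizes, 2));   // prints [[10, 40], [5, 25, 20]]
  }
}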

+ 246 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java

@@ -0,0 +1,246 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.tools.mapred.lib;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskID;
+
+import java.io.IOException;
+
+/**
+ * The DynamicInputChunk represents a single chunk of work, when used in
+ * conjunction with the DynamicInputFormat and the DynamicRecordReader.
+ * The records in the DynamicInputFormat's input-file are split across various
+ * DynamicInputChunks. Each chunk is claimed and processed in one iteration of
+ * a dynamic-mapper. When a mapper exhausts its chunk, it may claim and process
+ * another, so faster mappers consume more chunks, until none remain.
+ */
+class DynamicInputChunk<K, V> {
+  private static Log LOG = LogFactory.getLog(DynamicInputChunk.class);
+
+  private static Configuration configuration;
+  private static Path chunkRootPath;
+  private static String chunkFilePrefix;
+  private static int numChunksLeft = -1; // Un-initialized before 1st dir-scan.
+  private static FileSystem fs;
+
+  private Path chunkFilePath;
+  private SequenceFileRecordReader<K, V> reader;
+  private SequenceFile.Writer writer;
+
+  private static void initializeChunkInvariants(Configuration config)
+                                                  throws IOException {
+    configuration = config;
+    Path listingFilePath = new Path(getListingFilePath(configuration));
+    chunkRootPath = new Path(listingFilePath.getParent(), "chunkDir");
+    fs = chunkRootPath.getFileSystem(configuration);
+    chunkFilePrefix = listingFilePath.getName() + ".chunk.";
+  }
+
+  private static String getListingFilePath(Configuration configuration) {
+    final String listingFileString = configuration.get(
+            DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
+    assert !listingFileString.equals("") : "Listing file not found.";
+    return listingFileString;
+  }
+
+  private static boolean areInvariantsInitialized() {
+    return chunkRootPath != null;
+  }
+
+  private DynamicInputChunk(String chunkId, Configuration configuration)
+                                                      throws IOException {
+    if (!areInvariantsInitialized())
+      initializeChunkInvariants(configuration);
+
+    chunkFilePath = new Path(chunkRootPath, chunkFilePrefix + chunkId);
+    openForWrite();
+  }
+
+
+  private void openForWrite() throws IOException {
+    writer = SequenceFile.createWriter(
+            chunkFilePath.getFileSystem(configuration), configuration,
+            chunkFilePath, Text.class, FileStatus.class,
+            SequenceFile.CompressionType.NONE);
+
+  }
+
+  /**
+   * Factory method to create chunk-files for writing to.
+   * (For instance, when the DynamicInputFormat splits the input-file into
+   * chunks.)
+   * @param chunkId String to identify the chunk.
+   * @param configuration Configuration, describing the location of the listing-
+   * file, file-system for the map-job, etc.
+   * @return A DynamicInputChunk, corresponding to a chunk-file, with the name
+   * incorporating the chunk-id.
+   * @throws IOException Exception on failure to create the chunk.
+   */
+  public static DynamicInputChunk createChunkForWrite(String chunkId,
+                          Configuration configuration) throws IOException {
+    return new DynamicInputChunk(chunkId, configuration);
+  }
+
+  /**
+   * Method to write records into a chunk.
+   * @param key Key from the listing file.
+   * @param value Corresponding value from the listing file.
+   * @throws IOException Exception on failure to write to the file.
+   */
+  public void write(Text key, FileStatus value) throws IOException {
+    writer.append(key, value);
+  }
+
+  /**
+   * Closes streams opened to the chunk-file.
+   */
+  public void close() {
+    IOUtils.cleanup(LOG, reader, writer);
+  }
+
+  /**
+   * Reassigns the chunk to a specified Map-Task, for consumption.
+   * @param taskId The Map-Task to which the chunk is to be reassigned.
+   * @throws IOException Exception on failure to reassign.
+   */
+  public void assignTo(TaskID taskId) throws IOException {
+    Path newPath = new Path(chunkRootPath, taskId.toString());
+    if (!fs.rename(chunkFilePath, newPath)) {
+      LOG.warn(chunkFilePath + " could not be assigned to " + taskId);
+    }
+  }
+
+  private DynamicInputChunk(Path chunkFilePath,
+                            TaskAttemptContext taskAttemptContext)
+                                   throws IOException, InterruptedException {
+    if (!areInvariantsInitialized())
+      initializeChunkInvariants(taskAttemptContext.getConfiguration());
+
+    this.chunkFilePath = chunkFilePath;
+    openForRead(taskAttemptContext);
+  }
+
+  private void openForRead(TaskAttemptContext taskAttemptContext)
+          throws IOException, InterruptedException {
+    reader = new SequenceFileRecordReader<K, V>();
+    reader.initialize(new FileSplit(chunkFilePath, 0,
+            DistCpUtils.getFileSize(chunkFilePath, configuration), null),
+            taskAttemptContext);
+  }
+
+  /**
+   * Factory method that
+   * 1. acquires a chunk for the specified map-task attempt
+   * 2. returns a DynamicInputChunk associated with the acquired chunk-file.
+   * @param taskAttemptContext The attempt-context for the map task that's
+   * trying to acquire a chunk.
+   * @return The acquired dynamic-chunk. The chunk-file is renamed to the
+   * task-id (from the attempt-context).
+   * @throws IOException Exception on failure.
+   * @throws InterruptedException Exception on failure.
+   */
+  public static DynamicInputChunk acquire(TaskAttemptContext taskAttemptContext)
+                                      throws IOException, InterruptedException {
+    if (!areInvariantsInitialized())
+        initializeChunkInvariants(taskAttemptContext.getConfiguration());
+
+    String taskId
+            = taskAttemptContext.getTaskAttemptID().getTaskID().toString();
+    Path acquiredFilePath = new Path(chunkRootPath, taskId);
+
+    if (fs.exists(acquiredFilePath)) {
+      LOG.info("Acquiring pre-assigned chunk: " + acquiredFilePath);
+      return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
+    }
+
+    for (FileStatus chunkFile : getListOfChunkFiles()) {
+      if (fs.rename(chunkFile.getPath(), acquiredFilePath)) {
+        LOG.info(taskId + " acquired " + chunkFile.getPath());
+        return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
+      }
+      else
+        LOG.warn(taskId + " could not acquire " + chunkFile.getPath());
+    }
+
+    return null;
+  }
+
+  /**
+   * Method to be called to relinquish an acquired chunk. All streams open to
+   * the chunk are closed, and the chunk-file is deleted.
+   * @throws IOException Exception thrown on failure to release (i.e. delete)
+   * the chunk file.
+   */
+  public void release() throws IOException {
+    close();
+    if (!fs.delete(chunkFilePath, false)) {
+      LOG.error("Unable to release chunk at path: " + chunkFilePath);
+      throw new IOException("Unable to release chunk at path: " + chunkFilePath);
+    }
+  }
+
+  static FileStatus [] getListOfChunkFiles() throws IOException {
+    Path chunkFilePattern = new Path(chunkRootPath, chunkFilePrefix + "*");
+    FileStatus chunkFiles[] = fs.globStatus(chunkFilePattern);
+    numChunksLeft = chunkFiles.length;
+    return chunkFiles;
+  }
+
+  /**
+   * Getter for the chunk-file's path, on HDFS.
+   * @return The qualified path to the chunk-file.
+   */
+  public Path getPath() {
+    return chunkFilePath;
+  }
+
+  /**
+   * Getter for the record-reader, opened to the chunk-file.
+   * @return Opened Sequence-file reader.
+   */
+  public SequenceFileRecordReader<K,V> getReader() {
+    assert reader != null : "Reader un-initialized!";
+    return reader;
+  }
+
+  /**
+   * Getter for the number of chunk-files left in the chunk-file directory.
+   * Useful to determine how many chunks (and hence, records) are left to be
+   * processed.
+   * @return Before the first scan of the directory, the number returned is -1.
+   * Otherwise, the number of chunk-files seen from the last scan is returned.
+   */
+  public static int getNumChunksLeft() {
+    return numChunksLeft;
+  }
+}
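
A chunk's life-cycle is: written by the input-format, claimed by a map-task through acquire(), drained through its record-reader, and finally deleted through release(). The consumer loop below is a hypothetical sketch of that life-cycle, not the actual record-reader from this patch; since DynamicInputChunk is package-private, such a consumer would have to live in the same package.

// Hypothetical consumer loop (not part of this patch): drain chunks until
// acquire() returns null, i.e. no unclaimed chunk files remain.
package org.apache.hadoop.tools.mapred.lib;

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;

class ChunkConsumerSketch {

  static void drainAllChunks(TaskAttemptContext context)
      throws IOException, InterruptedException {
    DynamicInputChunk<Text, FileStatus> chunk = DynamicInputChunk.acquire(context);
    while (chunk != null) {
      SequenceFileRecordReader<Text, FileStatus> reader = chunk.getReader();
      while (reader.nextKeyValue()) {
        // Application logic would consume reader.getCurrentKey() and
        // reader.getCurrentValue() here.
      }
      chunk.release();   // closes the reader and deletes the chunk file
      chunk = DynamicInputChunk.acquire(context);
    }
  }
}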

Some files are not shown because too many files changed in this diff.