
Merge trunk into HA branch.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1236333 13f79535-47bb-0310-9956-ffa450edef68
Aaron Myers 13 years ago
Parent
Commit
1c95060a72
67 files changed, with 10719 insertions and 151 deletions
  1. 15 13
      hadoop-common-project/hadoop-common/CHANGES.txt
  2. 11 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
  3. 32 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserFromEnv.java
  4. 2 2
      hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java
  5. 2 0
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  6. 18 0
      hadoop-mapreduce-project/CHANGES.txt
  7. 8 13
      hadoop-mapreduce-project/bin/mapred
  8. 26 12
      hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java
  9. 7 10
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java
  10. 29 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java
  11. 6 2
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java
  12. 45 52
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
  13. 4 10
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java
  14. 1 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java
  15. 62 16
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java
  16. 7 7
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
  17. 31 1
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java
  18. 13 10
      hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java
  19. 18 0
      hadoop-project/pom.xml
  20. 7 0
      hadoop-tools/hadoop-distcp/README
  21. 198 0
      hadoop-tools/hadoop-distcp/pom.xml
  22. 218 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
  23. 405 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java
  24. 104 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
  25. 218 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
  26. 525 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
  27. 100 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/FileBasedCopyListing.java
  28. 105 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java
  29. 246 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java
  30. 275 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
  31. 297 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
  32. 330 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java
  33. 124 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java
  34. 56 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java
  35. 245 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
  36. 169 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java
  37. 246 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java
  38. 292 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java
  39. 203 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicRecordReader.java
  40. 343 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
  41. 106 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java
  42. 139 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java
  43. 41 0
      hadoop-tools/hadoop-distcp/src/main/resources/distcp-default.xml
  44. 98 0
      hadoop-tools/hadoop-distcp/src/site/fml/faq.fml
  45. 47 0
      hadoop-tools/hadoop-distcp/src/site/pdf.xml
  46. 125 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/appendix.xml
  47. 200 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/architecture.xml
  48. 123 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/cli.xml
  49. 32 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/index.xml
  50. 147 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/usage.xml
  51. 139 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/StubContext.java
  52. 252 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java
  53. 275 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCp.java
  54. 542 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestFileBasedCopyListing.java
  55. 135 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestGlobbedCopyListing.java
  56. 466 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java
  57. 497 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java
  58. 419 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
  59. 826 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java
  60. 135 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyOutputFormat.java
  61. 254 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestUniformSizeInputFormat.java
  62. 162 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java
  63. 220 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
  64. 81 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestRetriableCommand.java
  65. 157 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestThrottledInputStream.java
  66. 57 0
      hadoop-tools/hadoop-distcp/src/test/resources/sslConfig.xml
  67. 1 0
      hadoop-tools/pom.xml

+ 15 - 13
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -35,11 +35,6 @@ Trunk (unreleased changes)
     HADOOP-7717. Move handling of concurrent client fail-overs to
     RetryInvocationHandler (atm)
 
-    HADOOP-6490. Use StringUtils over String#replace in Path#normalizePath.
-    (Uma Maheswara Rao G via harsh)
-
-    HADOOP-7736. Remove duplicate Path#normalizePath call. (harsh)
-
     HADOOP-7664. Remove warmings when overriding final parameter configuration
     if the override value is same as the final parameter value.
     (Ravi Prakash via suresh)
@@ -68,12 +63,6 @@ Trunk (unreleased changes)
 
     HADOOP-7899. Generate proto java files as part of the build. (tucu)
 
-    HADOOP-7574. Improve FSShell -stat, add user/group elements.
-    (XieXianshan via harsh)
-
-    HADOOP-7919. Remove the unused hadoop.logfile.* properties from the 
-    core-default.xml file. (harsh)
-
     HADOOP-7808. Port HADOOP-7510 - Add configurable option to use original 
     hostname in token instead of IP to allow server IP change. 
     (Daryn Sharp via suresh)
@@ -81,10 +70,10 @@ Trunk (unreleased changes)
     HADOOP-7957. Classes deriving GetGroupsBase should be able to override 
     proxy creation. (jitendra)
 
-    HADOOP-4515. Configuration#getBoolean must not be case sensitive. (Sho Shimauchi via harsh)
-
     HADOOP-7968. Errant println left in RPC.getHighestSupportedProtocol (Sho Shimauchi via harsh)
 
+    HADOOP-7987. Support setting the run-as user in unsecure mode. (jitendra)
+
   BUGS
 
     HADOOP-7851. Configuration.getClasses() never returns the default value. 
@@ -207,6 +196,19 @@ Release 0.23.1 - Unreleased
 
     HADOOP-7975. Add LZ4 as an entry in the default codec list, missed by HADOOP-7657 (harsh)
 
+    HADOOP-4515. Configuration#getBoolean must not be case sensitive. (Sho Shimauchi via harsh)
+
+    HADOOP-6490. Use StringUtils over String#replace in Path#normalizePath.
+    (Uma Maheswara Rao G via harsh)
+
+    HADOOP-7574. Improve FSShell -stat, add user/group elements.
+    (XieXianshan via harsh)
+
+    HADOOP-7736. Remove duplicate Path#normalizePath call. (harsh)
+
+    HADOOP-7919. Remove the unused hadoop.logfile.* properties from the 
+    core-default.xml file. (harsh)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 11 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java

@@ -80,6 +80,7 @@ public class UserGroupInformation {
    * Percentage of the ticket window to use before we renew ticket.
    */
   private static final float TICKET_RENEW_WINDOW = 0.80f;
+  static final String HADOOP_USER_NAME = "HADOOP_USER_NAME";
   
   /** 
    * UgiMetrics maintains UGI activity statistics
@@ -137,7 +138,16 @@ public class UserGroupInformation {
           LOG.debug("using kerberos user:"+user);
         }
       }
-      // if we don't have a kerberos user, use the OS user
+      //If we don't have a kerberos user and security is disabled, check
+      //if user is specified in the environment or properties
+      if (!isSecurityEnabled() && (user == null)) {
+        String envUser = System.getenv(HADOOP_USER_NAME);
+        if (envUser == null) {
+          envUser = System.getProperty(HADOOP_USER_NAME);
+        }
+        user = envUser == null ? null : new User(envUser);
+      }
+      // use the OS user
       if (user == null) {
         user = getCanonicalUser(OS_PRINCIPAL_CLASS);
         if (LOG.isDebugEnabled()) {

+ 32 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserFromEnv.java

@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.security;
+
+import java.io.IOException;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestUserFromEnv {
+
+  @Test
+  public void testUserFromEnvironment() throws IOException {
+    System.setProperty(UserGroupInformation.HADOOP_USER_NAME, "randomUser");
+    Assert.assertEquals("randomUser", UserGroupInformation.getLoginUser()
+        .getUserName());
+  }
+}
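
For context on the HADOOP-7987 change being tested above: the new lookup in UserGroupInformation honors the HADOOP_USER_NAME environment variable first and falls back to the JVM system property, and only when security is disabled. The sketch below is a hypothetical standalone illustration of the same call path the test exercises; it is not part of the commit.

```java
import java.io.IOException;
import org.apache.hadoop.security.UserGroupInformation;

public class WhoAmIDemo {
  public static void main(String[] args) throws IOException {
    // From a shell you would typically export the variable instead, e.g.
    //   HADOOP_USER_NAME=alice hadoop fs -ls /
    // Here we use the system-property fallback, which the same code path checks
    // when the environment variable is absent and Kerberos is not enabled.
    System.setProperty("HADOOP_USER_NAME", "alice");
    UserGroupInformation ugi = UserGroupInformation.getLoginUser();
    System.out.println("Running as: " + ugi.getUserName()); // expected: alice
  }
}
```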

+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/server/HttpFSServer.java

@@ -219,7 +219,7 @@ public class HttpFSServer {
    * operation is @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.GetOpValues#LISTSTATUS}
    * @param doAs user being impersonated, defualt value is none. It can be used
    * only if the current user is a HttpFSServer proxyuser.
-   * @param override, default is true. Used only for
+   * @param override default is true. Used only for
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.
    * @param blockSize block size to set, used only by
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.
@@ -419,7 +419,7 @@ public class HttpFSServer {
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#SETOWNER} operations.
    * @param group group to set, used only for
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#SETOWNER} operations.
-   * @param override, default is true. Used only for
+   * @param override default is true. Used only for
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.
    * @param blockSize block size to set, used only by
    * @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -363,6 +363,8 @@ Release 0.23.1 - UNRELEASED
 
     HDFS-442. dfsthroughput in test jar throws NPE (harsh)
 
+    HDFS-2836. HttpFSServer still has 2 javadoc warnings in trunk (revans2 via tucu)
+
 Release 0.23.0 - 2011-11-01 
 
   INCOMPATIBLE CHANGES

+ 18 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -52,6 +52,9 @@ Trunk (unreleased changes)
     MAPREDUCE-2944. Improve checking of input for JobClient.displayTasks() (XieXianshan via harsh)
 
   BUG FIXES
+    MAPREDUCE-3194. "mapred mradmin" command is broken in mrv2
+                     (Jason Lowe via bobby)
+
     MAPREDUCE-3462. Fix Gridmix JUnit testcase failures. 
                     (Ravi Prakash and Ravi Gummadi via amarrk)
 
@@ -192,6 +195,8 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3710. Improved FileInputFormat to return better locality for the
     last split. (Siddarth Seth via vinodkv)
 
+    MAPREDUCE-2765. DistCp Rewrite. (Mithun Radhakrishnan via mahadev)
+
   OPTIMIZATIONS
 
     MAPREDUCE-3567. Extraneous JobConf objects in AM heap. (Vinod Kumar
@@ -558,6 +563,19 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3630. Fixes a NullPointer exception while running TeraGen - if a
     map is asked to generate 0 records. (Mahadev Konar via sseth)
 
+    MAPREDUCE-3683. Fixed maxCapacity of queues to be product of parent
+    maxCapacities. (acmurthy)
+
+    MAPREDUCE-3713. Fixed the way head-room is allocated to applications by
+    CapacityScheduler so that it deducts current-usage per user and not
+    per-application. (Arun C Murthy via vinodkv)
+
+    MAPREDUCE-3721. Fixed a race in shuffle which caused reduces to hang.
+    (sseth via acmurthy) 
+
+    MAPREDUCE-3733. Add Apache License Header to hadoop-distcp/pom.xml.
+    (mahadev)
+
 Release 0.23.0 - 2011-11-01 
 
   INCOMPATIBLE CHANGES

+ 8 - 13
hadoop-mapreduce-project/bin/mapred

@@ -30,9 +30,6 @@ fi
 function print_usage(){
   echo "Usage: mapred [--config confdir] COMMAND"
   echo "       where COMMAND is one of:"
-  echo "  mradmin              run a Map-Reduce admin client"
-  echo "  jobtracker           run the MapReduce job Tracker node" 
-  echo "  tasktracker          run a MapReduce task Tracker node" 
   echo "  pipes                run a Pipes job"
   echo "  job                  manipulate MapReduce jobs"
   echo "  queue                get information regarding JobQueues"
@@ -51,16 +48,7 @@ fi
 COMMAND=$1
 shift
 
-if [ "$COMMAND" = "mradmin" ] ; then
-  CLASS=org.apache.hadoop.mapred.tools.MRAdmin
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
-elif [ "$COMMAND" = "jobtracker" ] ; then
-  CLASS=org.apache.hadoop.mapred.JobTracker
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOBTRACKER_OPTS"
-elif [ "$COMMAND" = "tasktracker" ] ; then
-  CLASS=org.apache.hadoop.mapred.TaskTracker
-  HADOOP_OPTS="$HADOOP_OPTS $HADOOP_TASKTRACKER_OPTS"
-elif [ "$COMMAND" = "job" ] ; then
+if [ "$COMMAND" = "job" ] ; then
   CLASS=org.apache.hadoop.mapred.JobClient
 elif [ "$COMMAND" = "queue" ] ; then
   CLASS=org.apache.hadoop.mapred.JobQueueClient
@@ -75,6 +63,13 @@ elif [ "$COMMAND" = "classpath" ] ; then
 elif [ "$COMMAND" = "groups" ] ; then
   CLASS=org.apache.hadoop.mapred.tools.GetGroups
   HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "mradmin" ] \
+    || [ "$COMMAND" = "jobtracker" ] \
+    || [ "$COMMAND" = "tasktracker" ] ; then
+  echo "Sorry, the $COMMAND command is no longer supported."
+  echo "You may find similar functionality with the \"yarn\" shell command."
+  print_usage
+  exit
 else
   echo $COMMAND - invalid command
   print_usage

+ 26 - 12
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java

@@ -92,6 +92,7 @@ public class MergeManager<K, V> {
   
   private final long memoryLimit;
   private long usedMemory;
+  private long commitMemory;
   private final long maxSingleShuffleLimit;
   
   private final int memToMemMergeOutputsThreshold; 
@@ -181,6 +182,13 @@ public class MergeManager<K, V> {
              "ioSortFactor=" + ioSortFactor + ", " +
              "memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);
 
+    if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
+      throw new RuntimeException("Invlaid configuration: "
+          + "maxSingleShuffleLimit should be less than mergeThreshold"
+          + "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
+          + "mergeThreshold: " + this.mergeThreshold);
+    }
+
     boolean allowMemToMemMerge = 
       jobConf.getBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);
     if (allowMemToMemMerge) {
@@ -245,16 +253,16 @@ public class MergeManager<K, V> {
     // all the stalled threads
     
     if (usedMemory > memoryLimit) {
-      LOG.debug(mapId + ": Stalling shuffle since usedMemory (" + usedMemory + 
-               ") is greater than memoryLimit (" + memoryLimit + ")"); 
-      
+      LOG.debug(mapId + ": Stalling shuffle since usedMemory (" + usedMemory
+          + ") is greater than memoryLimit (" + memoryLimit + ")." + 
+          " CommitMemory is (" + commitMemory + ")"); 
       return stallShuffle;
     }
     
     // Allow the in-memory shuffle to progress
-    LOG.debug(mapId + ": Proceeding with shuffle since usedMemory (" +
-        usedMemory + 
-        ") is lesser than memoryLimit (" + memoryLimit + ")"); 
+    LOG.debug(mapId + ": Proceeding with shuffle since usedMemory ("
+        + usedMemory + ") is lesser than memoryLimit (" + memoryLimit + ")."
+        + "CommitMemory is (" + commitMemory + ")"); 
     return unconditionalReserve(mapId, requestedSize, true);
   }
   
@@ -270,18 +278,24 @@ public class MergeManager<K, V> {
   }
   
   synchronized void unreserve(long size) {
+    commitMemory -= size;
     usedMemory -= size;
   }
-  
+
   public synchronized void closeInMemoryFile(MapOutput<K,V> mapOutput) { 
     inMemoryMapOutputs.add(mapOutput);
     LOG.info("closeInMemoryFile -> map-output of size: " + mapOutput.getSize()
-        + ", inMemoryMapOutputs.size() -> " + inMemoryMapOutputs.size());
-    
+        + ", inMemoryMapOutputs.size() -> " + inMemoryMapOutputs.size()
+        + ", commitMemory -> " + commitMemory + ", usedMemory ->" + usedMemory);
+
+    commitMemory+= mapOutput.getSize();
+
     synchronized (inMemoryMerger) {
-      if (!inMemoryMerger.isInProgress() && usedMemory >= mergeThreshold) {
-        LOG.info("Starting inMemoryMerger's merge since usedMemory=" +
-            usedMemory + " > mergeThreshold=" + mergeThreshold);
+      // Can hang if mergeThreshold is really low.
+      if (!inMemoryMerger.isInProgress() && commitMemory >= mergeThreshold) {
+        LOG.info("Starting inMemoryMerger's merge since commitMemory=" +
+            commitMemory + " > mergeThreshold=" + mergeThreshold + 
+            ". Current usedMemory=" + usedMemory);
         inMemoryMapOutputs.addAll(inMemoryMergedMapOutputs);
        inMemoryMergedMapOutputs.clear();
        inMemoryMerger.startMerge(inMemoryMapOutputs);
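
A rough sketch of the new merge trigger (MAPREDUCE-3721), using assumed sizes that are not taken from the commit: the in-memory merge now fires on commitMemory, i.e. memory whose map outputs have actually been fetched and closed, rather than on usedMemory, which also counts reservations for fetches still in flight.

```java
public class MergeTriggerDemo {
  public static void main(String[] args) {
    long mergeThreshold = 64L << 20; // 64 MB, an assumed configuration value
    long usedMemory = 0;             // reserved, including fetches still in flight
    long commitMemory = 0;           // closed (fully fetched) in-memory outputs only

    usedMemory += 48L << 20;         // reserve() for an output being shuffled
    // ...fetch completes and closeInMemoryFile() runs:
    commitMemory += 48L << 20;

    // The old check keyed off usedMemory; the fixed check uses commitMemory,
    // so a partially arrived output can no longer start (and then stall) a merge.
    boolean startMerge = commitMemory >= mergeThreshold;
    System.out.println("start merge? " + startMerge); // false: 48 MB < 64 MB
  }
}
```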

+ 7 - 10
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java

@@ -295,10 +295,6 @@ public class SchedulerApp {
     }
   }
 
-  public synchronized void setAvailableResourceLimit(Resource globalLimit) {
-    this.resourceLimit = globalLimit; 
-  }
-
   public synchronized RMContainer getRMContainer(ContainerId id) {
     return liveContainers.get(id);
   }
@@ -446,20 +442,21 @@ public class SchedulerApp {
     return reservedContainers;
   }
   
+  public synchronized void setHeadroom(Resource globalLimit) {
+    this.resourceLimit = globalLimit; 
+  }
+
   /**
    * Get available headroom in terms of resources for the application's user.
    * @return available resource headroom
    */
   public synchronized Resource getHeadroom() {
-    Resource limit = Resources.subtract(resourceLimit, currentConsumption);
-    Resources.subtractFrom(limit, currentReservation);
-
     // Corner case to deal with applications being slightly over-limit
-    if (limit.getMemory() < 0) {
-      limit.setMemory(0);
+    if (resourceLimit.getMemory() < 0) {
+      resourceLimit.setMemory(0);
     }
     
-    return limit;
+    return resourceLimit;
   }
 
   public Queue getQueue() {
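
A minimal numeric sketch (assumed figures) of what setHeadroom() now receives under MAPREDUCE-3713: the scheduler subtracts the consumption of all of a user's applications from that user's limit and clamps at zero, instead of each application subtracting only its own usage and reservation.

```java
public class HeadroomDemo {
  public static void main(String[] args) {
    int userLimitMB    = 8 * 1024;  // what computeUserLimit(...) allows this user
    int userConsumedMB = 6 * 1024;  // total across all of the user's applications
    int headroomMB = Math.max(0, userLimitMB - userConsumedMB);
    // Every application of this user is told the same 2048 MB of headroom.
    System.out.println("headroom = " + headroomMB + " MB");
  }
}
```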

+ 29 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueueUtils.java

@@ -17,12 +17,19 @@
  */
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
 
+import org.apache.hadoop.yarn.api.records.Resource;
+
 class CSQueueUtils {
   
   public static void checkMaxCapacity(String queueName, 
       float capacity, float maximumCapacity) {
-    if (Math.round(100 * maximumCapacity) != CapacitySchedulerConfiguration.UNDEFINED && 
+    if (maximumCapacity < 0.0f || maximumCapacity > 1.0f || 
         maximumCapacity < capacity) {
+      throw new IllegalArgumentException(
+          "Illegal value  of maximumCapacity " + maximumCapacity + 
+          " used in call to setMaxCapacity for queue " + queueName);
+    }
+    if (maximumCapacity < capacity) {
       throw new IllegalArgumentException(
           "Illegal call to setMaxCapacity. " +
           "Queue '" + queueName + "' has " +
@@ -30,5 +37,26 @@ class CSQueueUtils {
           "maximumCapacity (" + maximumCapacity + ")" );
     }
   }
+
+  public static float computeAbsoluteMaximumCapacity(
+      float maximumCapacity, CSQueue parent) {
+    float parentAbsMaxCapacity = 
+        (parent == null) ? 1.0f : parent.getAbsoluteMaximumCapacity();
+    return (parentAbsMaxCapacity * maximumCapacity);
+  }
+
+  public static int computeMaxActiveApplications(Resource clusterResource,
+      float maxAMResourcePercent, float absoluteCapacity) {
+    return 
+        Math.max(
+            (int)((clusterResource.getMemory() / (float)LeafQueue.DEFAULT_AM_RESOURCE) * 
+                   maxAMResourcePercent * absoluteCapacity), 
+            1);
+  }
+
+  public static int computeMaxActiveApplicationsPerUser(
+      int maxActiveApplications, int userLimit, float userLimitFactor) {
+    return (int)(maxActiveApplications * (userLimit / 100.0f) * userLimitFactor);
+  }
   
 }
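
To make the two new helpers concrete, here is a back-of-the-envelope calculation with assumed inputs; the per-AM resource below is only a stand-in for LeafQueue.DEFAULT_AM_RESOURCE, whose value is not shown in this diff.

```java
public class MaxActiveAppsDemo {
  public static void main(String[] args) {
    int clusterMemoryMB = 100 * 1024;   // assumed 100 GB cluster
    int amResourceMB    = 2 * 1024;     // assumed per-ApplicationMaster resource
    float maxAMResourcePercent = 0.1f;  // 10% of the cluster may be AMs
    float absoluteCapacity     = 0.2f;  // queue sits at 20% of the cluster
    int userLimit = 100;
    float userLimitFactor = 1.0f;

    // Same formula as CSQueueUtils.computeMaxActiveApplications:
    int maxActiveApplications = Math.max(
        (int) ((clusterMemoryMB / (float) amResourceMB)
            * maxAMResourcePercent * absoluteCapacity),
        1);                              // (50 * 0.1 * 0.2) = 1.0 -> 1

    // Same formula as computeMaxActiveApplicationsPerUser:
    int maxActiveApplicationsPerUser =
        (int) (maxActiveApplications * (userLimit / 100.0f) * userLimitFactor); // 1

    System.out.println(maxActiveApplications + " / " + maxActiveApplicationsPerUser);
  }
}
```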

+ 6 - 2
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java

@@ -149,7 +149,7 @@ public class CapacitySchedulerConfiguration extends Configuration {
       throw new IllegalArgumentException("Illegal " +
       		"capacity of " + capacity + " for queue " + queue);
     }
-    LOG.debug("CSConf - setCapacity: queuePrefix=" + getQueuePrefix(queue) + 
+    LOG.debug("CSConf - getCapacity: queuePrefix=" + getQueuePrefix(queue) + 
         ", capacity=" + capacity);
     return capacity;
   }
@@ -162,11 +162,15 @@ public class CapacitySchedulerConfiguration extends Configuration {
 
   public int getMaximumCapacity(String queue) {
     int maxCapacity = 
-      getInt(getQueuePrefix(queue) + MAXIMUM_CAPACITY, UNDEFINED);
+      getInt(getQueuePrefix(queue) + MAXIMUM_CAPACITY, MAXIMUM_CAPACITY_VALUE);
     return maxCapacity;
   }
   
   public void setMaximumCapacity(String queue, int maxCapacity) {
+    if (maxCapacity > MAXIMUM_CAPACITY_VALUE) {
+      throw new IllegalArgumentException("Illegal " +
+          "maximum-capacity of " + maxCapacity + " for queue " + queue);
+    }
     setInt(getQueuePrefix(queue) + MAXIMUM_CAPACITY, maxCapacity);
     LOG.debug("CSConf - setMaxCapacity: queuePrefix=" + getQueuePrefix(queue) + 
         ", maxCapacity=" + maxCapacity);

+ 45 - 52
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

@@ -144,10 +144,10 @@ public class LeafQueue implements CSQueue {
       (float)cs.getConfiguration().getCapacity(getQueuePath()) / 100;
     float absoluteCapacity = parent.getAbsoluteCapacity() * capacity;
 
-    float maximumCapacity = (float)cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
+    float maximumCapacity = 
+        (float)cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
     float absoluteMaxCapacity = 
-      (Math.round(maximumCapacity * 100) == CapacitySchedulerConfiguration.UNDEFINED) ? 
-          Float.MAX_VALUE : (parent.getAbsoluteCapacity() * maximumCapacity);
+        CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
 
     int userLimit = cs.getConfiguration().getUserLimit(getQueuePath());
     float userLimitFactor = 
@@ -161,10 +161,10 @@ public class LeafQueue implements CSQueue {
     this.maxAMResourcePercent = 
         cs.getConfiguration().getMaximumApplicationMasterResourcePercent();
     int maxActiveApplications = 
-        computeMaxActiveApplications(cs.getClusterResources(), 
+        CSQueueUtils.computeMaxActiveApplications(cs.getClusterResources(), 
            maxAMResourcePercent, absoluteCapacity);
     int maxActiveApplicationsPerUser = 
-        computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit, 
+        CSQueueUtils.computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit, 
            userLimitFactor);
 
     this.queueInfo = recordFactory.newRecordInstance(QueueInfo.class);
@@ -193,20 +193,6 @@ public class LeafQueue implements CSQueue {
     this.activeApplications = new TreeSet<SchedulerApp>(applicationComparator);
   }
 
-  private int computeMaxActiveApplications(Resource clusterResource,
-      float maxAMResourcePercent, float absoluteCapacity) {
-    return 
-        Math.max(
-            (int)((clusterResource.getMemory() / (float)DEFAULT_AM_RESOURCE) * 
-                   maxAMResourcePercent * absoluteCapacity), 
-            1);
-  }
-  
-  private int computeMaxActiveApplicationsPerUser(int maxActiveApplications, 
-      int userLimit, float userLimitFactor) {
-    return (int)(maxActiveApplications * (userLimit / 100.0f) * userLimitFactor);
-  }
-  
   private synchronized void setupQueueConfigs(
       float capacity, float absoluteCapacity, 
       float maximumCapacity, float absoluteMaxCapacity,
@@ -254,8 +240,8 @@ public class LeafQueue implements CSQueue {
         "maxCapacity = " + maximumCapacity +
         " [= configuredMaxCapacity ]" + "\n" +
         "absoluteMaxCapacity = " + absoluteMaxCapacity +
-        " [= Float.MAX_VALUE if maximumCapacity undefined, " +
-        "(parentAbsoluteCapacity * maximumCapacity) / 100 otherwise ]" + "\n" +
+        " [= 1.0 maximumCapacity undefined, " +
+        "(parentAbsoluteMaxCapacity * maximumCapacity) / 100 otherwise ]" + "\n" +
         "userLimit = " + userLimit +
         " [= configuredUserLimit ]" + "\n" +
         "userLimitFactor = " + userLimitFactor +
@@ -400,9 +386,7 @@ public class LeafQueue implements CSQueue {
     
     this.maximumCapacity = maximumCapacity;
     this.absoluteMaxCapacity = 
-      (Math.round(maximumCapacity * 100) == CapacitySchedulerConfiguration.UNDEFINED) ? 
-          Float.MAX_VALUE : 
-          (parent.getAbsoluteCapacity() * maximumCapacity);
+        CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
   }
   
   /**
@@ -736,12 +720,11 @@ public class LeafQueue implements CSQueue {
       if(LOG.isDebugEnabled()) {
         LOG.debug("pre-assignContainers for application "
         + application.getApplicationId());
+        application.showRequests();
       }
-      application.showRequests();
 
       synchronized (application) {
-        computeAndSetUserResourceLimit(application, clusterResource);
-        
+        // Schedule in priority order
         for (Priority priority : application.getPriorities()) {
           // Required resource
           Resource required = 
@@ -752,15 +735,21 @@ public class LeafQueue implements CSQueue {
             continue;
           }
 
-          // Are we going over limits by allocating to this application?
-          // Maximum Capacity of the queue
+          // Compute & set headroom
+          // Note: We set the headroom with the highest priority request 
+          //       as the target. 
+          //       This works since we never assign lower priority requests
+          //       before all higher priority ones are serviced.
+          Resource userLimit = 
+              computeAndSetUserResourceLimit(application, clusterResource, 
+                  required);
+
+          // Check queue max-capacity limit
           if (!assignToQueue(clusterResource, required)) {
             return NULL_ASSIGNMENT;
           }
 
-          // User limits
-          Resource userLimit = 
-            computeUserLimit(application, clusterResource, required); 
+          // Check user limit
           if (!assignToUser(application.getUser(), userLimit)) {
             break; 
           }
@@ -774,7 +763,7 @@ public class LeafQueue implements CSQueue {
                 null);
           
           Resource assigned = assignment.getResource();
-            
+          
           // Did we schedule or reserve a container?
           if (Resources.greaterThan(assigned, Resources.none())) {
 
@@ -835,25 +824,28 @@ public class LeafQueue implements CSQueue {
     float potentialNewCapacity = 
       (float)(usedResources.getMemory() + required.getMemory()) / 
         clusterResource.getMemory();
-    LOG.info(getQueueName() + 
-        " usedResources: " + usedResources.getMemory() + 
-        " currentCapacity " + ((float)usedResources.getMemory())/clusterResource.getMemory() + 
-        " required " + required.getMemory() +
-        " potentialNewCapacity: " + potentialNewCapacity + " ( " +
-        " max-capacity: " + absoluteMaxCapacity + ")");
     if (potentialNewCapacity > absoluteMaxCapacity) {
+      LOG.info(getQueueName() + 
+          " usedResources: " + usedResources.getMemory() +
+          " clusterResources: " + clusterResource.getMemory() +
+          " currentCapacity " + ((float)usedResources.getMemory())/clusterResource.getMemory() + 
+          " required " + required.getMemory() +
+          " potentialNewCapacity: " + potentialNewCapacity + " ( " +
+          " max-capacity: " + absoluteMaxCapacity + ")");
       return false;
     }
     return true;
   }
 
-  private void computeAndSetUserResourceLimit(SchedulerApp application, 
-      Resource clusterResource) {
-    Resource userLimit = 
-        computeUserLimit(application, clusterResource, Resources.none());
-    application.setAvailableResourceLimit(userLimit);
-    metrics.setAvailableResourcesToUser(application.getUser(), 
-        application.getHeadroom());
+  private Resource computeAndSetUserResourceLimit(SchedulerApp application, 
+      Resource clusterResource, Resource required) {
+    String user = application.getUser();
+    Resource limit = computeUserLimit(application, clusterResource, required);
+    Resource headroom = 
+        Resources.subtract(limit, getUser(user).getConsumedResources());
+    application.setHeadroom(headroom);
+    metrics.setAvailableResourcesToUser(user, headroom);
+    return limit;
   }
   
   private int roundUp(int memory) {
@@ -924,7 +916,7 @@ public class LeafQueue implements CSQueue {
     User user = getUser(userName);
     
     // Note: We aren't considering the current request since there is a fixed
-    // overhead of the AM, but it's a >= check, so... 
+    // overhead of the AM, but it's a > check, not a >= check, so... 
     if ((user.getConsumedResources().getMemory()) > limit.getMemory()) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("User " + userName + " in queue " + getQueueName() + 
@@ -1242,8 +1234,8 @@ public class LeafQueue implements CSQueue {
         // happen under scheduler's lock... 
         // So, this is, in effect, a transaction across application & node
         if (rmContainer.getState() == RMContainerState.RESERVED) {
-          application.unreserve(node, rmContainer.getReservedPriority());
-          node.unreserveResource(application);
+          unreserve(application, rmContainer.getReservedPriority(), 
+              node, rmContainer);
         } else {
           application.containerCompleted(rmContainer, containerStatus, event);
           node.releaseContainer(container);
@@ -1308,15 +1300,16 @@ public class LeafQueue implements CSQueue {
   public synchronized void updateClusterResource(Resource clusterResource) {
     // Update queue properties
     maxActiveApplications = 
-        computeMaxActiveApplications(clusterResource, maxAMResourcePercent, 
+        CSQueueUtils.computeMaxActiveApplications(clusterResource, maxAMResourcePercent, 
            absoluteCapacity);
     maxActiveApplicationsPerUser = 
-        computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit, 
+        CSQueueUtils.computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit, 
            userLimitFactor);
     
     // Update application properties
     for (SchedulerApp application : activeApplications) {
-      computeAndSetUserResourceLimit(application, clusterResource);
+      computeAndSetUserResourceLimit(
+          application, clusterResource, Resources.none());
     }
   }
   

+ 4 - 10
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java

@@ -118,16 +118,14 @@ public class ParentQueue implements CSQueue {
     }
 
     float capacity = (float) rawCapacity / 100;
-
     float parentAbsoluteCapacity = 
-      (parent == null) ? 1.0f : parent.getAbsoluteCapacity();
+      (rootQueue) ? 1.0f : parent.getAbsoluteCapacity();
     float absoluteCapacity = parentAbsoluteCapacity * capacity; 
 
-    float maximumCapacity = 
+    float  maximumCapacity =
       (float) cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
     float absoluteMaxCapacity = 
-      (Math.round(maximumCapacity * 100) == CapacitySchedulerConfiguration.UNDEFINED) ? 
-          Float.MAX_VALUE :  (parentAbsoluteCapacity * maximumCapacity);
+          CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
     
     QueueState state = cs.getConfiguration().getState(getQueuePath());
 
@@ -497,12 +495,8 @@ public class ParentQueue implements CSQueue {
     CSQueueUtils.checkMaxCapacity(getQueueName(), capacity, maximumCapacity);
     
     this.maximumCapacity = maximumCapacity;
-    float parentAbsoluteCapacity = 
-        (rootQueue) ? 100.0f : parent.getAbsoluteCapacity();
     this.absoluteMaxCapacity = 
-      (maximumCapacity == CapacitySchedulerConfiguration.UNDEFINED) ? 
-          Float.MAX_VALUE : 
-          (parentAbsoluteCapacity * maximumCapacity);
+        CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
   }
 
   @Override
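
A quick arithmetic check of the MAPREDUCE-3683 semantics encoded by computeAbsoluteMaximumCapacity, using made-up queue settings rather than anything from this commit:

```java
public class AbsoluteMaxCapacityDemo {
  public static void main(String[] args) {
    float rootAbsMax = 1.0f;                     // root is always the whole cluster
    float parentMax  = 0.8f;                     // parent queue configured at 80%
    float leafMax    = 0.5f;                     // leaf queue configured at 50%

    float parentAbsMax = rootAbsMax * parentMax; // 0.8
    float leafAbsMax   = parentAbsMax * leafMax; // 0.4: product of parent maxima,
                                                 // no longer Float.MAX_VALUE when unset
    System.out.println(leafAbsMax);
  }
}
```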

+ 1 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java

@@ -358,7 +358,7 @@ public class FifoScheduler implements ResourceScheduler {
         }
       }
       
-      application.setAvailableResourceLimit(clusterResource);
+      application.setHeadroom(clusterResource);
       
       LOG.debug("post-assignContainers");
       application.showRequests();

+ 62 - 16
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java

@@ -21,16 +21,24 @@ import static org.junit.Assert.*;
 import static org.mockito.Mockito.*;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.QueueACL;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.ResourceRequest;
+import org.apache.hadoop.yarn.factories.RecordFactory;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
 import org.junit.After;
@@ -283,38 +291,76 @@ public class TestApplicationLimits {
     final String user_0 = "user_0";
     final String user_1 = "user_1";
     
-    int APPLICATION_ID = 0;
+    RecordFactory recordFactory = 
+        RecordFactoryProvider.getRecordFactory(null);
+    RMContext rmContext = TestUtils.getMockRMContext();
+
+    Priority priority_1 = TestUtils.createMockPriority(1);
 
-    // Submit first application from user_0, check headroom
-    SchedulerApp app_0_0 = getMockApplication(APPLICATION_ID++, user_0);
+    // Submit first application with some resource-requests from user_0, 
+    // and check headroom
+    final ApplicationAttemptId appAttemptId_0_0 = 
+        TestUtils.getMockApplicationAttemptId(0, 0); 
+    SchedulerApp app_0_0 = 
+        spy(new SchedulerApp(appAttemptId_0_0, user_0, queue, rmContext, null));
     queue.submitApplication(app_0_0, user_0, A);
-    queue.assignContainers(clusterResource, node_0); // Schedule to compute
+
+    List<ResourceRequest> app_0_0_requests = new ArrayList<ResourceRequest>();
+    app_0_0_requests.add(
+        TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2, 
+            priority_1, recordFactory));
+    app_0_0.updateResourceRequests(app_0_0_requests);
+
+    // Schedule to compute 
+    queue.assignContainers(clusterResource, node_0);
     Resource expectedHeadroom = Resources.createResource(10*16*GB);
-    verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
+    verify(app_0_0).setHeadroom(eq(expectedHeadroom));
 
     // Submit second application from user_0, check headroom
-    SchedulerApp app_0_1 = getMockApplication(APPLICATION_ID++, user_0);
+    final ApplicationAttemptId appAttemptId_0_1 = 
+        TestUtils.getMockApplicationAttemptId(1, 0); 
+    SchedulerApp app_0_1 = 
+        spy(new SchedulerApp(appAttemptId_0_1, user_0, queue, rmContext, null));
     queue.submitApplication(app_0_1, user_0, A);
+    
+    List<ResourceRequest> app_0_1_requests = new ArrayList<ResourceRequest>();
+    app_0_1_requests.add(
+        TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2, 
+            priority_1, recordFactory));
+    app_0_1.updateResourceRequests(app_0_1_requests);
+
+    // Schedule to compute 
     queue.assignContainers(clusterResource, node_0); // Schedule to compute
-    verify(app_0_0, times(2)).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));// no change
+    verify(app_0_0, times(2)).setHeadroom(eq(expectedHeadroom));
+    verify(app_0_1).setHeadroom(eq(expectedHeadroom));// no change
    
     // Submit first application from user_1, check  for new headroom
-    SchedulerApp app_1_0 = getMockApplication(APPLICATION_ID++, user_1);
+    final ApplicationAttemptId appAttemptId_1_0 = 
+        TestUtils.getMockApplicationAttemptId(2, 0); 
+    SchedulerApp app_1_0 = 
+        spy(new SchedulerApp(appAttemptId_1_0, user_1, queue, rmContext, null));
     queue.submitApplication(app_1_0, user_1, A);
+
+    List<ResourceRequest> app_1_0_requests = new ArrayList<ResourceRequest>();
+    app_1_0_requests.add(
+        TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2, 
+            priority_1, recordFactory));
+    app_1_0.updateResourceRequests(app_1_0_requests);
+    
+    // Schedule to compute 
     queue.assignContainers(clusterResource, node_0); // Schedule to compute
     expectedHeadroom = Resources.createResource(10*16*GB / 2); // changes
-    verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_1_0).setAvailableResourceLimit(eq(expectedHeadroom));
-    
+    verify(app_0_0).setHeadroom(eq(expectedHeadroom));
+    verify(app_0_1).setHeadroom(eq(expectedHeadroom));
+    verify(app_1_0).setHeadroom(eq(expectedHeadroom));
+
     // Now reduce cluster size and check for the smaller headroom
     clusterResource = Resources.createResource(90*16*GB);
     queue.assignContainers(clusterResource, node_0); // Schedule to compute
     expectedHeadroom = Resources.createResource(9*16*GB / 2); // changes
-    verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));
-    verify(app_1_0).setAvailableResourceLimit(eq(expectedHeadroom));
+    verify(app_0_0).setHeadroom(eq(expectedHeadroom));
+    verify(app_0_1).setHeadroom(eq(expectedHeadroom));
+    verify(app_1_0).setHeadroom(eq(expectedHeadroom));
   }
  
 

+ 7 - 7
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java

@@ -255,7 +255,7 @@ public class TestLeafQueue {
     // Manipulate queue 'a'
     // Manipulate queue 'a'
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     //unset maxCapacity
     //unset maxCapacity
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
 
 
     // Users
     // Users
     final String user_0 = "user_0";
     final String user_0 = "user_0";
@@ -377,7 +377,7 @@ public class TestLeafQueue {
     // Mock the queue
     // Mock the queue
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     //unset maxCapacity
     //unset maxCapacity
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
     
     
     // Users
     // Users
     final String user_0 = "user_0";
     final String user_0 = "user_0";
@@ -491,7 +491,7 @@ public class TestLeafQueue {
     
     
     // Revert max-capacity and user-limit-factor
     // Now, allocations should goto app_3 since it's under user-limit 
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
     a.setUserLimitFactor(1);
     a.assignContainers(clusterResource, node_0);
     assertEquals(7*GB, a.getUsedResources().getMemory()); 
@@ -548,7 +548,7 @@ public class TestLeafQueue {
     // Manipulate queue 'a'
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     //unset maxCapacity
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
 
     // Users
     final String user_0 = "user_0";
@@ -571,7 +571,7 @@ public class TestLeafQueue {
     String host_0 = "host_0";
     SchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 4*GB);
     
-    final int numNodes = 1;
+    final int numNodes = 2;
     Resource clusterResource = Resources.createResource(numNodes * (4*GB));
     when(csContext.getNumClusterNodes()).thenReturn(numNodes);
     
@@ -646,7 +646,7 @@ public class TestLeafQueue {
     // Manipulate queue 'a'
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     //unset maxCapacity
-    a.setMaxCapacity(-0.01f);
+    a.setMaxCapacity(1.0f);
     a.setUserLimitFactor(10);
 
     // Users
@@ -673,7 +673,7 @@ public class TestLeafQueue {
     String host_1 = "host_1";
     SchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 4*GB);
     
-    final int numNodes = 2;
+    final int numNodes = 3;
     Resource clusterResource = Resources.createResource(numNodes * (4*GB));
     when(csContext.getNumClusterNodes()).thenReturn(numNodes);
     when(csContext.getMaximumResourceCapability()).thenReturn(

+ 31 - 1
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestQueueParsing.java

@@ -30,6 +30,8 @@ public class TestQueueParsing {
 
 
   private static final Log LOG = LogFactory.getLog(TestQueueParsing.class);
   
+  private static final double DELTA = 0.000001;
+  
   @Test
   public void testQueueParsing() throws Exception {
     CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
@@ -37,6 +39,20 @@ public class TestQueueParsing {
 
 
     CapacityScheduler capacityScheduler = new CapacityScheduler();
     capacityScheduler.reinitialize(conf, null, null);
+    
+    CSQueue a = capacityScheduler.getQueue("a");
+    Assert.assertEquals(0.10, a.getAbsoluteCapacity(), DELTA);
+    Assert.assertEquals(0.15, a.getAbsoluteMaximumCapacity(), DELTA);
+    
+    CSQueue b1 = capacityScheduler.getQueue("b1");
+    Assert.assertEquals(0.2 * 0.5, b1.getAbsoluteCapacity(), DELTA);
+    Assert.assertEquals("Parent B has no MAX_CAP", 
+        0.85, b1.getAbsoluteMaximumCapacity(), DELTA);
+    
+    CSQueue c12 = capacityScheduler.getQueue("c12");
+    Assert.assertEquals(0.7 * 0.5 * 0.45, c12.getAbsoluteCapacity(), DELTA);
+    Assert.assertEquals(0.7 * 0.55 * 0.7, 
+        c12.getAbsoluteMaximumCapacity(), DELTA);
   }
   
   private void setupQueueConfiguration(CapacitySchedulerConfiguration conf) {
@@ -47,12 +63,14 @@ public class TestQueueParsing {
     
     
     final String A = CapacitySchedulerConfiguration.ROOT + ".a";
     conf.setCapacity(A, 10);
+    conf.setMaximumCapacity(A, 15);
     
     final String B = CapacitySchedulerConfiguration.ROOT + ".b";
     conf.setCapacity(B, 20);
-
+    
     final String C = CapacitySchedulerConfiguration.ROOT + ".c";
     conf.setCapacity(C, 70);
+    conf.setMaximumCapacity(C, 70);
 
     LOG.info("Setup top-level queues");
     
@@ -61,15 +79,20 @@ public class TestQueueParsing {
     final String A2 = A + ".a2";
     conf.setQueues(A, new String[] {"a1", "a2"});
     conf.setCapacity(A1, 30);
+    conf.setMaximumCapacity(A1, 45);
     conf.setCapacity(A2, 70);
+    conf.setMaximumCapacity(A2, 85);
     
     final String B1 = B + ".b1";
     final String B2 = B + ".b2";
     final String B3 = B + ".b3";
     conf.setQueues(B, new String[] {"b1", "b2", "b3"});
     conf.setCapacity(B1, 50);
+    conf.setMaximumCapacity(B1, 85);
     conf.setCapacity(B2, 30);
+    conf.setMaximumCapacity(B2, 35);
     conf.setCapacity(B3, 20);
+    conf.setMaximumCapacity(B3, 35);
 
     final String C1 = C + ".c1";
     final String C2 = C + ".c2";
@@ -77,9 +100,13 @@ public class TestQueueParsing {
     final String C4 = C + ".c4";
     conf.setQueues(C, new String[] {"c1", "c2", "c3", "c4"});
     conf.setCapacity(C1, 50);
+    conf.setMaximumCapacity(C1, 55);
     conf.setCapacity(C2, 10);
+    conf.setMaximumCapacity(C2, 25);
     conf.setCapacity(C3, 35);
+    conf.setMaximumCapacity(C3, 38);
     conf.setCapacity(C4, 5);
+    conf.setMaximumCapacity(C4, 5);
     
     LOG.info("Setup 2nd-level queues");
     
@@ -89,8 +116,11 @@ public class TestQueueParsing {
     final String C13 = C1 + ".c13";
     conf.setQueues(C1, new String[] {"c11", "c12", "c13"});
     conf.setCapacity(C11, 15);
+    conf.setMaximumCapacity(C11, 30);
     conf.setCapacity(C12, 45);
+    conf.setMaximumCapacity(C12, 70);
     conf.setCapacity(C13, 40);
+    conf.setMaximumCapacity(C13, 40);
     
     LOG.info("Setup 3rd-level queues");
   }

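The new assertions encode the capacity-hierarchy arithmetic: a queue's absolute capacity is the product of the relative capacities on its path from the root, and likewise for the absolute maximum capacity (an unset parent maximum appears to be treated as 100%, which is why b1 asserts 0.85 directly). A small sketch of that product, matching the c12 figures above; the helper name is made up for illustration.

// Illustration only: the product rule behind the absolute-capacity assertions.
public final class AbsoluteCapacity {
  private AbsoluteCapacity() {}

  // Multiply relative capacities (fractions of the parent) from root to leaf.
  static float product(float... relativeCapacities) {
    float result = 1.0f;
    for (float c : relativeCapacities) {
      result *= c;
    }
    return result;
  }

  public static void main(String[] args) {
    // root.c.c1.c12: capacities 70%, 50%, 45% -> absolute capacity 0.1575
    System.out.println(product(0.70f, 0.50f, 0.45f));
    // maximum capacities 70%, 55%, 70% -> absolute maximum capacity 0.2695
    System.out.println(product(0.70f, 0.55f, 0.70f));
  }
}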
+ 13 - 10
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesCapacitySched.java

@@ -235,12 +235,13 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
         Element qElem = (Element) queues.item(j);
         String qName = WebServicesTestUtils.getXmlString(qElem, "queueName");
         String q = CapacitySchedulerConfiguration.ROOT + "." + qName;
-        verifySubQueueXML(qElem, q, 100);
+        verifySubQueueXML(qElem, q, 100, 100);
       }
     }
   }
 
-  public void verifySubQueueXML(Element qElem, String q, float parentAbsCapacity)
+  public void verifySubQueueXML(Element qElem, String q, 
+      float parentAbsCapacity, float parentAbsMaxCapacity)
       throws Exception {
     NodeList queues = qElem.getElementsByTagName("subQueues");
     QueueInfo qi = (queues != null) ? new QueueInfo() : new LeafQueueInfo();
@@ -258,14 +259,15 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
         WebServicesTestUtils.getXmlString(qElem, "usedResources");
     qi.queueName = WebServicesTestUtils.getXmlString(qElem, "queueName");
     qi.state = WebServicesTestUtils.getXmlString(qElem, "state");
-    verifySubQueueGeneric(q, qi, parentAbsCapacity);
+    verifySubQueueGeneric(q, qi, parentAbsCapacity, parentAbsMaxCapacity);
 
     if (queues != null) {
       for (int j = 0; j < queues.getLength(); j++) {
         Element subqElem = (Element) queues.item(j);
         String qName = WebServicesTestUtils.getXmlString(subqElem, "queueName");
         String q2 = q + "." + qName;
-        verifySubQueueXML(subqElem, q2, qi.absoluteCapacity);
+        verifySubQueueXML(subqElem, q2, 
+            qi.absoluteCapacity, qi.absoluteMaxCapacity);
       }
     } else {
       LeafQueueInfo lqi = (LeafQueueInfo) qi;
@@ -309,7 +311,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
     for (int i = 0; i < arr.length(); i++) {
       JSONObject obj = arr.getJSONObject(i);
       String q = CapacitySchedulerConfiguration.ROOT + "." + obj.getString("queueName");
-      verifySubQueue(obj, q, 100);
+      verifySubQueue(obj, q, 100, 100);
     }
   }
 
@@ -323,7 +325,8 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
     assertTrue("queueName doesn't match", "root".matches(queueName));
   }
 
-  private void verifySubQueue(JSONObject info, String q, float parentAbsCapacity)
+  private void verifySubQueue(JSONObject info, String q, 
+      float parentAbsCapacity, float parentAbsMaxCapacity)
       throws JSONException, Exception {
     int numExpectedElements = 11;
     boolean isParentQueue = true;
@@ -345,7 +348,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
     qi.queueName = info.getString("queueName");
     qi.state = info.getString("state");
 
-    verifySubQueueGeneric(q, qi, parentAbsCapacity);
+    verifySubQueueGeneric(q, qi, parentAbsCapacity, parentAbsMaxCapacity);
 
     if (isParentQueue) {
       JSONArray arr = info.getJSONArray("subQueues");
@@ -353,7 +356,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
       for (int i = 0; i < arr.length(); i++) {
         JSONObject obj = arr.getJSONObject(i);
         String q2 = q + "." + obj.getString("queueName");
-        verifySubQueue(obj, q2, qi.absoluteCapacity);
+        verifySubQueue(obj, q2, qi.absoluteCapacity, qi.absoluteMaxCapacity);
       }
     } else {
       LeafQueueInfo lqi = (LeafQueueInfo) qi;
@@ -371,7 +374,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
   }
 
   private void verifySubQueueGeneric(String q, QueueInfo info,
-      float parentAbsCapacity) throws Exception {
+      float parentAbsCapacity, float parentAbsMaxCapacity) throws Exception {
     String[] qArr = q.split("\\.");
     assertTrue("q name invalid: " + q, qArr.length > 1);
     String qshortName = qArr[qArr.length - 1];
@@ -380,7 +383,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
     assertEquals("capacity doesn't match", csConf.getCapacity(q),
         info.capacity, 1e-3f);
     float expectCapacity = csConf.getMaximumCapacity(q);
-    float expectAbsMaxCapacity = parentAbsCapacity * (info.maxCapacity/100);
+    float expectAbsMaxCapacity = parentAbsMaxCapacity * (info.maxCapacity/100);
     if (CapacitySchedulerConfiguration.UNDEFINED == expectCapacity) {
       expectCapacity = 100;
       expectAbsMaxCapacity = 100;

+ 18 - 0
hadoop-project/pom.xml

@@ -714,11 +714,21 @@
           <artifactId>maven-project-info-reports-plugin</artifactId>
           <version>2.4</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-resources-plugin</artifactId>
+          <version>2.2</version>
+        </plugin>
         <plugin>
           <groupId>org.codehaus.mojo</groupId>
           <artifactId>exec-maven-plugin</artifactId>
           <version>1.2</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-pdf-plugin</artifactId>
+          <version>1.1</version>
+        </plugin>
       </plugins>
     </pluginManagement>
 
@@ -778,6 +788,14 @@
           </excludes>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-pdf-plugin</artifactId>
+        <configuration>
+          <outputDirectory>${project.reporting.outputDirectory}</outputDirectory>
+          <includeReports>false</includeReports>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 

+ 7 - 0
hadoop-tools/hadoop-distcp/README

@@ -0,0 +1,7 @@
+DistCp (distributed copy) is a tool used for large inter/intra-cluster copying. 
+It uses Map/Reduce to effect its distribution, error handling and recovery, 
+and reporting. It expands a list of files and directories into input to map tasks, 
+each of which will copy a partition of the files specified in the source list.
+
+Version 0.1 (2010/08/02 sriksun)
+ - Initial Version

+ 198 - 0
hadoop-tools/hadoop-distcp/pom.xml

@@ -0,0 +1,198 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+<project>
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hadoop</groupId>
+    <artifactId>hadoop-project</artifactId>
+    <version>0.24.0-SNAPSHOT</version>
+    <relativePath>../../hadoop-project</relativePath>
+  </parent>
+  <groupId>org.apache.hadoop.tools</groupId>
+  <artifactId>hadoop-distcp</artifactId>
+  <version>0.24.0-SNAPSHOT</version>
+  <description>Apache Hadoop Distributed Copy</description>
+  <name>Apache Hadoop Distributed Copy</name>
+  <packaging>jar</packaging>
+
+  <properties>
+    <file.encoding>UTF-8</file.encoding>
+    <downloadSources>true</downloadSources>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-app</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-hs</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-core</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+        <filtering>true</filtering>
+      </resource>
+    </resources>
+    <testResources>
+      <testResource>
+        <directory>src/test/resources</directory>
+        <filtering>true</filtering>
+      </testResource>
+    </testResources>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <forkMode>always</forkMode>
+          <forkedProcessTimeoutInSeconds>600</forkedProcessTimeoutInSeconds>
+          <argLine>-Xmx1024m</argLine>
+          <includes>
+            <include>**/Test*.java</include>
+          </includes>
+          <redirectTestOutputToFile>true</redirectTestOutputToFile>
+          <systemProperties>
+            <property>
+              <name>test.build.data</name>
+              <value>${basedir}/target/test/data</value>
+            </property>
+            <property>
+              <name>hadoop.log.dir</name>
+              <value>target/test/logs</value>
+            </property>
+            <property>
+              <name>org.apache.commons.logging.Log</name>
+              <value>org.apache.commons.logging.impl.SimpleLog</value>
+            </property>
+            <property>
+              <name>org.apache.commons.logging.simplelog.defaultlog</name>
+              <value>warn</value>
+            </property>
+          </systemProperties>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${project.build.directory}/lib</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <configuration>
+          <enableRulesSummary>true</enableRulesSummary>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <archive>
+            <manifest>
+              <mainClass>org.apache.hadoop.tools.DistCp</mainClass>
+            </manifest>
+          </archive>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+        <configuration>
+          <attach>true</attach>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-pdf-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>pdf</id>
+            <phase>package</phase>
+            <goals>
+              <goal>pdf</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>

+ 218 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java

@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.IOException;
+
+/**
+ * The CopyListing abstraction is responsible for how the list of
+ * sources and targets is constructed, for DistCp's copy function.
+ * The copy-listing should be a SequenceFile<Text, FileStatus>,
+ * located at the path specified to buildListing(),
+ * each entry being a pair of (Source relative path, source file status),
+ * all the paths being fully qualified.
+ */
+public abstract class CopyListing extends Configured {
+
+  private Credentials credentials;
+
+  /**
+   * Build listing function creates the input listing that distcp uses to
+   * perform the copy.
+   *
+   * The build listing is a sequence file that has relative path of a file in the key
+   * and the file status information of the source file in the value
+   *
+   * For instance if the source path is /tmp/data and the traversed path is
+   * /tmp/data/dir1/dir2/file1, then the sequence file would contain
+   *
+   * key: /dir1/dir2/file1 and value: FileStatus(/tmp/data/dir1/dir2/file1)
+   *
+   * File would also contain directory entries. Meaning, if /tmp/data/dir1/dir2/file1
+   * is the only file under /tmp/data, the resulting sequence file would contain the
+   * following entries
+   *
+   * key: /dir1 and value: FileStatus(/tmp/data/dir1)
+   * key: /dir1/dir2 and value: FileStatus(/tmp/data/dir1/dir2)
+   * key: /dir1/dir2/file1 and value: FileStatus(/tmp/data/dir1/dir2/file1)
+   *
+   * Cases requiring special handling:
+   * If source path is a file (/tmp/file1), contents of the file will be as follows
+   *
+   * TARGET DOES NOT EXIST: Key-"", Value-FileStatus(/tmp/file1)
+   * TARGET IS FILE       : Key-"", Value-FileStatus(/tmp/file1)
+   * TARGET IS DIR        : Key-"/file1", Value-FileStatus(/tmp/file1)  
+   *
+   * @param pathToListFile - Output file where the listing would be stored
+   * @param options - Input options to distcp
+   * @throws IOException - Exception if any
+   */
+  public final void buildListing(Path pathToListFile,
+                                 DistCpOptions options) throws IOException {
+    validatePaths(options);
+    doBuildListing(pathToListFile, options);
+    Configuration config = getConf();
+
+    config.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, pathToListFile.toString());
+    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, getBytesToCopy());
+    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, getNumberOfPaths());
+
+    checkForDuplicates(pathToListFile);
+  }
+
+  /**
+   * Validate input and output paths
+   *
+   * @param options - Input options
+   * @throws InvalidInputException: If inputs are invalid
+   * @throws IOException: any Exception with FS 
+   */
+  protected abstract void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException;
+
+  /**
+   * The interface to be implemented by sub-classes, to create the source/target file listing.
+   * @param pathToListFile Path on HDFS where the listing file is written.
+   * @param options Input Options for DistCp (indicating source/target paths.)
+   * @throws IOException: Thrown on failure to create the listing file.
+   */
+  protected abstract void doBuildListing(Path pathToListFile,
+                                         DistCpOptions options) throws IOException;
+
+  /**
+   * Return the total bytes that distCp should copy for the source paths
+   * This doesn't account for files that may be skipped during the copy because they are identical at the target
+   *
+   * @return total bytes to copy
+   */
+  protected abstract long getBytesToCopy();
+
+  /**
+   * Return the total number of paths to distcp, includes directories as well
+   * This doesn't consider whether file/dir is already present and should be skipped during copy
+   *
+   * @return Total number of paths to distcp
+   */
+  protected abstract long getNumberOfPaths();
+
+  /**
+   * Validate the final resulting path listing to see if there are any duplicate entries
+   *
+   * @param pathToListFile - path listing build by doBuildListing
+   * @throws IOException - Any issues while checking for duplicates and throws
+   * @throws DuplicateFileException - if there are duplicates
+   */
+  private void checkForDuplicates(Path pathToListFile)
+      throws DuplicateFileException, IOException {
+
+    Configuration config = getConf();
+    FileSystem fs = pathToListFile.getFileSystem(config);
+
+    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);
+
+    SequenceFile.Reader reader = new SequenceFile.Reader(
+                          config, SequenceFile.Reader.file(sortedList));
+    try {
+      Text lastKey = new Text("*"); //source relative path can never hold *
+      FileStatus lastFileStatus = new FileStatus();
+
+      Text currentKey = new Text();
+      while (reader.next(currentKey)) {
+        if (currentKey.equals(lastKey)) {
+          FileStatus currentFileStatus = new FileStatus();
+          reader.getCurrentValue(currentFileStatus);
+          throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and " +
+              currentFileStatus.getPath() + " would cause duplicates. Aborting");
+        }
+        reader.getCurrentValue(lastFileStatus);
+        lastKey.set(currentKey);
+      }
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+  }
+
+  /**
+   * Protected constructor, to initialize configuration.
+   * @param configuration The input configuration,
+   *                        with which the source/target FileSystems may be accessed.
+   * @param credentials - Credentials object on which the FS delegation tokens are cached. If null,
+   * delegation token caching is skipped.
+   */
+  protected CopyListing(Configuration configuration, Credentials credentials) {
+    setConf(configuration);
+    setCredentials(credentials);
+  }
+
+  /**
+   * Set the Credentials store, on which FS delegation tokens will be cached
+   * @param credentials - Credentials object
+   */
+  protected void setCredentials(Credentials credentials) {
+    this.credentials = credentials;
+  }
+
+  /**
+   * get credentials to update the delegation tokens for accessed FS objects
+   * @return Credentials object
+   */
+  protected Credentials getCredentials() {
+    return credentials;
+  }
+
+  /**
+   * Public Factory method with which the appropriate CopyListing implementation may be retrieved.
+   * @param configuration The input configuration.
+   * @param credentials Credentials object on which the FS delegation tokens are cached
+   * @param options The input Options, to help choose the appropriate CopyListing Implementation.
+   * @return An instance of the appropriate CopyListing implementation.
+   */
+  public static CopyListing getCopyListing(Configuration configuration,
+                                           Credentials credentials,
+                                           DistCpOptions options) {
+    if (options.getSourceFileListing() == null) {
+      return new GlobbedCopyListing(configuration, credentials);
+    } else {
+      return new FileBasedCopyListing(configuration, credentials);
+    }
+  }
+
+  static class DuplicateFileException extends RuntimeException {
+    public DuplicateFileException(String message) {
+      super(message);
+    }
+  }
+
+  static class InvalidInputException extends RuntimeException {
+    public InvalidInputException(String message) {
+      super(message);
+    }
+  }
+}
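
For context, a minimal (hypothetical) subclass sketch of the contract defined above: doBuildListing() writes a SequenceFile of (relative path, FileStatus) pairs, and the byte/path counters report what was written. This flat, non-recursive listing is illustration only; the shipped implementations are GlobbedCopyListing and FileBasedCopyListing, and the sketch assumes DistCpOptions exposes getSourcePaths().

package org.apache.hadoop.tools;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;

import java.io.IOException;

// Illustration only: a flat listing of the top-level source paths themselves.
class FlatCopyListing extends CopyListing {
  private long bytesToCopy;
  private long numberOfPaths;

  protected FlatCopyListing(Configuration configuration, Credentials credentials) {
    super(configuration, credentials);
  }

  @Override
  protected void validatePaths(DistCpOptions options) throws IOException {
    // Assumes DistCpOptions.getSourcePaths() returns the -source path list.
    if (options.getSourcePaths() == null || options.getSourcePaths().isEmpty()) {
      throw new InvalidInputException("No source paths specified");
    }
  }

  @Override
  protected void doBuildListing(Path pathToListFile, DistCpOptions options)
      throws IOException {
    SequenceFile.Writer writer = SequenceFile.createWriter(getConf(),
        SequenceFile.Writer.file(pathToListFile),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(FileStatus.class));
    try {
      for (Path source : options.getSourcePaths()) {
        FileStatus status = source.getFileSystem(getConf()).getFileStatus(source);
        // Simplified: use "/<name>" as the relative-path key for each top-level source.
        writer.append(new Text("/" + source.getName()), status);
        bytesToCopy += status.getLen();
        numberOfPaths++;
      }
    } finally {
      IOUtils.closeStream(writer);
    }
  }

  @Override
  protected long getBytesToCopy() { return bytesToCopy; }

  @Override
  protected long getNumberOfPaths() { return numberOfPaths; }
}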

+ 405 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java

@@ -0,0 +1,405 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobSubmissionFiles;
+import org.apache.hadoop.mapreduce.Cluster;
+import org.apache.hadoop.tools.CopyListing.*;
+import org.apache.hadoop.tools.mapred.CopyMapper;
+import org.apache.hadoop.tools.mapred.CopyOutputFormat;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import java.io.IOException;
+import java.util.Random;
+
+/**
+ * DistCp is the main driver-class for DistCpV2.
+ * For command-line use, DistCp::main() orchestrates the parsing of command-line
+ * parameters and the launch of the DistCp job.
+ * For programmatic use, a DistCp object can be constructed by specifying
+ * options (in a DistCpOptions object), and DistCp::execute() may be used to
+ * launch the copy-job. DistCp may alternatively be sub-classed to fine-tune
+ * behaviour.
+ */
+public class DistCp extends Configured implements Tool {
+  private static final Log LOG = LogFactory.getLog(DistCp.class);
+
+  private DistCpOptions inputOptions;
+  private Path metaFolder;
+
+  private static final String PREFIX = "_distcp";
+  private static final String WIP_PREFIX = "._WIP_";
+  private static final String DISTCP_DEFAULT_XML = "distcp-default.xml";
+  public static final Random rand = new Random();
+
+  private boolean submitted;
+  private FileSystem jobFS;
+
+  /**
+   * Public Constructor. Creates DistCp object with specified input-parameters.
+   * (E.g. source-paths, target-location, etc.)
+   * @param inputOptions Options (indicating source-paths, target-location.)
+   * @param configuration The Hadoop configuration against which the Copy-mapper must run.
+   * @throws Exception, on failure.
+   */
+  public DistCp(Configuration configuration, DistCpOptions inputOptions) throws Exception {
+    Configuration config = new Configuration(configuration);
+    config.addResource(DISTCP_DEFAULT_XML);
+    setConf(config);
+    this.inputOptions = inputOptions;
+    this.metaFolder   = createMetaFolderPath();
+  }
+
+  /**
+   * To be used with the ToolRunner. Not for public consumption.
+   */
+  private DistCp() {}
+
+  /**
+   * Implementation of Tool::run(). Orchestrates the copy of source file(s)
+   * to target location, by:
+   *  1. Creating a list of files to be copied to target.
+   *  2. Launching a Map-only job to copy the files. (Delegates to execute().)
+   * @param argv List of arguments passed to DistCp, from the ToolRunner.
+   * @return On success, returns DistCpConstants.SUCCESS (0). Otherwise, a negative error code from DistCpConstants.
+   */
+  public int run(String[] argv) {
+    try {
+      inputOptions = (OptionsParser.parse(argv));
+
+      LOG.info("Input Options: " + inputOptions);
+    } catch (Throwable e) {
+      LOG.error("Invalid arguments: ", e);
+      System.err.println("Invalid arguments: " + e.getMessage());
+      OptionsParser.usage();      
+      return DistCpConstants.INVALID_ARGUMENT;
+    }
+    
+    try {
+      execute();
+    } catch (InvalidInputException e) {
+      LOG.error("Invalid input: ", e);
+      return DistCpConstants.INVALID_ARGUMENT;
+    } catch (DuplicateFileException e) {
+      LOG.error("Duplicate files in input path: ", e);
+      return DistCpConstants.DUPLICATE_INPUT;
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      return DistCpConstants.UNKNOWN_ERROR;
+    }
+    return DistCpConstants.SUCCESS;
+  }
+
+  /**
+   * Implements the core-execution. Creates the file-list for copy,
+   * and launches the Hadoop-job, to do the copy.
+   * @return Job handle
+   * @throws Exception, on failure.
+   */
+  public Job execute() throws Exception {
+    assert inputOptions != null;
+    assert getConf() != null;
+
+    Job job = null;
+    try {
+      metaFolder = createMetaFolderPath();
+      jobFS = metaFolder.getFileSystem(getConf());
+
+      job = createJob();
+      createInputFileListing(job);
+
+      job.submit();
+      submitted = true;
+    } finally {
+      if (!submitted) {
+        cleanup();
+      }
+    }
+
+    String jobID = job.getJobID().toString();
+    job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);
+    
+    LOG.info("DistCp job-id: " + jobID);
+    if (inputOptions.shouldBlock()) {
+      job.waitForCompletion(true);
+    }
+    return job;
+  }
+
+  /**
+   * Create Job object for submitting it, with all the configuration
+   *
+   * @return Reference to job object.
+   * @throws IOException - Exception if any
+   */
+  private Job createJob() throws IOException {
+    String jobName = "distcp";
+    String userChosenName = getConf().get(JobContext.JOB_NAME);
+    if (userChosenName != null)
+      jobName += ": " + userChosenName;
+    Job job = Job.getInstance(getConf());
+    job.setJobName(jobName);
+    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
+    job.setJarByClass(CopyMapper.class);
+    configureOutputFormat(job);
+
+    job.setMapperClass(CopyMapper.class);
+    job.setNumReduceTasks(0);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(Text.class);
+    job.setOutputFormatClass(CopyOutputFormat.class);
+    job.getConfiguration().set(JobContext.MAP_SPECULATIVE, "false");
+    job.getConfiguration().set(JobContext.NUM_MAPS,
+                  String.valueOf(inputOptions.getMaxMaps()));
+
+    if (inputOptions.getSslConfigurationFile() != null) {
+      setupSSLConfig(job);
+    }
+
+    inputOptions.appendToConf(job.getConfiguration());
+    return job;
+  }
+
+  /**
+   * Setup ssl configuration on the job configuration to enable hsftp access
+   * from map job. Also copy the ssl configuration file to Distributed cache
+   *
+   * @param job - Reference to job's handle
+   * @throws java.io.IOException - Exception if unable to locate ssl config file
+   */
+  private void setupSSLConfig(Job job) throws IOException  {
+    Configuration configuration = job.getConfiguration();
+    Path sslConfigPath = new Path(configuration.
+        getResource(inputOptions.getSslConfigurationFile()).toString());
+
+    addSSLFilesToDistCache(job, sslConfigPath);
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
+  }
+
+  /**
+   * Add SSL files to distributed cache. Trust store, key store and ssl config xml
+   *
+   * @param job - Job handle
+   * @param sslConfigPath - ssl Configuration file specified through options
+   * @throws IOException - If any
+   */
+  private void addSSLFilesToDistCache(Job job,
+                                      Path sslConfigPath) throws IOException {
+    Configuration configuration = job.getConfiguration();
+    FileSystem localFS = FileSystem.getLocal(configuration);
+
+    Configuration sslConf = new Configuration(false);
+    sslConf.addResource(sslConfigPath);
+
+    Path localStorePath = getLocalStorePath(sslConf,
+                            DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION);
+    job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
+                                      localFS.getWorkingDirectory()).toUri());
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION,
+                      localStorePath.getName());
+
+    localStorePath = getLocalStorePath(sslConf,
+                             DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION);
+    job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
+                                      localFS.getWorkingDirectory()).toUri());
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION,
+                                      localStorePath.getName());
+
+    job.addCacheFile(sslConfigPath.makeQualified(localFS.getUri(),
+                                      localFS.getWorkingDirectory()).toUri());
+
+  }
+
+  /**
+   * Get Local Trust store/key store path
+   *
+   * @param sslConf - Config from SSL Client xml
+   * @param storeKey - Key for either trust store or key store
+   * @return - Path where the store is present
+   * @throws IOException -If any
+   */
+  private Path getLocalStorePath(Configuration sslConf, String storeKey) throws IOException {
+    if (sslConf.get(storeKey) != null) {
+      return new Path(sslConf.get(storeKey));
+    } else {
+      throw new IOException("Store for " + storeKey + " is not set in " +
+          inputOptions.getSslConfigurationFile());
+    }
+  }
+
+  /**
+   * Setup output format appropriately
+   *
+   * @param job - Job handle
+   * @throws IOException - Exception if any
+   */
+  private void configureOutputFormat(Job job) throws IOException {
+    final Configuration configuration = job.getConfiguration();
+    Path targetPath = inputOptions.getTargetPath();
+    FileSystem targetFS = targetPath.getFileSystem(configuration);
+    targetPath = targetPath.makeQualified(targetFS.getUri(),
+                                          targetFS.getWorkingDirectory());
+
+    if (inputOptions.shouldAtomicCommit()) {
+      Path workDir = inputOptions.getAtomicWorkPath();
+      if (workDir == null) {
+        workDir = targetPath.getParent();
+      }
+      workDir = new Path(workDir, WIP_PREFIX + targetPath.getName()
+                                + rand.nextInt());
+      FileSystem workFS = workDir.getFileSystem(configuration);
+      if (!DistCpUtils.compareFs(targetFS, workFS)) {
+        throw new IllegalArgumentException("Work path " + workDir +
+            " and target path " + targetPath + " are in different file system");
+      }
+      CopyOutputFormat.setWorkingDirectory(job, workDir);
+    } else {
+      CopyOutputFormat.setWorkingDirectory(job, targetPath);
+    }
+    CopyOutputFormat.setCommitDirectory(job, targetPath);
+
+    Path logPath = inputOptions.getLogPath();
+    if (logPath == null) {
+      logPath = new Path(metaFolder, "_logs");
+    } else {
+      LOG.info("DistCp job log path: " + logPath);
+    }
+    CopyOutputFormat.setOutputPath(job, logPath);
+  }
+
+  /**
+   * Create input listing by invoking an appropriate copy listing
+   * implementation. Also add delegation tokens for each path
+   * to job's credential store
+   *
+   * @param job - Handle to job
+   * @return Returns the path where the copy listing is created
+   * @throws IOException - If any
+   */
+  private Path createInputFileListing(Job job) throws IOException {
+    Path fileListingPath = getFileListingPath();
+    CopyListing copyListing = CopyListing.getCopyListing(job.getConfiguration(),
+        job.getCredentials(), inputOptions);
+    copyListing.buildListing(fileListingPath, inputOptions);
+    return fileListingPath;
+  }
+
+  /**
+   * Get default name of the copy listing file. Use the meta folder
+   * to create the copy listing file
+   *
+   * @return - Path where the copy listing file has to be saved
+   * @throws IOException - Exception if any
+   */
+  private Path getFileListingPath() throws IOException {
+    String fileListPathStr = metaFolder + "/fileList.seq";
+    Path path = new Path(fileListPathStr);
+    return new Path(path.toUri().normalize().toString());
+  }
+
+  /**
+   * Create a default working folder for the job, under the
+   * job staging directory
+   *
+   * @return Returns the working folder information
+   * @throws Exception - Exception if any
+   */
+  private Path createMetaFolderPath() throws Exception {
+    Configuration configuration = getConf();
+    Path stagingDir = JobSubmissionFiles.getStagingDir(
+            new Cluster(configuration), configuration);
+    Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
+    if (LOG.isDebugEnabled())
+      LOG.debug("Meta folder location: " + metaFolderPath);
+    configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());    
+    return metaFolderPath;
+  }
+
+  /**
+   * Main function of the DistCp program. Parses the input arguments (via OptionsParser),
+   * and invokes the DistCp::run() method, via the ToolRunner.
+   * @param argv Command-line arguments sent to DistCp.
+   */
+  public static void main(String argv[]) {
+    try {
+      DistCp distCp = new DistCp();
+      Cleanup CLEANUP = new Cleanup(distCp);
+
+      Runtime.getRuntime().addShutdownHook(CLEANUP);
+      System.exit(ToolRunner.run(getDefaultConf(), distCp, argv));
+    }
+    catch (Exception e) {
+      LOG.error("Couldn't complete DistCp operation: ", e);
+      System.exit(DistCpConstants.UNKNOWN_ERROR);
+    }
+  }
+
+  /**
+   * Loads properties from distcp-default.xml into configuration
+   * object
+   * @return Configuration which includes properties from distcp-default.xml
+   */
+  private static Configuration getDefaultConf() {
+    Configuration config = new Configuration();
+    config.addResource(DISTCP_DEFAULT_XML);
+    return config;
+  }
+
+  private synchronized void cleanup() {
+    try {
+      if (metaFolder == null) return;
+
+      jobFS.delete(metaFolder, true);
+      metaFolder = null;
+    } catch (IOException e) {
+      LOG.error("Unable to cleanup meta folder: " + metaFolder, e);
+    }
+  }
+
+  private boolean isSubmitted() {
+    return submitted;
+  }
+
+  private static class Cleanup extends Thread {
+    private final DistCp distCp;
+
+    public Cleanup(DistCp distCp) {
+      this.distCp = distCp;
+    }
+
+    @Override
+    public void run() {
+      if (distCp.isSubmitted()) return;
+
+      distCp.cleanup();
+    }
+  }
+}
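
As the class javadoc notes, DistCp can also be driven programmatically: build a DistCpOptions, construct DistCp with it, and call execute(). A hedged sketch follows; the (List<Path>, Path) constructor of DistCpOptions is assumed (its constructors are not shown in this change excerpt), and the host names and paths are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;

import java.util.Collections;

public class DistCpDriver {
  public static void main(String[] args) throws Exception {
    Path source = new Path("hdfs://nn1:8020/data/input");    // placeholder cluster/paths
    Path target = new Path("hdfs://nn2:8020/data/backup");

    // Assumed constructor: DistCpOptions(List<Path> sourcePaths, Path targetPath)
    DistCpOptions options =
        new DistCpOptions(Collections.singletonList(source), target);

    DistCp distCp = new DistCp(new Configuration(), options);
    Job job = distCp.execute();   // waits for completion unless -async was requested
    System.out.println("DistCp job succeeded: " + job.isSuccessful());
  }
}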

+ 104 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java

@@ -0,0 +1,104 @@
+package org.apache.hadoop.tools;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utility class to hold commonly used constants.
+ */
+public class DistCpConstants {
+
+  /* Default number of maps to use for DistCp */
+  public static final int DEFAULT_MAPS = 20;
+
+  /* Default bandwidth if none specified */
+  public static final int DEFAULT_BANDWIDTH_MB = 100;
+
+  /* Default strategy for copying. Implementation looked up
+     from distcp-default.xml
+   */
+  public static final String UNIFORMSIZE = "uniformsize";
+
+  /**
+   *  Constants mapping to command line switches/input options
+   */
+  public static final String CONF_LABEL_ATOMIC_COPY = "distcp.atomic.copy";
+  public static final String CONF_LABEL_WORK_PATH = "distcp.work.path";
+  public static final String CONF_LABEL_LOG_PATH = "distcp.log.path";
+  public static final String CONF_LABEL_IGNORE_FAILURES = "distcp.ignore.failures";
+  public static final String CONF_LABEL_PRESERVE_STATUS = "distcp.preserve.status";
+  public static final String CONF_LABEL_SYNC_FOLDERS = "distcp.sync.folders";
+  public static final String CONF_LABEL_DELETE_MISSING = "distcp.delete.missing.source";
+  public static final String CONF_LABEL_SSL_CONF = "distcp.keystore.resource";
+  public static final String CONF_LABEL_MAX_MAPS = "distcp.max.maps";
+  public static final String CONF_LABEL_SOURCE_LISTING = "distcp.source.listing";
+  public static final String CONF_LABEL_COPY_STRATEGY = "distcp.copy.strategy";
+  public static final String CONF_LABEL_SKIP_CRC = "distcp.skip.crc";
+  public static final String CONF_LABEL_OVERWRITE = "distcp.copy.overwrite";
+  public static final String CONF_LABEL_BANDWIDTH_MB = "distcp.map.bandwidth.mb";
+
+  /* Total bytes to be copied. Updated by copylisting. Unfiltered count */
+  public static final String CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED = "mapred.total.bytes.expected";
+
+  /* Total number of paths to copy, includes directories. Unfiltered count */
+  public static final String CONF_LABEL_TOTAL_NUMBER_OF_RECORDS = "mapred.number.of.records";
+
+  /* SSL keystore resource */
+  public static final String CONF_LABEL_SSL_KEYSTORE = "dfs.https.client.keystore.resource";
+
+  /* If input is based -f <<source listing>>, file containing the src paths */
+  public static final String CONF_LABEL_LISTING_FILE_PATH = "distcp.listing.file.path";
+
+  /* Directory where the mapreduce job will write to. If not atomic commit, then same
+    as CONF_LABEL_TARGET_FINAL_PATH
+   */
+  public static final String CONF_LABEL_TARGET_WORK_PATH = "distcp.target.work.path";
+
+  /* Directory where the final data will be committed to. If not atomic commit, then same
+    as CONF_LABEL_TARGET_WORK_PATH
+   */
+  public static final String CONF_LABEL_TARGET_FINAL_PATH = "distcp.target.final.path";
+
+  /**
+   * DistCp job id for consumers of DistCp
+   */
+  public static final String CONF_LABEL_DISTCP_JOB_ID = "distcp.job.id";
+
+  /* Meta folder where the job's intermediate data is kept */
+  public static final String CONF_LABEL_META_FOLDER = "distcp.meta.folder";
+
+  /**
+   * Conf label for SSL Trust-store location.
+   */
+  public static final String CONF_LABEL_SSL_TRUST_STORE_LOCATION
+      = "ssl.client.truststore.location";
+
+  /**
+   * Conf label for SSL Key-store location.
+   */
+  public static final String CONF_LABEL_SSL_KEY_STORE_LOCATION
+      = "ssl.client.keystore.location";
+
+  /**
+   * Constants for DistCp return code to shell / consumer of ToolRunner's run
+   */
+  public static final int SUCCESS = 0;
+  public static final int INVALID_ARGUMENT = -1;
+  public static final int DUPLICATE_INPUT = -2;
+  public static final int UNKNOWN_ERROR = -999;
+}
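
The return codes above are the contract between DistCp.run() (shown earlier in this change) and whatever launched it through ToolRunner or the shell. A small illustrative mapping of those codes to messages; the class and method names are made up.

import org.apache.hadoop.tools.DistCpConstants;

// Illustration only: interpreting the exit codes that DistCp.run() returns.
public class DistCpExitCodes {
  static String describe(int exitCode) {
    switch (exitCode) {
      case DistCpConstants.SUCCESS:          return "copy completed";
      case DistCpConstants.INVALID_ARGUMENT: return "bad options or invalid input paths";
      case DistCpConstants.DUPLICATE_INPUT:  return "duplicate files in the copy listing";
      case DistCpConstants.UNKNOWN_ERROR:    return "unexpected failure; check the job logs";
      default:                               return "unrecognized exit code " + exitCode;
    }
  }

  public static void main(String[] args) {
    System.out.println(describe(DistCpConstants.DUPLICATE_INPUT));
  }
}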

+ 218 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java

@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.cli.Option;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Enumeration mapping configuration keys to distcp command line
+ * options.
+ */
+public enum DistCpOptionSwitch {
+
+  /**
+   * Ignores any failures during copy, and continues with rest.
+   * Logs failures in a file
+   */
+  IGNORE_FAILURES(DistCpConstants.CONF_LABEL_IGNORE_FAILURES,
+      new Option("i", false, "Ignore failures during copy")),
+
+  /**
+   * Preserves status of file/path in the target.
+   * Default behavior with -p, is to preserve replication,
+   * block size, user, group and permission on the target file
+   *
+   * If any of the optional switches are present among rbugp, then
+   * only the corresponding file attribute is preserved
+   *
+   */
+  PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
+      new Option("p", true, "preserve status (rbugp)" +
+          "(replication, block-size, user, group, permission)")),
+
+  /**
+   * Update target location by copying only files that are missing
+   * in the target. This can be used to periodically sync two folders
+   * across source and target. Typically used with DELETE_MISSING
+   * Incompatible with ATOMIC_COMMIT
+   */
+  SYNC_FOLDERS(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, 
+      new Option("update", false, "Update target, copying only missing" +
+          "files or directories")),
+
+  /**
+   * Deletes missing files in target that are missing from source
+   * This allows the target to be in sync with the source contents
+   * Typically used in conjunction with SYNC_FOLDERS
+   * Incompatible with ATOMIC_COMMIT
+   */
+  DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING,
+      new Option("delete", false, "Delete from target, " +
+          "files missing in source")),
+
+  /**
+   * Configuration file to use with hftps:// for securely copying
+   * files across clusters. Typically the configuration file contains
+   * truststore/keystore information such as location, password and type
+   */
+  SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF,
+      new Option("mapredSslConf", true, "Configuration for ssl config file" +
+          ", to use with hftps://")),
+
+  /**
+   * Max number of maps to use during copy. DistCp will split work
+   * as equally as possible among these maps
+   */
+  MAX_MAPS(DistCpConstants.CONF_LABEL_MAX_MAPS, 
+      new Option("m", true, "Max number of concurrent maps to use for copy")),
+
+  /**
+   * Source file listing can be provided to DistCp in a file.
+   * This allows DistCp to copy random list of files from source
+   * and copy them to target
+   */
+  SOURCE_FILE_LISTING(DistCpConstants.CONF_LABEL_SOURCE_LISTING,
+      new Option("f", true, "List of files that need to be copied")),
+
+  /**
+   * Copy all the source files and commit them atomically to the target
+   * This is typically useful in cases where there is a process
+   * polling for availability of a file/dir. This option is incompatible
+   * with SYNC_FOLDERS & DELETE_MISSING
+   */
+  ATOMIC_COMMIT(DistCpConstants.CONF_LABEL_ATOMIC_COPY,
+      new Option("atomic", false, "Commit all changes or none")),
+
+  /**
+   * Work path to be used only in conjunction with atomic commit
+   */
+  WORK_PATH(DistCpConstants.CONF_LABEL_WORK_PATH,
+      new Option("tmp", true, "Intermediate work path to be used for atomic commit")),
+
+  /**
+   * Log path where distcp output logs are written to
+   */
+  LOG_PATH(DistCpConstants.CONF_LABEL_LOG_PATH,
+      new Option("log", true, "Folder on DFS where distcp execution logs are saved")),
+
+  /**
+   * Copy strategy in use. This could be dynamic or uniform size etc.
+   * DistCp would use an appropriate input format based on this.
+   */
+  COPY_STRATEGY(DistCpConstants.CONF_LABEL_COPY_STRATEGY,
+      new Option("strategy", true, "Copy strategy to use. Default is " +
+          "dividing work based on file sizes")),
+
+  /**
+   * Skip CRC checks between source and target, when determining what
+   * files need to be copied.
+   */
+  SKIP_CRC(DistCpConstants.CONF_LABEL_SKIP_CRC,
+      new Option("skipcrccheck", false, "Whether to skip CRC checks between " +
+          "source and target paths.")),
+
+  /**
+   * Overwrite target-files unconditionally.
+   */
+  OVERWRITE(DistCpConstants.CONF_LABEL_OVERWRITE,
+      new Option("overwrite", false, "Choose to overwrite target files " +
+          "unconditionally, even if they exist.")),
+
+  /**
+   * Should DistCp execution be blocking?
+   */
+  BLOCKING("",
+      new Option("async", false, "Should distcp execution be blocking")),
+
+  FILE_LIMIT("",
+      new Option("filelimit", true, "(Deprecated!) Limit number of files " +
+              "copied to <= n")),
+
+  SIZE_LIMIT("",
+      new Option("sizelimit", true, "(Deprecated!) Limit number of files " +
+              "copied to <= n bytes")),
+
+  /**
+   * Specify bandwidth per map in MB
+   */
+  BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
+      new Option("bandwidth", true, "Specify bandwidth per map in MB"));
+
+  private final String confLabel;
+  private final Option option;
+
+  DistCpOptionSwitch(String confLabel, Option option) {
+    this.confLabel = confLabel;
+    this.option = option;
+  }
+
+  /**
+   * Get Configuration label for the option
+   * @return configuration label name
+   */
+  public String getConfigLabel() {
+    return confLabel;
+  }
+
+  /**
+   * Get CLI Option corresponding to the distcp option
+   * @return option
+   */
+  public Option getOption() {
+    return option;
+  }
+
+  /**
+   * Get Switch symbol
+   * @return switch symbol string
+   */
+  public String getSwitch() {
+    return option.getOpt();
+  }
+
+  @Override
+  public String toString() {
+    return  super.name() + " {" +
+        "confLabel='" + confLabel + '\'' +
+        ", option=" + option + '}';
+  }
+
+  /**
+   * Helper function to add an option to hadoop configuration object
+   * @param conf - Configuration object to include the option
+   * @param option - Option to add
+   * @param value - Value
+   */
+  public static void addToConf(Configuration conf,
+                               DistCpOptionSwitch option,
+                               String value) {
+    conf.set(option.getConfigLabel(), value);
+  }
+
+  /**
+   * Helper function to set a boolean option (to true) in the hadoop configuration object
+   * @param conf - Configuration object to include the option
+   * @param option - Option to add
+   */
+  public static void addToConf(Configuration conf,
+                               DistCpOptionSwitch option) {
+    conf.set(option.getConfigLabel(), "true");
+  }
+}
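
Editor's note, not part of the patch: a minimal usage sketch of the enum above. The addToConf() helpers push a switch's value into the job Configuration under the label defined in DistCpConstants, so that the MapReduce side can read it back later. The wrapper class name below is invented for illustration; the DistCp calls are exactly those declared in this file.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpOptionSwitch;

public class OptionSwitchSketch {
  public static void main(String[] args) {
    // Illustrative only; not part of the DistCp patch.
    Configuration conf = new Configuration();

    // Boolean switch: stores "true" under the switch's configuration label.
    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SYNC_FOLDERS);

    // Valued switch: stores the supplied value under the label
    // (here, DistCpConstants.CONF_LABEL_BANDWIDTH_MB).
    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.BANDWIDTH, "10");

    // Reading the values back, the same way the mapper/committer would.
    System.out.println(conf.get(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel()));  // "true"
    System.out.println(conf.get(DistCpOptionSwitch.BANDWIDTH.getConfigLabel()));     // "10"
  }
}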

+ 525 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java

@@ -0,0 +1,525 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.util.DistCpUtils;
+
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * The Options class encapsulates all DistCp options.
+ * These may be set from command-line (via the OptionsParser)
+ * or may be set manually.
+ */
+public class DistCpOptions {
+
+  private boolean atomicCommit = false;
+  private boolean syncFolder = false;
+  private boolean deleteMissing = false;
+  private boolean ignoreFailures = false;
+  private boolean overwrite = false;
+  private boolean skipCRC = false;
+  private boolean blocking = true;
+
+  private int maxMaps = DistCpConstants.DEFAULT_MAPS;
+  private int mapBandwidth = DistCpConstants.DEFAULT_BANDWIDTH_MB;
+
+  private String sslConfigurationFile;
+
+  private String copyStrategy = DistCpConstants.UNIFORMSIZE;
+
+  private EnumSet<FileAttribute> preserveStatus = EnumSet.noneOf(FileAttribute.class);
+
+  private Path atomicWorkPath;
+
+  private Path logPath;
+
+  private Path sourceFileListing;
+  private List<Path> sourcePaths;
+
+  private Path targetPath;
+
+  public static enum FileAttribute{
+    REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION;
+
+    public static FileAttribute getAttribute(char symbol) {
+      for (FileAttribute attribute : values()) {
+        if (attribute.name().charAt(0) == Character.toUpperCase(symbol)) {
+          return attribute;
+        }
+      }
+      throw new NoSuchElementException("No attribute for " + symbol);
+    }
+  }
+
+  /**
+   * Constructor, to initialize source/target paths.
+   * @param sourcePaths List of source-paths (including wildcards)
+   *                     to be copied to target.
+   * @param targetPath Destination path for the dist-copy.
+   */
+  public DistCpOptions(List<Path> sourcePaths, Path targetPath) {
+    assert sourcePaths != null && !sourcePaths.isEmpty() : "Invalid source paths";
+    assert targetPath != null : "Invalid Target path";
+
+    this.sourcePaths = sourcePaths;
+    this.targetPath = targetPath;
+  }
+
+  /**
+   * Constructor, to initialize source/target paths.
+   * @param sourceFileListing File containing list of source paths
+   * @param targetPath Destination path for the dist-copy.
+   */
+  public DistCpOptions(Path sourceFileListing, Path targetPath) {
+    assert sourceFileListing != null : "Invalid source paths";
+    assert targetPath != null : "Invalid Target path";
+
+    this.sourceFileListing = sourceFileListing;
+    this.targetPath = targetPath;
+  }
+
+  /**
+   * Copy constructor.
+   * @param that DistCpOptions being copied from.
+   */
+  public DistCpOptions(DistCpOptions that) {
+    if (this != that && that != null) {
+      this.atomicCommit = that.atomicCommit;
+      this.syncFolder = that.syncFolder;
+      this.deleteMissing = that.deleteMissing;
+      this.ignoreFailures = that.ignoreFailures;
+      this.overwrite = that.overwrite;
+      this.skipCRC = that.skipCRC;
+      this.blocking = that.blocking;
+      this.maxMaps = that.maxMaps;
+      this.mapBandwidth = that.mapBandwidth;
+      this.sslConfigurationFile = that.getSslConfigurationFile();
+      this.copyStrategy = that.copyStrategy;
+      this.preserveStatus = that.preserveStatus;
+      this.atomicWorkPath = that.getAtomicWorkPath();
+      this.logPath = that.getLogPath();
+      this.sourceFileListing = that.getSourceFileListing();
+      this.sourcePaths = that.getSourcePaths();
+      this.targetPath = that.getTargetPath();
+    }
+  }
+
+  /**
+   * Should the data be committed atomically?
+   *
+   * @return true if data should be committed atomically. false otherwise
+   */
+  public boolean shouldAtomicCommit() {
+    return atomicCommit;
+  }
+
+  /**
+   * Set whether data should be committed atomically
+   *
+   * @param atomicCommit - boolean switch
+   */
+  public void setAtomicCommit(boolean atomicCommit) {
+    validate(DistCpOptionSwitch.ATOMIC_COMMIT, atomicCommit);
+    this.atomicCommit = atomicCommit;
+  }
+
+  /**
+   * Should the data be sync'ed between source and target paths?
+   *
+   * @return true if data should be sync'ed up. false otherwise
+   */
+  public boolean shouldSyncFolder() {
+    return syncFolder;
+  }
+
+  /**
+   * Set whether source and target folder contents should be synced up
+   *
+   * @param syncFolder - boolean switch
+   */
+  public void setSyncFolder(boolean syncFolder) {
+    validate(DistCpOptionSwitch.SYNC_FOLDERS, syncFolder);
+    this.syncFolder = syncFolder;
+  }
+
+  /**
+   * Should files in the target that are missing from the source be deleted?
+   *
+   * @return true if orphaned target files are to be removed. false otherwise
+   */
+  public boolean shouldDeleteMissing() {
+    return deleteMissing;
+  }
+
+  /**
+   * Set if files only present in target should be deleted
+   *
+   * @param deleteMissing - boolean switch
+   */
+  public void setDeleteMissing(boolean deleteMissing) {
+    validate(DistCpOptionSwitch.DELETE_MISSING, deleteMissing);
+    this.deleteMissing = deleteMissing;
+  }
+
+  /**
+   * Should failures be logged and ignored during copy?
+   *
+   * @return true if failures are to be logged and ignored. false otherwise
+   */
+  public boolean shouldIgnoreFailures() {
+    return ignoreFailures;
+  }
+
+  /**
+   * Set whether failures during the copy should be ignored
+   *
+   * @param ignoreFailures - boolean switch
+   */
+  public void setIgnoreFailures(boolean ignoreFailures) {
+    this.ignoreFailures = ignoreFailures;
+  }
+
+  /**
+   * Should DistCp run in blocking mode?
+   *
+   * @return true if DistCp should run in blocking mode, false otherwise
+   */
+  public boolean shouldBlock() {
+    return blocking;
+  }
+
+  /**
+   * Set whether DistCp should run in blocking or non-blocking mode
+   *
+   * @param blocking - boolean switch
+   */
+  public void setBlocking(boolean blocking) {
+    this.blocking = blocking;
+  }
+
+  /**
+   * Should files be overwritten always?
+   *
+   * @return true if files that may already exist in the target should always
+   *         be overwritten; false otherwise
+   */
+  public boolean shouldOverwrite() {
+    return overwrite;
+  }
+
+  /**
+   * Set if files should always be overwritten on target
+   *
+   * @param overwrite - boolean switch
+   */
+  public void setOverwrite(boolean overwrite) {
+    validate(DistCpOptionSwitch.OVERWRITE, overwrite);
+    this.overwrite = overwrite;
+  }
+
+  /**
+   * Should CRC/checksum check be skipped while checking files are identical
+   *
+   * @return true if checksum check should be skipped while checking files are
+   *         identical. false otherwise
+   */
+  public boolean shouldSkipCRC() {
+    return skipCRC;
+  }
+
+  /**
+   * Set if checksum comparison should be skipped while determining if
+   * source and destination files are identical
+   *
+   * @param skipCRC - boolean switch
+   */
+  public void setSkipCRC(boolean skipCRC) {
+    validate(DistCpOptionSwitch.SKIP_CRC, skipCRC);
+    this.skipCRC = skipCRC;
+  }
+
+  /** Get the max number of maps to use for this copy
+   *
+   * @return Max number of maps
+   */
+  public int getMaxMaps() {
+    return maxMaps;
+  }
+
+  /**
+   * Set the max number of maps to use for copy
+   *
+   * @param maxMaps - Number of maps
+   */
+  public void setMaxMaps(int maxMaps) {
+    this.maxMaps = maxMaps;
+  }
+
+  /** Get the map bandwidth in MB
+   *
+   * @return Bandwidth in MB
+   */
+  public int getMapBandwidth() {
+    return mapBandwidth;
+  }
+
+  /**
+   * Set per map bandwidth
+   *
+   * @param mapBandwidth - per map bandwidth
+   */
+  public void setMapBandwidth(int mapBandwidth) {
+    assert mapBandwidth > 0 : "Bandwidth " + mapBandwidth + " is invalid (should be > 0)";
+    this.mapBandwidth = mapBandwidth;
+  }
+
+  /**
+   * Get path where the ssl configuration file is present to use for hftps://
+   *
+   * @return Path on local file system
+   */
+  public String getSslConfigurationFile() {
+    return sslConfigurationFile;
+  }
+
+  /**
+   * Set the SSL configuration file path to use with hftps:// (local path)
+   *
+   * @param sslConfigurationFile - Local ssl config file path
+   */
+  public void setSslConfigurationFile(String sslConfigurationFile) {
+    this.sslConfigurationFile = sslConfigurationFile;
+  }
+
+  /**
+   * Returns an iterator with the list of file attributes to preserve
+   *
+   * @return iterator of file attributes to preserve
+   */
+  public Iterator<FileAttribute> preserveAttributes() {
+    return preserveStatus.iterator();
+  }
+
+  /**
+   * Checks if the input attribute should be preserved or not
+   *
+   * @param attribute - Attribute to check
+   * @return True if attribute should be preserved, false otherwise
+   */
+  public boolean shouldPreserve(FileAttribute attribute) {
+    return preserveStatus.contains(attribute);
+  }
+
+  /**
+   * Add file attributes that need to be preserved. This method may be
+   * called multiple times to add attributes.
+   *
+   * @param fileAttribute - Attribute to add, one at a time
+   */
+  public void preserve(FileAttribute fileAttribute) {
+    for (FileAttribute attribute : preserveStatus) {
+      if (attribute.equals(fileAttribute)) {
+        return;
+      }
+    }
+    preserveStatus.add(fileAttribute);
+  }
+
+  /** Get work path for atomic commit. If null, the work
+   * path would be parentOf(targetPath) + "/._WIP_" + nameOf(targetPath)
+   *
+   * @return Atomic work path on the target cluster. Null if not set
+   */
+  public Path getAtomicWorkPath() {
+    return atomicWorkPath;
+  }
+
+  /**
+   * Set the work path for atomic commit
+   *
+   * @param atomicWorkPath - Path on the target cluster
+   */
+  public void setAtomicWorkPath(Path atomicWorkPath) {
+    this.atomicWorkPath = atomicWorkPath;
+  }
+
+  /** Get output directory for writing distcp logs. Otherwise logs
+   * are temporarily written to JobStagingDir/_logs and deleted
+   * upon job completion
+   *
+   * @return Log output path on the cluster where distcp job is run
+   */
+  public Path getLogPath() {
+    return logPath;
+  }
+
+  /**
+   * Set the log path where distcp output logs are stored
+   * Uses JobStagingDir/_logs by default
+   *
+   * @param logPath - Path where logs will be saved
+   */
+  public void setLogPath(Path logPath) {
+    this.logPath = logPath;
+  }
+
+  /**
+   * Get the copy strategy to use. Uses appropriate input format
+   *
+   * @return copy strategy to use
+   */
+  public String getCopyStrategy() {
+    return copyStrategy;
+  }
+
+  /**
+   * Set the copy strategy to use. Should map to a strategy implementation
+   * in distcp-default.xml
+   *
+   * @param copyStrategy - copy Strategy to use
+   */
+  public void setCopyStrategy(String copyStrategy) {
+    this.copyStrategy = copyStrategy;
+  }
+
+  /**
+   * File path (hdfs:// or file://) that contains the list of actual
+   * files to copy
+   *
+   * @return - Source listing file path
+   */
+  public Path getSourceFileListing() {
+    return sourceFileListing;
+  }
+
+  /**
+   * Getter for sourcePaths.
+   * @return List of source-paths.
+   */
+  public List<Path> getSourcePaths() {
+    return sourcePaths;
+  }
+
+  /**
+   * Setter for sourcePaths.
+   * @param sourcePaths The new list of source-paths.
+   */
+  public void setSourcePaths(List<Path> sourcePaths) {
+    assert sourcePaths != null && sourcePaths.size() != 0;
+    this.sourcePaths = sourcePaths;
+  }
+
+  /**
+   * Getter for the targetPath.
+   * @return The target-path.
+   */
+  public Path getTargetPath() {
+    return targetPath;
+  }
+
+  public void validate(DistCpOptionSwitch option, boolean value) {
+
+    boolean syncFolder = (option == DistCpOptionSwitch.SYNC_FOLDERS ?
+        value : this.syncFolder);
+    boolean overwrite = (option == DistCpOptionSwitch.OVERWRITE ?
+        value : this.overwrite);
+    boolean deleteMissing = (option == DistCpOptionSwitch.DELETE_MISSING ?
+        value : this.deleteMissing);
+    boolean atomicCommit = (option == DistCpOptionSwitch.ATOMIC_COMMIT ?
+        value : this.atomicCommit);
+    boolean skipCRC = (option == DistCpOptionSwitch.SKIP_CRC ?
+        value : this.skipCRC);
+
+    if (syncFolder && atomicCommit) {
+      throw new IllegalArgumentException("Atomic commit can't be used with " +
+          "sync folder or overwrite options");
+    }
+
+    if (deleteMissing && !(overwrite || syncFolder)) {
+      throw new IllegalArgumentException("Delete missing is applicable " +
+          "only with update or overwrite options");
+    }
+
+    if (overwrite && syncFolder) {
+      throw new IllegalArgumentException("Overwrite and update options are " +
+          "mutually exclusive");
+    }
+
+    if (!syncFolder && skipCRC) {
+      throw new IllegalArgumentException("Skip CRC is valid only with update options");
+    }
+
+  }
+
+  /**
+   * Add options to configuration. These will be used in the Mapper/committer
+   *
+   * @param conf - Configuration object to which the options need to be added
+   */
+  public void appendToConf(Configuration conf) {
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.ATOMIC_COMMIT,
+        String.valueOf(atomicCommit));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.IGNORE_FAILURES,
+        String.valueOf(ignoreFailures));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SYNC_FOLDERS,
+        String.valueOf(syncFolder));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.DELETE_MISSING,
+        String.valueOf(deleteMissing));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.OVERWRITE,
+        String.valueOf(overwrite));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SKIP_CRC,
+        String.valueOf(skipCRC));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.BANDWIDTH,
+        String.valueOf(mapBandwidth));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.PRESERVE_STATUS,
+        DistCpUtils.packAttributes(preserveStatus));
+  }
+
+  /**
+   * Utility to easily string-ify Options, for logging.
+   *
+   * @return String representation of the Options.
+   */
+  @Override
+  public String toString() {
+    return "DistCpOptions{" +
+        "atomicCommit=" + atomicCommit +
+        ", syncFolder=" + syncFolder +
+        ", deleteMissing=" + deleteMissing +
+        ", ignoreFailures=" + ignoreFailures +
+        ", maxMaps=" + maxMaps +
+        ", sslConfigurationFile='" + sslConfigurationFile + '\'' +
+        ", copyStrategy='" + copyStrategy + '\'' +
+        ", sourceFileListing=" + sourceFileListing +
+        ", sourcePaths=" + sourcePaths +
+        ", targetPath=" + targetPath +
+        '}';
+  }
+
+  @Override
+  protected DistCpOptions clone() throws CloneNotSupportedException {
+    return (DistCpOptions) super.clone();
+  }
+}
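
Editor's note, not part of the patch: a small sketch of building DistCpOptions programmatically rather than through OptionsParser. The HDFS URIs and the class name are hypothetical. Because validate() is invoked from the setters, the order of calls matters: the delete-missing option is only accepted once update (or overwrite) has already been turned on.

import java.util.Arrays;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;

public class OptionsSketch {
  public static void main(String[] args) {
    // Illustrative only; not part of the DistCp patch. Paths are hypothetical.
    DistCpOptions options = new DistCpOptions(
        Arrays.asList(new Path("hdfs://nn1/data/src")),   // hypothetical source
        new Path("hdfs://nn2/data/dst"));                 // hypothetical target

    options.setSyncFolder(true);       // -update; must precede setDeleteMissing(true)
    options.setDeleteMissing(true);    // -delete; validate() rejects this without update/overwrite
    options.preserve(FileAttribute.USER);
    options.preserve(FileAttribute.GROUP);

    System.out.println(options);       // uses the toString() defined above
  }
}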

+ 100 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/FileBasedCopyListing.java

@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * FileBasedCopyListing implements the CopyListing interface,
+ * to create the copy-listing for DistCp,
+ * by iterating over all source paths mentioned in a specified input-file.
+ */
+public class FileBasedCopyListing extends CopyListing {
+
+  private final CopyListing globbedListing;
+  /**
+   * Constructor, to initialize base-class.
+   * @param configuration The input Configuration object.
+   * @param credentials - Credentials object on which the FS delegation tokens are cached. If null
+   * delegation token caching is skipped
+   */
+  public FileBasedCopyListing(Configuration configuration, Credentials credentials) {
+    super(configuration, credentials);
+    globbedListing = new GlobbedCopyListing(getConf(), credentials);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException {
+  }
+
+  /**
+   * Implementation of CopyListing::buildListing().
+   *   Iterates over all source paths mentioned in the input-file.
+   * @param pathToListFile Path on HDFS where the listing file is written.
+   * @param options Input Options for DistCp (indicating source/target paths.)
+   * @throws IOException
+   */
+  @Override
+  public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
+    DistCpOptions newOption = new DistCpOptions(options);
+    newOption.setSourcePaths(fetchFileList(options.getSourceFileListing()));
+    globbedListing.buildListing(pathToListFile, newOption);
+  }
+
+  private List<Path> fetchFileList(Path sourceListing) throws IOException {
+    List<Path> result = new ArrayList<Path>();
+    FileSystem fs = sourceListing.getFileSystem(getConf());
+    BufferedReader input = null;
+    try {
+      input = new BufferedReader(new InputStreamReader(fs.open(sourceListing)));
+      String line = input.readLine();
+      while (line != null) {
+        result.add(new Path(line));
+        line = input.readLine();
+      }
+    } finally {
+      IOUtils.closeStream(input);
+    }
+    return result;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getBytesToCopy() {
+    return globbedListing.getBytesToCopy();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getNumberOfPaths() {
+    return globbedListing.getNumberOfPaths();
+  }
+}
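
Editor's note, not part of the patch: fetchFileList() above treats the -f listing as plain text, one fully qualified source path per line; each line becomes a Path that is then globbed like any command-line source. A hypothetical listing file might look like:

hdfs://nn1/user/alice/dataset/part-00000
hdfs://nn1/user/alice/dataset/part-00001
hdfs://nn1/user/alice/logs/2012-01-26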

+ 105 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java

@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * GlobbedCopyListing implements the CopyListing interface, to create the copy
+ * listing-file by "globbing" all specified source paths (wild-cards and all.)
+ */
+public class GlobbedCopyListing extends CopyListing {
+  private static final Log LOG = LogFactory.getLog(GlobbedCopyListing.class);
+
+  private final CopyListing simpleListing;
+  /**
+   * Constructor, to initialize the configuration.
+   * @param configuration The input Configuration object.
+   * @param credentials Credentials object on which the FS delegation tokens are cached. If null
+   * delegation token caching is skipped
+   */
+  public GlobbedCopyListing(Configuration configuration, Credentials credentials) {
+    super(configuration, credentials);
+    simpleListing = new SimpleCopyListing(getConf(), credentials) ;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException {
+  }
+
+  /**
+   * Implementation of CopyListing::buildListing().
+   * Creates the copy listing by "globbing" all source-paths.
+   * @param pathToListingFile The location at which the copy-listing file
+   *                           is to be created.
+   * @param options Input Options for DistCp (indicating source/target paths.)
+   * @throws IOException
+   */
+  @Override
+  public void doBuildListing(Path pathToListingFile,
+                             DistCpOptions options) throws IOException {
+
+    List<Path> globbedPaths = new ArrayList<Path>();
+    if (options.getSourcePaths().isEmpty()) {
+      throw new InvalidInputException("Nothing to process. Source paths::EMPTY");  
+    }
+
+    for (Path p : options.getSourcePaths()) {
+      FileSystem fs = p.getFileSystem(getConf());
+      FileStatus[] inputs = fs.globStatus(p);
+
+      if(inputs != null && inputs.length > 0) {
+        for (FileStatus onePath: inputs) {
+          globbedPaths.add(onePath.getPath());
+        }
+      } else {
+        throw new InvalidInputException(p + " doesn't exist");        
+      }
+    }
+
+    DistCpOptions optionsGlobbed = new DistCpOptions(options);
+    optionsGlobbed.setSourcePaths(globbedPaths);
+    simpleListing.buildListing(pathToListingFile, optionsGlobbed);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getBytesToCopy() {
+    return simpleListing.getBytesToCopy();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getNumberOfPaths() {
+    return simpleListing.getNumberOfPaths();
+  }
+
+}
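
Editor's note, not part of the patch: a hedged sketch of driving GlobbedCopyListing directly, outside the DistCp driver. Wild-cards in each source path are expanded with FileSystem.globStatus(), and the expanded paths are handed to SimpleCopyListing, which writes the actual listing file. The class name and all paths below are hypothetical; passing null credentials skips delegation-token caching, as the constructor documents.

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.GlobbedCopyListing;

public class GlobbedListingSketch {
  public static void main(String[] args) throws Exception {
    // Illustrative only; not part of the DistCp patch. Paths are hypothetical.
    Configuration conf = new Configuration();

    DistCpOptions options = new DistCpOptions(
        Arrays.asList(new Path("hdfs://nn1/data/2012-01-*")),  // wildcard source
        new Path("hdfs://nn2/backup"));                        // target

    // Writes one SequenceFile entry per globbed file/directory.
    new GlobbedCopyListing(conf, null)
        .buildListing(new Path("/tmp/distcp-meta/fileList.seq"), options);
  }
}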

+ 246 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java

@@ -0,0 +1,246 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.cli.*;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+
+import java.util.*;
+
+/**
+ * The OptionsParser parses out the command-line options passed to DistCp,
+ * and interprets those specific to DistCp, to create an Options object.
+ */
+public class OptionsParser {
+
+  private static final Log LOG = LogFactory.getLog(OptionsParser.class);
+
+  private static final Options cliOptions = new Options();      
+
+  static {
+    for (DistCpOptionSwitch option : DistCpOptionSwitch.values()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Adding option " + option.getOption());
+      }
+      cliOptions.addOption(option.getOption());
+    }
+  }
+
+  private static class CustomParser extends GnuParser {
+    @Override
+    protected String[] flatten(Options options, String[] arguments, boolean stopAtNonOption) {
+      for (int index = 0; index < arguments.length; index++) {
+        if (arguments[index].equals("-" + DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) {
+          arguments[index] = "-prbugp";
+        }
+      }
+      return super.flatten(options, arguments, stopAtNonOption);
+    }
+  }
+
+  /**
+   * The parse method parses the command-line options, and creates
+   * a corresponding Options object.
+   * @param args Command-line arguments (excluding the options consumed
+   *              by the GenericOptionsParser).
+   * @return The Options object, corresponding to the specified command-line.
+   * @throws IllegalArgumentException: Thrown if the parse fails.
+   */
+  public static DistCpOptions parse(String args[]) throws IllegalArgumentException {
+
+    CommandLineParser parser = new CustomParser();
+
+    CommandLine command;
+    try {
+      command = parser.parse(cliOptions, args, true);
+    } catch (ParseException e) {
+      throw new IllegalArgumentException("Unable to parse arguments. " +
+        Arrays.toString(args), e);
+    }
+
+    DistCpOptions option;
+    Path targetPath;
+    List<Path> sourcePaths = new ArrayList<Path>();
+
+    String leftOverArgs[] = command.getArgs();
+    if (leftOverArgs == null || leftOverArgs.length < 1) {
+      throw new IllegalArgumentException("Target path not specified");
+    }
+
+    //Last Argument is the target path
+    targetPath = new Path(leftOverArgs[leftOverArgs.length -1].trim());
+
+    //Copy any source paths in the arguments to the list
+    for (int index = 0; index < leftOverArgs.length - 1; index++) {
+      sourcePaths.add(new Path(leftOverArgs[index].trim()));
+    }
+
+    /* If the command has a source file listing, use it; else fall back on the
+       source paths in the arguments. If both are present, throw an exception and bail */
+    if (command.hasOption(DistCpOptionSwitch.SOURCE_FILE_LISTING.getSwitch())) {
+      if (!sourcePaths.isEmpty()) {
+        throw new IllegalArgumentException("Both source file listing and source paths present");
+      }
+      option = new DistCpOptions(new Path(getVal(command, DistCpOptionSwitch.
+              SOURCE_FILE_LISTING.getSwitch())), targetPath);
+    } else {
+      if (sourcePaths.isEmpty()) {
+        throw new IllegalArgumentException("Neither source file listing nor source paths present");
+      }
+      option = new DistCpOptions(sourcePaths, targetPath);
+    }
+
+    //Process all the other option switches and set options appropriately
+    if (command.hasOption(DistCpOptionSwitch.IGNORE_FAILURES.getSwitch())) {
+      option.setIgnoreFailures(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.ATOMIC_COMMIT.getSwitch())) {
+      option.setAtomicCommit(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.WORK_PATH.getSwitch()) &&
+        option.shouldAtomicCommit()) {
+      String workPath = getVal(command, DistCpOptionSwitch.WORK_PATH.getSwitch());
+      if (workPath != null && !workPath.isEmpty()) {
+        option.setAtomicWorkPath(new Path(workPath));
+      }
+    } else if (command.hasOption(DistCpOptionSwitch.WORK_PATH.getSwitch())) {
+      throw new IllegalArgumentException("-tmp work-path can only be specified along with -atomic");      
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.LOG_PATH.getSwitch())) {
+      option.setLogPath(new Path(getVal(command, DistCpOptionSwitch.LOG_PATH.getSwitch())));
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SYNC_FOLDERS.getSwitch())) {
+      option.setSyncFolder(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.OVERWRITE.getSwitch())) {
+      option.setOverwrite(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.DELETE_MISSING.getSwitch())) {
+      option.setDeleteMissing(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SKIP_CRC.getSwitch())) {
+      option.setSkipCRC(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.BLOCKING.getSwitch())) {
+      option.setBlocking(false);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.BANDWIDTH.getSwitch())) {
+      try {
+        Integer mapBandwidth = Integer.parseInt(
+            getVal(command, DistCpOptionSwitch.BANDWIDTH.getSwitch()).trim());
+        option.setMapBandwidth(mapBandwidth);
+      } catch (NumberFormatException e) {
+        throw new IllegalArgumentException("Bandwidth specified is invalid: " +
+            getVal(command, DistCpOptionSwitch.BANDWIDTH.getSwitch()), e);
+      }
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SSL_CONF.getSwitch())) {
+      option.setSslConfigurationFile(command.
+          getOptionValue(DistCpOptionSwitch.SSL_CONF.getSwitch()));
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.MAX_MAPS.getSwitch())) {
+      try {
+        Integer maps = Integer.parseInt(
+            getVal(command, DistCpOptionSwitch.MAX_MAPS.getSwitch()).trim());
+        option.setMaxMaps(maps);
+      } catch (NumberFormatException e) {
+        throw new IllegalArgumentException("Number of maps is invalid: " +
+            getVal(command, DistCpOptionSwitch.MAX_MAPS.getSwitch()), e);
+      }
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.COPY_STRATEGY.getSwitch())) {
+      option.setCopyStrategy(
+            getVal(command, DistCpOptionSwitch.COPY_STRATEGY.getSwitch()));
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) {
+      String attributes =
+          getVal(command, DistCpOptionSwitch.PRESERVE_STATUS.getSwitch());
+      if (attributes == null || attributes.isEmpty()) {
+        for (FileAttribute attribute : FileAttribute.values()) {
+          option.preserve(attribute);
+        }
+      } else {
+        for (int index = 0; index < attributes.length(); index++) {
+          option.preserve(FileAttribute.
+              getAttribute(attributes.charAt(index)));
+        }
+      }
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.FILE_LIMIT.getSwitch())) {
+      String fileLimitString = getVal(command,
+                              DistCpOptionSwitch.FILE_LIMIT.getSwitch());
+      try {
+        Integer.parseInt(fileLimitString);
+      }
+      catch (NumberFormatException e) {
+        throw new IllegalArgumentException("File-limit is invalid: "
+                                            + fileLimitString, e);
+      }
+      LOG.warn(DistCpOptionSwitch.FILE_LIMIT.getSwitch() + " is a deprecated" +
+              " option. Ignoring.");
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SIZE_LIMIT.getSwitch())) {
+      String sizeLimitString = getVal(command,
+                              DistCpOptionSwitch.SIZE_LIMIT.getSwitch());
+      try {
+        Long.parseLong(sizeLimitString);
+      }
+      catch (NumberFormatException e) {
+        throw new IllegalArgumentException("Size-limit is invalid: "
+                                            + sizeLimitString, e);
+      }
+      LOG.warn(DistCpOptionSwitch.SIZE_LIMIT.getSwitch() + " is a deprecated" +
+              " option. Ignoring.");
+    }
+
+    return option;
+  }
+
+  private static String getVal(CommandLine command, String swtch) {
+    String optionValue = command.getOptionValue(swtch);
+    if (optionValue == null) {
+      return null;
+    } else {
+      return optionValue.trim();
+    }
+  }
+
+  public static void usage() {
+    HelpFormatter formatter = new HelpFormatter();
+    formatter.printHelp("distcp OPTIONS [source_path...] <target_path>\n\nOPTIONS", cliOptions);
+  }
+}
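
Editor's note, not part of the patch: a minimal usage sketch of OptionsParser. The arguments mirror a command line such as "hadoop distcp -update -delete -m 20 <src> <dst>", where every token before the last is a source path and the last token is the target; the cluster URIs and class name are hypothetical.

import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.OptionsParser;

public class ParserSketch {
  public static void main(String[] args) {
    // Illustrative only; not part of the DistCp patch. Paths are hypothetical.
    DistCpOptions options = OptionsParser.parse(new String[] {
        "-update", "-delete", "-m", "20",
        "hdfs://nn1/data/src",   // source
        "hdfs://nn2/data/dst"    // target is always the last argument
    });

    System.out.println(options.shouldSyncFolder());     // true
    System.out.println(options.shouldDeleteMissing());  // true
    System.out.println(options.getMaxMaps());           // 20
  }
}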

+ 275 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java

@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.*;
+import java.util.Stack;
+
+/**
+ * The SimpleCopyListing is responsible for making the exhaustive list of
+ * all files/directories under its specified list of input-paths.
+ * These are written into the specified copy-listing file.
+ * Note: The SimpleCopyListing doesn't handle wild-cards in the input-paths.
+ */
+public class SimpleCopyListing extends CopyListing {
+  private static final Log LOG = LogFactory.getLog(SimpleCopyListing.class);
+
+  private long totalPaths = 0;
+  private long totalBytesToCopy = 0;
+
+  /**
+   * Protected constructor, to initialize configuration.
+   *
+   * @param configuration The input configuration, with which the source/target FileSystems may be accessed.
+   * @param credentials - Credentials object on which the FS delegation tokens are cached. If null
+   * delegation token caching is skipped
+   */
+  protected SimpleCopyListing(Configuration configuration, Credentials credentials) {
+    super(configuration, credentials);
+  }
+
+  @Override
+  protected void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException {
+
+    Path targetPath = options.getTargetPath();
+    FileSystem targetFS = targetPath.getFileSystem(getConf());
+    boolean targetIsFile = targetFS.isFile(targetPath);
+
+    //If target is a file, then source has to be single file
+    if (targetIsFile) {
+      if (options.getSourcePaths().size() > 1) {
+        throw new InvalidInputException("Multiple source being copied to a file: " +
+            targetPath);
+      }
+
+      Path srcPath = options.getSourcePaths().get(0);
+      FileSystem sourceFS = srcPath.getFileSystem(getConf());
+      if (!sourceFS.isFile(srcPath)) {
+        throw new InvalidInputException("Cannot copy " + srcPath +
+            ", which is not a file to " + targetPath);
+      }
+    }
+
+    if (options.shouldAtomicCommit() && targetFS.exists(targetPath)) {
+      throw new InvalidInputException("Target path for atomic-commit already exists: " +
+        targetPath + ". Cannot atomic-commit to pre-existing target-path.");
+    }
+
+    for (Path path: options.getSourcePaths()) {
+      FileSystem fs = path.getFileSystem(getConf());
+      if (!fs.exists(path)) {
+        throw new InvalidInputException(path + " doesn't exist");
+      }
+    }
+
+    /* This is required to allow map tasks to access each of the source
+       clusters. This retrieves the delegation token for each unique
+       file system and adds it to the job's private credential store
+     */
+    Credentials credentials = getCredentials();
+    if (credentials != null) {
+      Path[] inputPaths = options.getSourcePaths().toArray(new Path[1]);
+      TokenCache.obtainTokensForNamenodes(credentials, inputPaths, getConf());
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
+
+    SequenceFile.Writer fileListWriter = null;
+
+    try {
+      fileListWriter = getWriter(pathToListingFile);
+
+      for (Path path: options.getSourcePaths()) {
+        FileSystem sourceFS = path.getFileSystem(getConf());
+        path = makeQualified(path);
+
+        FileStatus rootStatus = sourceFS.getFileStatus(path);
+        Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
+        boolean localFile = (rootStatus.getClass() != FileStatus.class);
+
+        FileStatus[] sourceFiles = sourceFS.listStatus(path);
+        if (sourceFiles != null && sourceFiles.length > 0) {
+          for (FileStatus sourceStatus: sourceFiles) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
+            }
+            writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+
+            if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
+              }
+              traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+            }
+          }
+        } else {
+          writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile);
+        }
+      }
+    } finally {
+      IOUtils.closeStream(fileListWriter);
+    }
+  }
+
+  private Path computeSourceRootPath(FileStatus sourceStatus,
+                                     DistCpOptions options) throws IOException {
+
+    Path target = options.getTargetPath();
+    FileSystem targetFS = target.getFileSystem(getConf());
+
+    boolean solitaryFile = options.getSourcePaths().size() == 1
+                                                && !sourceStatus.isDirectory();
+
+    if (solitaryFile) {
+      if (targetFS.isFile(target) || !targetFS.exists(target)) {
+        return sourceStatus.getPath();
+      } else {
+        return sourceStatus.getPath().getParent();
+      }
+    } else {
+      boolean specialHandling = (options.getSourcePaths().size() == 1 && !targetFS.exists(target)) ||
+          options.shouldSyncFolder() || options.shouldOverwrite();
+
+      return specialHandling && sourceStatus.isDirectory() ? sourceStatus.getPath() :
+          sourceStatus.getPath().getParent();
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getBytesToCopy() {
+    return totalBytesToCopy;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getNumberOfPaths() {
+    return totalPaths;
+  }
+
+  private Path makeQualified(Path path) throws IOException {
+    final FileSystem fs = path.getFileSystem(getConf());
+    return path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
+  }
+
+  private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
+    FileSystem fs = pathToListFile.getFileSystem(getConf());
+    if (fs.exists(pathToListFile)) {
+      fs.delete(pathToListFile, false);
+    }
+    return SequenceFile.createWriter(getConf(),
+            SequenceFile.Writer.file(pathToListFile),
+            SequenceFile.Writer.keyClass(Text.class),
+            SequenceFile.Writer.valueClass(FileStatus.class),
+            SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
+  }
+
+  private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
+                                    FileStatus fileStatus) throws IOException {
+    return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
+  }
+
+  private static FileStatus[] getChildren(FileSystem fileSystem,
+                                         FileStatus parent) throws IOException {
+    return fileSystem.listStatus(parent.getPath());
+  }
+
+  private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
+                                         FileStatus sourceStatus,
+                                         Path sourcePathRoot, boolean localFile)
+                                         throws IOException {
+    FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
+    Stack<FileStatus> pathStack = new Stack<FileStatus>();
+    pathStack.push(sourceStatus);
+
+    while (!pathStack.isEmpty()) {
+      for (FileStatus child: getChildren(sourceFS, pathStack.pop())) {
+        if (LOG.isDebugEnabled())
+          LOG.debug("Recording source-path: "
+                    + sourceStatus.getPath() + " for copy.");
+        writeToFileListing(fileListWriter, child, sourcePathRoot, localFile);
+        if (isDirectoryAndNotEmpty(sourceFS, child)) {
+          if (LOG.isDebugEnabled())
+            LOG.debug("Traversing non-empty source dir: "
+                       + sourceStatus.getPath());
+          pathStack.push(child);
+        }
+      }
+    }
+  }
+
+  private void writeToFileListing(SequenceFile.Writer fileListWriter,
+                                  FileStatus fileStatus, Path sourcePathRoot,
+                                  boolean localFile) throws IOException {
+    if (fileStatus.getPath().equals(sourcePathRoot) && fileStatus.isDirectory())
+      return; // Skip the root-paths.
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot,
+        fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath());
+    }
+
+    FileStatus status = fileStatus;
+    if (localFile) {
+      status = getFileStatus(fileStatus);
+    }
+
+    fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
+        fileStatus.getPath())), status);
+    fileListWriter.sync();
+
+    if (!fileStatus.isDirectory()) {
+      totalBytesToCopy += fileStatus.getLen();
+    }
+    totalPaths++;
+  }
+
+  private static final ByteArrayOutputStream buffer = new ByteArrayOutputStream(64);
+  private DataInputBuffer in = new DataInputBuffer();
+  
+  private FileStatus getFileStatus(FileStatus fileStatus) throws IOException {
+    FileStatus status = new FileStatus();
+
+    buffer.reset();
+    DataOutputStream out = new DataOutputStream(buffer);
+    fileStatus.write(out);
+
+    in.reset(buffer.toByteArray(), 0, buffer.size());
+    status.readFields(in);
+    return status;
+  }
+}
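
Editor's note, not part of the patch: the listing that SimpleCopyListing writes is a SequenceFile keyed by the path relative to the source root (Text), with the corresponding FileStatus as the value. A hedged sketch of reading one back, assuming a hypothetical listing location and class name, is:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ListingReaderSketch {
  public static void main(String[] args) throws Exception {
    // Illustrative only; not part of the DistCp patch. The listing path is hypothetical.
    Configuration conf = new Configuration();
    Path listing = new Path("/tmp/distcp-meta/fileList.seq");

    SequenceFile.Reader reader =
        new SequenceFile.Reader(conf, SequenceFile.Reader.file(listing));
    try {
      Text relPath = new Text();
      FileStatus status = new FileStatus();
      while (reader.next(relPath, status)) {
        System.out.println(relPath + " -> " +
            (status.isDirectory() ? "dir" : status.getLen() + " bytes"));
      }
    } finally {
      reader.close();
    }
  }
}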

+ 297 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java

@@ -0,0 +1,297 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
+import org.apache.hadoop.tools.*;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.util.DistCpUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
+/**
+ * The CopyCommitter class is DistCp's OutputCommitter implementation. It is
+ * responsible for handling the completion/cleanup of the DistCp run.
+ * Specifically, it does the following:
+ *  1. Cleanup of the meta-folder (where DistCp maintains its file-list, etc.)
+ *  2. Preservation of user/group/replication-factor on any directories that
+ *     have been copied. (Files are taken care of in their map-tasks.)
+ *  3. Atomic-move of data from the temporary work-folder to the final path
+ *     (if atomic-commit was opted for).
+ *  4. Deletion of files from the target that are missing at source (if opted for).
+ *  5. Cleanup of any partially copied files, from previous, failed attempts.
+ */
+public class CopyCommitter extends FileOutputCommitter {
+  private static final Log LOG = LogFactory.getLog(CopyCommitter.class);
+
+  private final TaskAttemptContext taskAttemptContext;
+
+  /**
+   * Create a output committer
+   *
+   * @param outputPath the job's output path
+   * @param context    the task's context
+   * @throws IOException - Exception if any
+   */
+  public CopyCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
+    super(outputPath, context);
+    this.taskAttemptContext = context;
+  }
+
+  /** @inheritDoc */
+  @Override
+  public void commitJob(JobContext jobContext) throws IOException {
+    Configuration conf = jobContext.getConfiguration();
+    super.commitJob(jobContext);
+
+    cleanupTempFiles(jobContext);
+
+    String attributes = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
+    if (attributes != null && !attributes.isEmpty()) {
+      preserveFileAttributesForDirectories(conf);
+    }
+
+    try {
+      if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)) {
+        deleteMissing(conf);
+      } else if (conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false)) {
+        commitData(conf);
+      }
+      taskAttemptContext.setStatus("Commit Successful");
+    }
+    finally {
+      cleanup(conf);
+    }
+  }
+
+  /** @inheritDoc */
+  @Override
+  public void abortJob(JobContext jobContext,
+                       JobStatus.State state) throws IOException {
+    try {
+      super.abortJob(jobContext, state);
+    } finally {
+      cleanupTempFiles(jobContext);
+      cleanup(jobContext.getConfiguration());
+    }
+  }
+
+  private void cleanupTempFiles(JobContext context) {
+    try {
+      Configuration conf = context.getConfiguration();
+
+      Path targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+      FileSystem targetFS = targetWorkPath.getFileSystem(conf);
+
+      String jobId = context.getJobID().toString();
+      deleteAttemptTempFiles(targetWorkPath, targetFS, jobId);
+      deleteAttemptTempFiles(targetWorkPath.getParent(), targetFS, jobId);
+    } catch (Throwable t) {
+      LOG.warn("Unable to cleanup temp files", t);
+    }
+  }
+
+  private void deleteAttemptTempFiles(Path targetWorkPath,
+                                      FileSystem targetFS,
+                                      String jobId) throws IOException {
+
+    FileStatus[] tempFiles = targetFS.globStatus(
+        new Path(targetWorkPath, ".distcp.tmp." + jobId.replaceAll("job","attempt") + "*"));
+
+    if (tempFiles != null && tempFiles.length > 0) {
+      for (FileStatus file : tempFiles) {
+        LOG.info("Cleaning up " + file.getPath());
+        targetFS.delete(file.getPath(), false);
+      }
+    }
+  }
+
+  /**
+   * Cleanup meta folder and other temporary files
+   *
+   * @param conf - Job Configuration
+   */
+  private void cleanup(Configuration conf) {
+    Path metaFolder = new Path(conf.get(DistCpConstants.CONF_LABEL_META_FOLDER));
+    try {
+      FileSystem fs = metaFolder.getFileSystem(conf);
+      LOG.info("Cleaning up temporary work folder: " + metaFolder);
+      fs.delete(metaFolder, true);
+    } catch (IOException ignore) {
+      LOG.error("Exception encountered ", ignore);
+    }
+  }
+
+  // This method changes the target-directories' file-attributes (owner,
+  // user/group permissions, etc.) based on the corresponding source directories.
+  private void preserveFileAttributesForDirectories(Configuration conf) throws IOException {
+    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
+    LOG.info("About to preserve attributes: " + attrSymbols);
+
+    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);
+
+    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
+    FileSystem clusterFS = sourceListing.getFileSystem(conf);
+    SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
+                                      SequenceFile.Reader.file(sourceListing));
+    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();
+
+    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+
+    long preservedEntries = 0;
+    try {
+      FileStatus srcFileStatus = new FileStatus();
+      Text srcRelPath = new Text();
+
+      // Iterate over every source path that was copied.
+      while (sourceReader.next(srcRelPath, srcFileStatus)) {
+        // File-attributes for files are set at the time of copy,
+        // in the map-task.
+        if (! srcFileStatus.isDirectory()) continue;
+
+        Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
+
+        // Skip the root folder.
+        // Status can't be preserved on root-folder. (E.g. multiple paths may
+        // be copied to a single target folder. Which source-attributes to use
+        // on the target is undefined.)
+        if (targetRoot.equals(targetFile)) continue;
+
+        FileSystem targetFS = targetFile.getFileSystem(conf);
+        DistCpUtils.preserve(targetFS, targetFile, srcFileStatus,  attributes);
+
+        taskAttemptContext.progress();
+        taskAttemptContext.setStatus("Preserving status on directory entries. [" +
+            sourceReader.getPosition() * 100 / totalLen + "%]");
+      }
+    } finally {
+      IOUtils.closeStream(sourceReader);
+    }
+    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
+  }
+
+  // This method deletes "extra" files from the target, if they're not
+  // available at the source.
+  private void deleteMissing(Configuration conf) throws IOException {
+    LOG.info("-delete option is enabled. About to remove entries from " +
+        "target that are missing in source");
+
+    // Sort the source-file listing alphabetically.
+    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
+    FileSystem clusterFS = sourceListing.getFileSystem(conf);
+    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);
+
+    // Similarly, create the listing of target-files. Sort alphabetically.
+    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
+    CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);
+
+    List<Path> targets = new ArrayList<Path>(1);
+    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    targets.add(targetFinalPath);
+    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
+
+    target.buildListing(targetListing, options);
+    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
+    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();
+
+    SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
+                                 SequenceFile.Reader.file(sortedSourceListing));
+    SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
+                                 SequenceFile.Reader.file(sortedTargetListing));
+
+    // Walk both source and target file listings.
+    // Delete all from target that doesn't also exist on source.
+    long deletedEntries = 0;
+    try {
+      FileStatus srcFileStatus = new FileStatus();
+      Text srcRelPath = new Text();
+      FileStatus trgtFileStatus = new FileStatus();
+      Text trgtRelPath = new Text();
+
+      FileSystem targetFS = targetFinalPath.getFileSystem(conf);
+      boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
+      while (targetReader.next(trgtRelPath, trgtFileStatus)) {
+        // Skip sources that don't exist on target.
+        while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
+          srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
+        }
+
+        if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;
+
+        // Target doesn't exist at source. Delete.
+        boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
+            targetFS.delete(trgtFileStatus.getPath(), true));
+        if (result) {
+          LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
+          deletedEntries++;
+        } else {
+          throw new IOException("Unable to delete " + trgtFileStatus.getPath());
+        }
+        taskAttemptContext.progress();
+        taskAttemptContext.setStatus("Deleting missing files from target. [" +
+            targetReader.getPosition() * 100 / totalLen + "%]");
+      }
+    } finally {
+      IOUtils.closeStream(sourceReader);
+      IOUtils.closeStream(targetReader);
+    }
+    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
+  }
+
+  private void commitData(Configuration conf) throws IOException {
+
+    Path workDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+    Path finalDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    FileSystem targetFS = workDir.getFileSystem(conf);
+
+    LOG.info("Atomic commit enabled. Moving " + workDir + " to " + finalDir);
+    if (targetFS.exists(finalDir) && targetFS.exists(workDir)) {
+      LOG.error("Pre-existing final-path found at: " + finalDir);
+      throw new IOException("Target-path can't be committed to because it " +
+          "exists at " + finalDir + ". Copied data is in temp-dir: " + workDir + ". ");
+    }
+
+    boolean result = targetFS.rename(workDir, finalDir);
+    if (!result) {
+      LOG.warn("Rename failed. Perhaps data already moved. Verifying...");
+      result = targetFS.exists(finalDir) && !targetFS.exists(workDir);
+    }
+    if (result) {
+      LOG.info("Data committed successfully to " + finalDir);
+      taskAttemptContext.setStatus("Data committed successfully to " + finalDir);
+    } else {
+      LOG.error("Unable to commit data to " + finalDir);
+      throw new IOException("Atomic commit failed. Temporary data in " + workDir +
+        ", Unable to move to " + finalDir);
+    }
+  }
+}
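
The deleteMissing() method above is essentially a two-pointer walk over the two sorted listings: advance through the target listing, and for each target entry let the source listing catch up; any target entry with no matching source entry is deleted. A minimal standalone sketch of that walk, using in-memory lists in place of the SequenceFile readers (class and method names here are illustrative only, not part of the patch):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class DeleteMissingSketch {
  // Given sorted relative paths from the source and target listings, return
  // the target entries with no counterpart at the source (deletion candidates).
  static List<String> extraTargets(List<String> sortedSource, List<String> sortedTarget) {
    List<String> toDelete = new ArrayList<String>();
    int s = 0;
    for (String target : sortedTarget) {
      // Advance the source pointer past entries that sort before this target.
      while (s < sortedSource.size() && target.compareTo(sortedSource.get(s)) > 0) {
        s++;
      }
      if (s < sortedSource.size() && target.equals(sortedSource.get(s))) {
        continue;               // Present on both sides; keep it.
      }
      toDelete.add(target);     // Missing at source; would be deleted from target.
    }
    return toDelete;
  }

  public static void main(String[] args) {
    List<String> source = Arrays.asList("/a", "/a/1", "/c");
    List<String> target = Arrays.asList("/a", "/a/1", "/b", "/c", "/d");
    System.out.println(extraTargets(source, target));  // Prints: [/b, /d]
  }
}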

+ 330 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java

@@ -0,0 +1,330 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptionSwitch;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.util.StringUtils;
+
+import java.io.*;
+import java.util.EnumSet;
+import java.util.Arrays;
+
+/**
+ * Mapper class that executes the DistCp copy operation.
+ * Extends the o.a.h.mapreduce.Mapper<> class.
+ */
+public class CopyMapper extends Mapper<Text, FileStatus, Text, Text> {
+
+  /**
+   * Hadoop counters for the DistCp CopyMapper.
+   * (These have been kept identical to the old DistCp,
+   * for backward compatibility.)
+   */
+  public static enum Counter {
+    COPY,         // Number of files received by the mapper for copy.
+    SKIP,         // Number of files skipped.
+    FAIL,         // Number of files that failed to be copied.
+    BYTESCOPIED,  // Number of bytes actually copied by the copy-mapper, total.
+    BYTESEXPECTED,// Number of bytes expected to be copied.
+    BYTESFAILED,  // Number of bytes that failed to be copied.
+    BYTESSKIPPED, // Number of bytes that were skipped from copy.
+  }
+
+  private static Log LOG = LogFactory.getLog(CopyMapper.class);
+
+  private Configuration conf;
+
+  private boolean syncFolders = false;
+  private boolean ignoreFailures = false;
+  private boolean skipCrc = false;
+  private boolean overWrite = false;
+  private EnumSet<FileAttribute> preserve = EnumSet.noneOf(FileAttribute.class);
+
+  private FileSystem targetFS = null;
+  private Path    targetWorkPath = null;
+
+  /**
+   * Implementation of the Mapper::setup() method. This extracts the DistCp
+   * options specified in the Job's configuration, to set up the mapper.
+   * @param context Mapper's context.
+   * @throws IOException On IO failure.
+   * @throws InterruptedException If the job is interrupted.
+   */
+  @Override
+  public void setup(Context context) throws IOException, InterruptedException {
+    conf = context.getConfiguration();
+
+    syncFolders = conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false);
+    ignoreFailures = conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false);
+    skipCrc = conf.getBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), false);
+    overWrite = conf.getBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), false);
+    preserve = DistCpUtils.unpackAttributes(conf.get(DistCpOptionSwitch.
+        PRESERVE_STATUS.getConfigLabel()));
+
+    targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+    Path targetFinalPath = new Path(conf.get(
+            DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    targetFS = targetFinalPath.getFileSystem(conf);
+
+    if (targetFS.exists(targetFinalPath) && targetFS.isFile(targetFinalPath)) {
+      overWrite = true; // When target is an existing file, overwrite it.
+    }
+
+    if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) {
+      initializeSSLConf(context);
+    }
+  }
+
+  /**
+   * Initializes the SSL configuration, if one has been specified in the job
+   * configuration.
+   *
+   * @throws IOException On failure to load or write the SSL configuration.
+   */
+  private void initializeSSLConf(Context context) throws IOException {
+    LOG.info("Initializing SSL configuration");
+    
+    String workDir = conf.get(JobContext.JOB_LOCAL_DIR) + "/work";
+    Path[] cacheFiles = context.getLocalCacheFiles();
+
+    Configuration sslConfig = new Configuration(false);
+    String sslConfFileName = conf.get(DistCpConstants.CONF_LABEL_SSL_CONF);
+    Path sslClient = findCacheFile(cacheFiles, sslConfFileName);
+    if (sslClient == null) {
+      LOG.warn("SSL Client config file not found. Was looking for " + sslConfFileName +
+          " in " + Arrays.toString(cacheFiles));
+      return;
+    }
+    sslConfig.addResource(sslClient);
+
+    String trustStoreFile = conf.get("ssl.client.truststore.location");
+    Path trustStorePath = findCacheFile(cacheFiles, trustStoreFile);
+    sslConfig.set("ssl.client.truststore.location", trustStorePath.toString());
+
+    String keyStoreFile = conf.get("ssl.client.keystore.location");
+    Path keyStorePath = findCacheFile(cacheFiles, keyStoreFile);
+    sslConfig.set("ssl.client.keystore.location", keyStorePath.toString());
+
+    try {
+      OutputStream out = new FileOutputStream(workDir + "/" + sslConfFileName);
+      try {
+        sslConfig.writeXml(out);
+      } finally {
+        out.close();
+      }
+      conf.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfFileName);
+    } catch (IOException e) {
+      LOG.warn("Unable to write out the ssl configuration. " +
+          "Will fall back to default ssl-client.xml in class path, if there is one", e);
+    }
+  }
+
+  /**
+   * Finds an entry in the distributed cache.
+   *
+   * @param cacheFiles All localized cache files.
+   * @param fileName The file name to search for.
+   * @return Path of the file if found, else null.
+   */
+  private Path findCacheFile(Path[] cacheFiles, String fileName) {
+    if (cacheFiles != null && cacheFiles.length > 0) {
+      for (Path file : cacheFiles) {
+        if (file.getName().equals(fileName)) {
+          return file;
+        }
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Implementation of the Mapper<>::map(). Does the copy.
+   * @param relPath The file's path relative to the copy root; it is appended
+   *                to the target work-path to form the target path.
+   * @param sourceFileStatus FileStatus of the source file to be copied.
+   * @throws IOException
+   */
+  @Override
+  public void map(Text relPath, FileStatus sourceFileStatus, Context context)
+          throws IOException, InterruptedException {
+    Path sourcePath = sourceFileStatus.getPath();
+
+    if (LOG.isDebugEnabled())
+      LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);
+
+    Path target = new Path(targetWorkPath.makeQualified(targetFS.getUri(),
+                          targetFS.getWorkingDirectory()) + relPath.toString());
+
+    EnumSet<DistCpOptions.FileAttribute> fileAttributes
+            = getFileAttributeSettings(context);
+
+    final String description = "Copying " + sourcePath + " to " + target;
+    context.setStatus(description);
+
+    LOG.info(description);
+
+    try {
+      FileStatus sourceCurrStatus;
+      FileSystem sourceFS;
+      try {
+        sourceFS = sourcePath.getFileSystem(conf);
+        sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
+      } catch (FileNotFoundException e) {
+        throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
+      }
+
+      FileStatus targetStatus = null;
+
+      try {
+        targetStatus = targetFS.getFileStatus(target);
+      } catch (FileNotFoundException ignore) {
+        if (LOG.isDebugEnabled())
+          LOG.debug("Path could not be found: " + target, ignore);
+      }
+
+      if (targetStatus != null && (targetStatus.isDirectory() != sourceCurrStatus.isDirectory())) {
+        throw new IOException("Can't replace " + target + ". Target is " +
+            getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus));
+      }
+
+      if (sourceCurrStatus.isDirectory()) {
+        createTargetDirsWithRetry(description, target, context);
+        return;
+      }
+
+      if (skipFile(sourceFS, sourceCurrStatus, target)) {
+        LOG.info("Skipping copy of " + sourceCurrStatus.getPath()
+                 + " to " + target);
+        updateSkipCounters(context, sourceCurrStatus);
+        context.write(null, new Text("SKIP: " + sourceCurrStatus.getPath()));
+      }
+      else {
+        copyFileWithRetry(description, sourceCurrStatus, target, context,
+                          fileAttributes);
+      }
+
+      DistCpUtils.preserve(target.getFileSystem(conf), target,
+                           sourceCurrStatus, fileAttributes);
+
+    } catch (IOException exception) {
+      handleFailures(exception, sourceFileStatus, target, context);
+    }
+  }
+
+  private String getFileType(FileStatus fileStatus) {
+    return fileStatus == null ? "N/A" : (fileStatus.isDirectory() ? "dir" : "file");
+  }
+
+  private static EnumSet<DistCpOptions.FileAttribute>
+          getFileAttributeSettings(Mapper.Context context) {
+    String attributeString = context.getConfiguration().get(
+            DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel());
+    return DistCpUtils.unpackAttributes(attributeString);
+  }
+
+  private void copyFileWithRetry(String description, FileStatus sourceFileStatus,
+               Path target, Context context,
+               EnumSet<DistCpOptions.FileAttribute> fileAttributes) throws IOException {
+
+    long bytesCopied;
+    try {
+      bytesCopied = (Long)new RetriableFileCopyCommand(description)
+                       .execute(sourceFileStatus, target, context, fileAttributes);
+    } catch (Exception e) {
+      context.setStatus("Copy Failure: " + sourceFileStatus.getPath());
+      throw new IOException("File copy failed: " + sourceFileStatus.getPath() +
+          " --> " + target, e);
+    }
+    incrementCounter(context, Counter.BYTESEXPECTED, sourceFileStatus.getLen());
+    incrementCounter(context, Counter.BYTESCOPIED, bytesCopied);
+    incrementCounter(context, Counter.COPY, 1);
+  }
+
+  private void createTargetDirsWithRetry(String description,
+                   Path target, Context context) throws IOException {
+    try {
+      new RetriableDirectoryCreateCommand(description).execute(target, context);
+    } catch (Exception e) {
+      throw new IOException("mkdir failed for " + target, e);
+    }
+    incrementCounter(context, Counter.COPY, 1);
+  }
+
+  private static void updateSkipCounters(Context context,
+                                         FileStatus sourceFile) {
+    incrementCounter(context, Counter.SKIP, 1);
+    incrementCounter(context, Counter.BYTESSKIPPED, sourceFile.getLen());
+
+  }
+
+  private void handleFailures(IOException exception,
+                                     FileStatus sourceFileStatus, Path target,
+                                     Context context) throws IOException, InterruptedException {
+    LOG.error("Failure in copying " + sourceFileStatus.getPath() + " to " +
+                target, exception);
+
+    if (ignoreFailures && exception.getCause() instanceof
+            RetriableFileCopyCommand.CopyReadException) {
+      incrementCounter(context, Counter.FAIL, 1);
+      incrementCounter(context, Counter.BYTESFAILED, sourceFileStatus.getLen());
+      context.write(null, new Text("FAIL: " + sourceFileStatus.getPath() + " - " + 
+          StringUtils.stringifyException(exception)));
+    }
+    else
+      throw exception;
+  }
+
+  private static void incrementCounter(Context context, Counter counter,
+                                       long value) {
+    context.getCounter(counter).increment(value);
+  }
+
+  private boolean skipFile(FileSystem sourceFS, FileStatus source, Path target)
+                                          throws IOException {
+    return     targetFS.exists(target)
+            && !overWrite
+            && !mustUpdate(sourceFS, source, target);
+  }
+
+  private boolean mustUpdate(FileSystem sourceFS, FileStatus source, Path target)
+                                    throws IOException {
+    final FileStatus targetFileStatus = targetFS.getFileStatus(target);
+
+    return     syncFolders
+            && (
+                   targetFileStatus.getLen() != source.getLen()
+                || (!skipCrc &&
+                       !DistCpUtils.checksumsAreEqual(sourceFS,
+                                          source.getPath(), targetFS, target))
+                || (source.getBlockSize() != targetFileStatus.getBlockSize() &&
+                      preserve.contains(FileAttribute.BLOCKSIZE))
+               );
+  }
+}
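
The skip logic at the bottom of CopyMapper (skipFile() and mustUpdate()) is the heart of the -update/-overwrite handling. The following self-contained sketch restates that decision as a pure function over booleans, so the interplay of the flags is easier to see; all names are illustrative and not part of the patch:

public class SkipDecisionSketch {
  // A file is skipped only if it already exists on the target, -overwrite is
  // not set, and (under -update) length, checksum and, if preserved, block
  // size all match -- mirroring skipFile()/mustUpdate() above.
  static boolean skip(boolean targetExists, boolean overwrite, boolean syncFolders,
                      boolean sameLength, boolean sameChecksum, boolean sameBlockSize,
                      boolean preserveBlockSize, boolean skipCrc) {
    if (!targetExists || overwrite) {
      return false;                       // Nothing on target, or forced copy.
    }
    boolean mustUpdate = syncFolders
        && (!sameLength
            || (!skipCrc && !sameChecksum)
            || (preserveBlockSize && !sameBlockSize));
    return !mustUpdate;                   // Skip unless an update is required.
  }

  public static void main(String[] args) {
    // -update set and lengths differ: the file is copied (not skipped).
    System.out.println(skip(true, false, true, false, true, true, false, false)); // false
    // No -update, no -overwrite, target exists: the file is skipped.
    System.out.println(skip(true, false, false, true, true, true, false, false)); // true
  }
}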

+ 124 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java

@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.tools.DistCpConstants;
+
+import java.io.IOException;
+
+/**
+ * The CopyOutputFormat is the Hadoop OutputFormat used in DistCp.
+ * It sets up the Job's Configuration (in the Job-Context) with the settings
+ * for the work-directory, final commit-directory, etc. It also sets the right
+ * output-committer.
+ * @param <K>
+ * @param <V>
+ */
+public class CopyOutputFormat<K, V> extends TextOutputFormat<K, V> {
+
+  /**
+   * Setter for the working directory for DistCp (where files will be copied
+   * before they are moved to the final commit-directory.)
+   * @param job The Job on whose configuration the working-directory is to be set.
+   * @param workingDirectory The path to use as the working directory.
+   */
+  public static void setWorkingDirectory(Job job, Path workingDirectory) {
+    job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
+        workingDirectory.toString());
+  }
+
+  /**
+   * Setter for the final commit-directory for DistCp (where copied files will
+   * be moved, atomically.)
+   * @param job The Job on whose configuration the commit-directory is to be set.
+   * @param commitDirectory The path to use for final commit.
+   */
+  public static void setCommitDirectory(Job job, Path commitDirectory) {
+    job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
+        commitDirectory.toString());
+  }
+
+  /**
+   * Getter for the working directory.
+   * @param job The Job from whose configuration the working-directory is to
+   * be retrieved.
+   * @return The working-directory Path.
+   */
+  public static Path getWorkingDirectory(Job job) {
+    return getWorkingDirectory(job.getConfiguration());
+  }
+
+  private static Path getWorkingDirectory(Configuration conf) {
+    String workingDirectory = conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH);
+    if (workingDirectory == null || workingDirectory.isEmpty()) {
+      return null;
+    } else {
+      return new Path(workingDirectory);
+    }
+  }
+
+  /**
+   * Getter for the final commit-directory.
+   * @param job The Job from whose configuration the commit-directory is to be
+   * retrieved.
+   * @return The commit-directory Path.
+   */
+  public static Path getCommitDirectory(Job job) {
+    return getCommitDirectory(job.getConfiguration());
+  }
+
+  private static Path getCommitDirectory(Configuration conf) {
+    String commitDirectory = conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH);
+    if (commitDirectory == null || commitDirectory.isEmpty()) {
+      return null;
+    } else {
+      return new Path(commitDirectory);
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
+    return new CopyCommitter(getOutputPath(context), context);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void checkOutputSpecs(JobContext context) throws IOException {
+    Configuration conf = context.getConfiguration();
+
+    if (getCommitDirectory(conf) == null) {
+      throw new IllegalStateException("Commit directory not configured");
+    }
+
+    Path workingPath = getWorkingDirectory(conf);
+    if (workingPath == null) {
+      throw new IllegalStateException("Working directory not configured");
+    }
+
+    // get delegation token for outDir's file system
+    TokenCache.obtainTokensForNamenodes(context.getCredentials(),
+                                        new Path[] {workingPath}, conf);
+  }
+}
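
A short sketch of how a driver would wire CopyOutputFormat into a Job, roughly as the DistCp driver does. The paths are hypothetical, and the Job.getInstance factory is assumed to be available on this branch (otherwise the older new Job(conf) constructor serves the same purpose):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.mapred.CopyOutputFormat;

public class CopyOutputFormatUsageSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "distcp-sketch");

    // The work-path receives files while the copy is in flight; the commit
    // path is where CopyCommitter moves them on (atomic) commit.
    CopyOutputFormat.setWorkingDirectory(job, new Path("/tmp/distcp/_work"));  // hypothetical path
    CopyOutputFormat.setCommitDirectory(job, new Path("/data/copied"));        // hypothetical path
    job.setOutputFormatClass(CopyOutputFormat.class);

    // checkOutputSpecs() will now pass, since both directories are configured.
    System.out.println("work dir   = " + CopyOutputFormat.getWorkingDirectory(job));
    System.out.println("commit dir = " + CopyOutputFormat.getCommitDirectory(job));
  }
}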

+ 56 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java

@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.tools.util.RetriableCommand;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * This class extends RetriableCommand to implement the creation of directories,
+ * with retries on failure.
+ */
+public class RetriableDirectoryCreateCommand extends RetriableCommand {
+
+  /**
+   * Constructor, taking a description of the action.
+   * @param description Verbose description of the copy operation.
+   */
+  public RetriableDirectoryCreateCommand(String description) {
+    super(description);
+  }
+
+  /**
+   * Implementation of RetriableCommand::doExecute().
+   * This implements the actual mkdirs() functionality.
+   * @param arguments Argument-list to the command.
+   * @return Boolean. True, if the directory could be created successfully.
+   * @throws Exception IOException, on failure to create the directory.
+   */
+  @Override
+  protected Object doExecute(Object... arguments) throws Exception {
+    assert arguments.length == 2 : "Unexpected argument list.";
+    Path target = (Path)arguments[0];
+    Mapper.Context context = (Mapper.Context)arguments[1];
+
+    FileSystem targetFS = target.getFileSystem(context.getConfiguration());
+    return targetFS.mkdirs(target);
+  }
+}
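
Any new retriable operation follows the same shape: subclass RetriableCommand and put the fallible work inside doExecute(). The class below is a hypothetical retriable delete, shown only to illustrate that pattern; it is not part of this patch:

package org.apache.hadoop.tools.mapred;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.tools.util.RetriableCommand;

public class RetriableDeleteCommand extends RetriableCommand {

  public RetriableDeleteCommand(String description) {
    super(description);
  }

  @Override
  protected Object doExecute(Object... arguments) throws Exception {
    assert arguments.length == 2 : "Unexpected argument list.";
    Path target = (Path) arguments[0];
    Mapper.Context context = (Mapper.Context) arguments[1];

    FileSystem targetFS = target.getFileSystem(context.getConfiguration());
    // Recursive delete; retried by RetriableCommand.execute() on failure.
    return targetFS.delete(target, true);
  }
}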

+ 245 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java

@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.tools.util.RetriableCommand;
+import org.apache.hadoop.tools.util.ThrottledInputStream;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.tools.DistCpOptions.*;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import java.io.*;
+import java.util.EnumSet;
+
+/**
+ * This class extends RetriableCommand to implement the copy of files,
+ * with retries on failure.
+ */
+public class RetriableFileCopyCommand extends RetriableCommand {
+
+  private static Log LOG = LogFactory.getLog(RetriableFileCopyCommand.class);
+  private static int BUFFER_SIZE = 8 * 1024;
+
+  /**
+   * Constructor, taking a description of the action.
+   * @param description Verbose description of the copy operation.
+   */
+  public RetriableFileCopyCommand(String description) {
+    super(description);
+  }
+
+  /**
+   * Implementation of RetriableCommand::doExecute().
+   * This is the actual copy-implementation.
+   * @param arguments Argument-list to the command.
+   * @return Number of bytes copied.
+   * @throws Exception: CopyReadException, if there are read-failures. All other
+   *         failures are IOExceptions.
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  protected Object doExecute(Object... arguments) throws Exception {
+    assert arguments.length == 4 : "Unexpected argument list.";
+    FileStatus source = (FileStatus)arguments[0];
+    assert !source.isDirectory() : "Unexpected file-status. Expected file.";
+    Path target = (Path)arguments[1];
+    Mapper.Context context = (Mapper.Context)arguments[2];
+    EnumSet<FileAttribute> fileAttributes
+            = (EnumSet<FileAttribute>)arguments[3];
+    return doCopy(source, target, context, fileAttributes);
+  }
+
+  private long doCopy(FileStatus sourceFileStatus, Path target,
+                      Mapper.Context context,
+                      EnumSet<FileAttribute> fileAttributes)
+          throws IOException {
+
+    Path tmpTargetPath = getTmpFile(target, context);
+    final Configuration configuration = context.getConfiguration();
+    FileSystem targetFS = target.getFileSystem(configuration);
+
+    try {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Copying " + sourceFileStatus.getPath() + " to " + target);
+        LOG.debug("Tmp-file path: " + tmpTargetPath);
+      }
+      FileSystem sourceFS = sourceFileStatus.getPath().getFileSystem(
+              configuration);
+      long bytesRead = copyToTmpFile(tmpTargetPath, targetFS, sourceFileStatus,
+                                     context, fileAttributes);
+
+      compareFileLengths(sourceFileStatus, tmpTargetPath, configuration, bytesRead);
+      compareCheckSums(sourceFS, sourceFileStatus.getPath(), targetFS, tmpTargetPath);
+      promoteTmpToTarget(tmpTargetPath, target, targetFS);
+      return bytesRead;
+
+    } finally {
+      if (targetFS.exists(tmpTargetPath))
+        targetFS.delete(tmpTargetPath, false);
+    }
+  }
+
+  private long copyToTmpFile(Path tmpTargetPath, FileSystem targetFS,
+                             FileStatus sourceFileStatus, Mapper.Context context,
+                             EnumSet<FileAttribute> fileAttributes)
+                             throws IOException {
+    OutputStream outStream = new BufferedOutputStream(targetFS.create(
+            tmpTargetPath, true, BUFFER_SIZE,
+            getReplicationFactor(fileAttributes, sourceFileStatus, targetFS),
+            getBlockSize(fileAttributes, sourceFileStatus, targetFS), context));
+    return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, true, context);
+  }
+
+  private void compareFileLengths(FileStatus sourceFileStatus, Path target,
+                                  Configuration configuration, long bytesRead)
+                                  throws IOException {
+    final Path sourcePath = sourceFileStatus.getPath();
+    FileSystem fs = sourcePath.getFileSystem(configuration);
+    if (fs.getFileStatus(sourcePath).getLen() != bytesRead)
+      throw new IOException("Mismatch in length of source:" + sourcePath
+                + " and target:" + target);
+  }
+
+  private void compareCheckSums(FileSystem sourceFS, Path source,
+                                FileSystem targetFS, Path target)
+                                throws IOException {
+    if (!DistCpUtils.checksumsAreEqual(sourceFS, source, targetFS, target))
+      throw new IOException("Check-sum mismatch between "
+                              + source + " and " + target);
+
+  }
+
+  //If target file exists and unable to delete target - fail
+  //If target doesn't exist and unable to create parent folder - fail
+  //If target is successfully deleted and parent exists, if rename fails - fail
+  private void promoteTmpToTarget(Path tmpTarget, Path target, FileSystem fs)
+                                  throws IOException {
+    if ((fs.exists(target) && !fs.delete(target, false))
+        || (!fs.exists(target.getParent()) && !fs.mkdirs(target.getParent()))
+        || !fs.rename(tmpTarget, target)) {
+      throw new IOException("Failed to promote tmp-file:" + tmpTarget
+                              + " to: " + target);
+    }
+  }
+
+  private Path getTmpFile(Path target, Mapper.Context context) {
+    Path targetWorkPath = new Path(context.getConfiguration().
+        get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+
+    Path root = target.equals(targetWorkPath)? targetWorkPath.getParent() : targetWorkPath;
+    LOG.info("Creating temp file: " +
+        new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString()));
+    return new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString());
+  }
+
+  private long copyBytes(FileStatus sourceFileStatus, OutputStream outStream,
+                         int bufferSize, boolean mustCloseStream,
+                         Mapper.Context context) throws IOException {
+    Path source = sourceFileStatus.getPath();
+    byte buf[] = new byte[bufferSize];
+    ThrottledInputStream inStream = null;
+    long totalBytesRead = 0;
+
+    try {
+      inStream = getInputStream(source, context.getConfiguration());
+      int bytesRead = readBytes(inStream, buf);
+      while (bytesRead >= 0) {
+        totalBytesRead += bytesRead;
+        outStream.write(buf, 0, bytesRead);
+        updateContextStatus(totalBytesRead, context, sourceFileStatus);
+        // Use readBytes() here too, so mid-stream read failures are also
+        // wrapped as CopyReadException (matching the first read above).
+        bytesRead = readBytes(inStream, buf);
+      }
+    } finally {
+      if (mustCloseStream)
+        IOUtils.cleanup(LOG, outStream, inStream);
+    }
+
+    return totalBytesRead;
+  }
+
+  private void updateContextStatus(long totalBytesRead, Mapper.Context context,
+                                   FileStatus sourceFileStatus) {
+    StringBuilder message = new StringBuilder(DistCpUtils.getFormatter()
+                .format(totalBytesRead * 100.0f / sourceFileStatus.getLen()));
+    message.append("% ")
+            .append(description).append(" [")
+            .append(DistCpUtils.getStringDescriptionFor(totalBytesRead))
+            .append('/')
+        .append(DistCpUtils.getStringDescriptionFor(sourceFileStatus.getLen()))
+            .append(']');
+    context.setStatus(message.toString());
+  }
+
+  private static int readBytes(InputStream inStream, byte buf[])
+          throws IOException {
+    try {
+      return inStream.read(buf);
+    }
+    catch (IOException e) {
+      throw new CopyReadException(e);
+    }
+  }
+
+  private static ThrottledInputStream getInputStream(Path path, Configuration conf)
+          throws IOException {
+    try {
+      FileSystem fs = path.getFileSystem(conf);
+      long bandwidthMB = conf.getInt(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
+              DistCpConstants.DEFAULT_BANDWIDTH_MB);
+      return new ThrottledInputStream(new BufferedInputStream(fs.open(path)),
+              bandwidthMB * 1024 * 1024);
+    }
+    catch (IOException e) {
+      throw new CopyReadException(e);
+    }
+  }
+
+  private static short getReplicationFactor(
+          EnumSet<FileAttribute> fileAttributes,
+          FileStatus sourceFile, FileSystem targetFS) {
+    return fileAttributes.contains(FileAttribute.REPLICATION)?
+            sourceFile.getReplication() : targetFS.getDefaultReplication();
+  }
+
+  private static long getBlockSize(
+          EnumSet<FileAttribute> fileAttributes,
+          FileStatus sourceFile, FileSystem targetFS) {
+    return fileAttributes.contains(FileAttribute.BLOCKSIZE)?
+            sourceFile.getBlockSize() : targetFS.getDefaultBlockSize();
+  }
+
+  /**
+   * Special subclass of IOException. This is used to distinguish read-operation
+   * failures from other kinds of IOExceptions.
+   * The failure to read from source is dealt with specially, in the CopyMapper.
+   * Such failures may be skipped if the DistCpOptions indicate so.
+   * Write failures are not tolerated, and amount to CopyMapper failure.
+   */
+  public static class CopyReadException extends IOException {
+    public CopyReadException(Throwable rootCause) {
+      super(rootCause);
+    }
+  }
+}
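
The bandwidth limit in getInputStream() above is applied purely by wrapping the source stream. A small local sketch of the same idea, assuming the (InputStream, bytes-per-second) constructor that getInputStream() uses; the file path and rate below are hypothetical:

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.hadoop.tools.util.ThrottledInputStream;

public class ThrottleSketch {
  public static void main(String[] args) throws Exception {
    // Read a local file through the same throttling wrapper the copy uses,
    // capped at roughly 1 MB/sec.
    int bytesPerSecond = 1024 * 1024;
    InputStream in = new ThrottledInputStream(
        new BufferedInputStream(new FileInputStream("/tmp/sample.bin")), bytesPerSecond);
    byte[] buf = new byte[8 * 1024];
    long total = 0;
    try {
      for (int n = in.read(buf); n >= 0; n = in.read(buf)) {
        total += n;
      }
    } finally {
      in.close();
    }
    System.out.println("Read " + total + " bytes at <= " + bytesPerSecond + " bytes/sec");
  }
}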

+ 169 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java

@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * UniformSizeInputFormat extends the InputFormat<> class, to produce
+ * input-splits for DistCp.
+ * It looks at the copy-listing and groups the contents into input-splits such
+ * that the total-number of bytes to be copied for each input split is
+ * uniform.
+ */
+public class UniformSizeInputFormat extends InputFormat<Text, FileStatus> {
+  private static final Log LOG
+                = LogFactory.getLog(UniformSizeInputFormat.class);
+
+  /**
+   * Implementation of InputFormat::getSplits(). Returns a list of InputSplits,
+   * such that the number of bytes to be copied for all the splits are
+   * approximately equal.
+   * @param context JobContext for the job.
+   * @return The list of uniformly-distributed input-splits.
+   * @throws IOException: On failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public List<InputSplit> getSplits(JobContext context)
+                      throws IOException, InterruptedException {
+    Configuration configuration = context.getConfiguration();
+    int numSplits = DistCpUtils.getInt(configuration,
+                                       JobContext.NUM_MAPS);
+
+    if (numSplits == 0) return new ArrayList<InputSplit>();
+
+    return getSplits(configuration, numSplits,
+                     DistCpUtils.getLong(configuration,
+                          DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED));
+  }
+
+  private List<InputSplit> getSplits(Configuration configuration, int numSplits,
+                                     long totalSizeBytes) throws IOException {
+    List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
+    long nBytesPerSplit = (long) Math.ceil(totalSizeBytes * 1.0 / numSplits);
+
+    FileStatus srcFileStatus = new FileStatus();
+    Text srcRelPath = new Text();
+    long currentSplitSize = 0;
+    long lastSplitStart = 0;
+    long lastPosition = 0;
+
+    final Path listingFilePath = getListingFilePath(configuration);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Average bytes per map: " + nBytesPerSplit +
+          ", Number of maps: " + numSplits + ", total size: " + totalSizeBytes);
+    }
+    SequenceFile.Reader reader=null;
+    try {
+      reader = getListingFileReader(configuration);
+      while (reader.next(srcRelPath, srcFileStatus)) {
+        // If adding the current file would push this split past the per-split
+        // byte limit, close out the current split and start a new one that
+        // begins with the current file.
+        if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
+          FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
+              lastPosition - lastSplitStart, null);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug ("Creating split : " + split + ", bytes in split: " + currentSplitSize);
+          }
+          splits.add(split);
+          lastSplitStart = lastPosition;
+          currentSplitSize = 0;
+        }
+        currentSplitSize += srcFileStatus.getLen();
+        lastPosition = reader.getPosition();
+      }
+      if (lastPosition > lastSplitStart) {
+        FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
+            lastPosition - lastSplitStart, null);
+        if (LOG.isDebugEnabled()) {
+          LOG.info ("Creating split : " + split + ", bytes in split: " + currentSplitSize);
+        }
+        splits.add(split);
+      }
+
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+
+    return splits;
+  }
+
+  private static Path getListingFilePath(Configuration configuration) {
+    final String listingFilePathString =
+            configuration.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
+
+    assert !listingFilePathString.equals("")
+              : "Couldn't find listing file. Invalid input.";
+    return new Path(listingFilePathString);
+  }
+
+  private SequenceFile.Reader getListingFileReader(Configuration configuration) {
+
+    final Path listingFilePath = getListingFilePath(configuration);
+    try {
+      final FileSystem fileSystem = listingFilePath.getFileSystem(configuration);
+      if (!fileSystem.exists(listingFilePath))
+        throw new IllegalArgumentException("Listing file doesn't exist at: "
+                                           + listingFilePath);
+
+      return new SequenceFile.Reader(configuration,
+                                     SequenceFile.Reader.file(listingFilePath));
+    }
+    catch (IOException exception) {
+      LOG.error("Couldn't find listing file at: " + listingFilePath, exception);
+      throw new IllegalArgumentException("Couldn't find listing-file at: "
+                                         + listingFilePath, exception);
+    }
+  }
+
+  /**
+   * Implementation of InputFormat::createRecordReader().
+   * @param split The split for which the RecordReader is sought.
+   * @param context The context of the current task-attempt.
+   * @return A SequenceFileRecordReader instance, (since the copy-listing is a
+   * simple sequence-file.)
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Override
+  public RecordReader<Text, FileStatus> createRecordReader(InputSplit split,
+                                                     TaskAttemptContext context)
+                                      throws IOException, InterruptedException {
+    return new SequenceFileRecordReader<Text, FileStatus>();
+  }
+}
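
The grouping performed by getSplits() above can be restated without any Hadoop types: walk the listing-ordered file sizes and cut a new split whenever adding the next file would exceed the per-split byte budget. A standalone sketch, with a long[] of sizes standing in for the per-record FileStatus lengths (names are illustrative only):

import java.util.ArrayList;
import java.util.List;

public class UniformSplitSketch {
  static List<List<Long>> group(long[] sizes, int numSplits) {
    long total = 0;
    for (long s : sizes) total += s;
    long bytesPerSplit = (long) Math.ceil(total * 1.0 / numSplits);

    List<List<Long>> splits = new ArrayList<List<Long>>();
    List<Long> current = new ArrayList<Long>();
    long currentBytes = 0;
    for (long size : sizes) {
      // Close the current split if this file would push it past the limit.
      if (currentBytes + size > bytesPerSplit && !current.isEmpty()) {
        splits.add(current);
        current = new ArrayList<Long>();
        currentBytes = 0;
      }
      current.add(size);
      currentBytes += size;
    }
    if (!current.isEmpty()) splits.add(current);
    return splits;
  }

  public static void main(String[] args) {
    long[] sizes = {700, 200, 100, 500, 400, 100};   // 2000 bytes in total
    System.out.println(group(sizes, 2));             // [[700, 200, 100], [500, 400, 100]]
  }
}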

+ 246 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java

@@ -0,0 +1,246 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.tools.mapred.lib;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskID;
+
+import java.io.IOException;
+
+/**
+ * The DynamicInputChunk represents a single chunk of work, when used in
+ * conjunction with the DynamicInputFormat and the DynamicRecordReader.
+ * The records in the DynamicInputFormat's input-file are split across various
+ * DynamicInputChunks. Each one is claimed and processed in an iteration of
+ * a dynamic-mapper. When a DynamicInputChunk has been exhausted, a faster
+ * mapper may claim another and process it, until there are no more chunks
+ * left to consume.
+ */
+class DynamicInputChunk<K, V> {
+  private static Log LOG = LogFactory.getLog(DynamicInputChunk.class);
+
+  private static Configuration configuration;
+  private static Path chunkRootPath;
+  private static String chunkFilePrefix;
+  private static int numChunksLeft = -1; // Un-initialized before 1st dir-scan.
+  private static FileSystem fs;
+
+  private Path chunkFilePath;
+  private SequenceFileRecordReader<K, V> reader;
+  private SequenceFile.Writer writer;
+
+  private static void initializeChunkInvariants(Configuration config)
+                                                  throws IOException {
+    configuration = config;
+    Path listingFilePath = new Path(getListingFilePath(configuration));
+    chunkRootPath = new Path(listingFilePath.getParent(), "chunkDir");
+    fs = chunkRootPath.getFileSystem(configuration);
+    chunkFilePrefix = listingFilePath.getName() + ".chunk.";
+  }
+
+  private static String getListingFilePath(Configuration configuration) {
+    final String listingFileString = configuration.get(
+            DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
+    assert !listingFileString.equals("") : "Listing file not found.";
+    return listingFileString;
+  }
+
+  private static boolean areInvariantsInitialized() {
+    return chunkRootPath != null;
+  }
+
+  private DynamicInputChunk(String chunkId, Configuration configuration)
+                                                      throws IOException {
+    if (!areInvariantsInitialized())
+      initializeChunkInvariants(configuration);
+
+    chunkFilePath = new Path(chunkRootPath, chunkFilePrefix + chunkId);
+    openForWrite();
+  }
+
+
+  private void openForWrite() throws IOException {
+    writer = SequenceFile.createWriter(
+            chunkFilePath.getFileSystem(configuration), configuration,
+            chunkFilePath, Text.class, FileStatus.class,
+            SequenceFile.CompressionType.NONE);
+
+  }
+
+  /**
+   * Factory method to create chunk-files for writing to.
+   * (For instance, when the DynamicInputFormat splits the input-file into
+   * chunks.)
+   * @param chunkId String to identify the chunk.
+   * @param configuration Configuration, describing the location of the listing-
+   * file, file-system for the map-job, etc.
+   * @return A DynamicInputChunk, corresponding to a chunk-file, with the name
+   * incorporating the chunk-id.
+   * @throws IOException Exception on failure to create the chunk.
+   */
+  public static DynamicInputChunk createChunkForWrite(String chunkId,
+                          Configuration configuration) throws IOException {
+    return new DynamicInputChunk(chunkId, configuration);
+  }
+
+  /**
+   * Method to write records into a chunk.
+   * @param key Key from the listing file.
+   * @param value Corresponding value from the listing file.
+   * @throws IOException Exception on failure to write to the file.
+   */
+  public void write(Text key, FileStatus value) throws IOException {
+    writer.append(key, value);
+  }
+
+  /**
+   * Closes streams opened to the chunk-file.
+   */
+  public void close() {
+    IOUtils.cleanup(LOG, reader, writer);
+  }
+
+  /**
+   * Reassigns the chunk to a specified Map-Task, for consumption.
+   * @param taskId The Map-Task to which the chunk is to be reassigned.
+   * @throws IOException Exception on failure to reassign.
+   */
+  public void assignTo(TaskID taskId) throws IOException {
+    Path newPath = new Path(chunkRootPath, taskId.toString());
+    if (!fs.rename(chunkFilePath, newPath)) {
+      LOG.warn(chunkFilePath + " could not be assigned to " + taskId);
+    }
+  }
+
+  private DynamicInputChunk(Path chunkFilePath,
+                            TaskAttemptContext taskAttemptContext)
+                                   throws IOException, InterruptedException {
+    if (!areInvariantsInitialized())
+      initializeChunkInvariants(taskAttemptContext.getConfiguration());
+
+    this.chunkFilePath = chunkFilePath;
+    openForRead(taskAttemptContext);
+  }
+
+  private void openForRead(TaskAttemptContext taskAttemptContext)
+          throws IOException, InterruptedException {
+    reader = new SequenceFileRecordReader<K, V>();
+    reader.initialize(new FileSplit(chunkFilePath, 0,
+            DistCpUtils.getFileSize(chunkFilePath, configuration), null),
+            taskAttemptContext);
+  }
+
+  /**
+   * Factory method that
+   * 1. acquires a chunk for the specified map-task attempt
+   * 2. returns a DynamicInputChunk associated with the acquired chunk-file.
+   * @param taskAttemptContext The attempt-context for the map task that's
+   * trying to acquire a chunk.
+   * @return The acquired dynamic-chunk. The chunk-file is renamed to the
+   * attempt-id (from the attempt-context.)
+   * @throws IOException Exception on failure.
+   * @throws InterruptedException Exception on failure.
+   */
+  public static DynamicInputChunk acquire(TaskAttemptContext taskAttemptContext)
+                                      throws IOException, InterruptedException {
+    if (!areInvariantsInitialized())
+        initializeChunkInvariants(taskAttemptContext.getConfiguration());
+
+    String taskId
+            = taskAttemptContext.getTaskAttemptID().getTaskID().toString();
+    Path acquiredFilePath = new Path(chunkRootPath, taskId);
+
+    if (fs.exists(acquiredFilePath)) {
+      LOG.info("Acquiring pre-assigned chunk: " + acquiredFilePath);
+      return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
+    }
+
+    for (FileStatus chunkFile : getListOfChunkFiles()) {
+      if (fs.rename(chunkFile.getPath(), acquiredFilePath)) {
+        LOG.info(taskId + " acquired " + chunkFile.getPath());
+        return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
+      }
+      else
+        LOG.warn(taskId + " could not acquire " + chunkFile.getPath());
+    }
+
+    return null;
+  }
+
+  /**
+   * Method to be called to relinquish an acquired chunk. All streams open to
+   * the chunk are closed, and the chunk-file is deleted.
+   * @throws IOException Exception thrown on failure to release (i.e. delete)
+   * the chunk file.
+   */
+  public void release() throws IOException {
+    close();
+    if (!fs.delete(chunkFilePath, false)) {
+      LOG.error("Unable to release chunk at path: " + chunkFilePath);
+      throw new IOException("Unable to release chunk at path: " + chunkFilePath);
+    }
+  }
+
+  static FileStatus [] getListOfChunkFiles() throws IOException {
+    Path chunkFilePattern = new Path(chunkRootPath, chunkFilePrefix + "*");
+    FileStatus chunkFiles[] = fs.globStatus(chunkFilePattern);
+    numChunksLeft = chunkFiles.length;
+    return chunkFiles;
+  }
+
+  /**
+   * Getter for the chunk-file's path, on HDFS.
+   * @return The qualified path to the chunk-file.
+   */
+  public Path getPath() {
+    return chunkFilePath;
+  }
+
+  /**
+   * Getter for the record-reader, opened to the chunk-file.
+   * @return Opened Sequence-file reader.
+   */
+  public SequenceFileRecordReader<K,V> getReader() {
+    assert reader != null : "Reader un-initialized!";
+    return reader;
+  }
+
+  /**
+   * Getter for the number of chunk-files left in the chunk-file directory.
+   * Useful to determine how many chunks (and hence, records) are left to be
+   * processed.
+   * @return Before the first scan of the directory, the number returned is -1.
+   * Otherwise, the number of chunk-files seen from the last scan is returned.
+   */
+  public static int getNumChunksLeft() {
+    return numChunksLeft;
+  }
+}
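
A sketch of the write-side life-cycle of a chunk, as it is driven from DynamicInputFormat: create, append listing records, close, and (optionally) pre-assign to a map task. The class below is illustrative only and not part of the patch; it assumes a Configuration that already carries the listing-file path, since the chunk directory is derived from it:

package org.apache.hadoop.tools.mapred.lib;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskID;

class DynamicInputChunkWriteSketch {
  static void writeOneChunk(Configuration conf, Text relPath, FileStatus status,
                            TaskID firstMapTask) throws IOException {
    // 1. Create a chunk-file for writing, named after the chunk-id.
    DynamicInputChunk chunk = DynamicInputChunk.createChunkForWrite("00000", conf);
    try {
      // 2. Append copy-listing records into the chunk.
      chunk.write(relPath, status);
    } finally {
      // 3. Close the underlying SequenceFile writer.
      chunk.close();
    }
    // 4. Optionally pre-assign the chunk to a map task; otherwise a mapper
    //    will later claim it via DynamicInputChunk.acquire().
    chunk.assignTo(firstMapTask);
  }
}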

+ 292 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java

@@ -0,0 +1,292 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred.lib;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.io.IOException;
+
+/**
+ * DynamicInputFormat implements the "Worker pattern" for DistCp.
+ * Rather than splitting up the copy-list into a set of static splits,
+ * the DynamicInputFormat does the following:
+ * 1. Splits the copy-list into small chunks on the DFS.
+ * 2. Creates a set of empty "dynamic" splits, that each consume as many chunks
+ *    as it can.
+ * This arrangement ensures that a single slow mapper won't slow down the entire
+ * job (since the slack will be picked up by other mappers, which consume more
+ * chunks.)
+ * By varying the split-ratio, one can vary chunk sizes to achieve different
+ * performance characteristics. 
+ */
+public class DynamicInputFormat<K, V> extends InputFormat<K, V> {
+  private static final Log LOG = LogFactory.getLog(DynamicInputFormat.class);
+
+  private static final String CONF_LABEL_LISTING_SPLIT_RATIO
+          = "mapred.listing.split.ratio";
+  private static final String CONF_LABEL_NUM_SPLITS
+          = "mapred.num.splits";
+  private static final String CONF_LABEL_NUM_ENTRIES_PER_CHUNK
+          = "mapred.num.entries.per.chunk";
+
+  /**
+   * Implementation of InputFormat::getSplits(). This method splits up the
+   * copy-listing file into chunks, and assigns the first batch to different
+   * tasks.
+   * @param jobContext JobContext for the map job.
+   * @return The list of (empty) dynamic input-splits.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public List<InputSplit> getSplits(JobContext jobContext)
+      throws IOException, InterruptedException {
+    LOG.info("DynamicInputFormat: Getting splits for job:"
+             + jobContext.getJobID());
+    return createSplits(jobContext,
+                        splitCopyListingIntoChunksWithShuffle(jobContext));
+  }
+
+  private List<InputSplit> createSplits(JobContext jobContext,
+                                        List<DynamicInputChunk> chunks)
+          throws IOException {
+    int numMaps = getNumMapTasks(jobContext.getConfiguration());
+
+    final int nSplits = Math.min(numMaps, chunks.size());
+    List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);
+    
+    for (int i=0; i< nSplits; ++i) {
+      TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
+      chunks.get(i).assignTo(taskId);
+      splits.add(new FileSplit(chunks.get(i).getPath(), 0,
+          // Use a non-zero length for the FileSplit size, to guard against
+          // 0-sized file-splits ever being treated as "empty" and skipped
+          // over.
+          MIN_RECORDS_PER_CHUNK,
+          null));
+    }
+    DistCpUtils.publish(jobContext.getConfiguration(),
+                        CONF_LABEL_NUM_SPLITS, splits.size());
+    return splits;
+  }
+
+  private static int N_CHUNKS_OPEN_AT_ONCE_DEFAULT = 16;
+
+  private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle
+                                    (JobContext context) throws IOException {
+
+    final Configuration configuration = context.getConfiguration();
+    int numRecords = getNumberOfRecords(configuration);
+    int numMaps = getNumMapTasks(configuration);
+    // Number of chunks each map will process, on average.
+    int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
+    validateNumChunksUsing(splitRatio, numMaps);
+
+    int numEntriesPerChunk = (int)Math.ceil((float)numRecords
+                                          /(splitRatio * numMaps));
+    DistCpUtils.publish(context.getConfiguration(),
+                        CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
+                        numEntriesPerChunk);
+
+    final int nChunksTotal = (int)Math.ceil((float)numRecords/numEntriesPerChunk);
+    int nChunksOpenAtOnce
+            = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);
+
+    Path listingPath = getListingFilePath(configuration);
+    SequenceFile.Reader reader
+            = new SequenceFile.Reader(configuration,
+                                      SequenceFile.Reader.file(listingPath));
+
+    List<DynamicInputChunk> openChunks
+                  = new ArrayList<DynamicInputChunk>();
+    
+    List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();
+
+    FileStatus fileStatus = new FileStatus();
+    Text relPath = new Text();
+    int recordCounter = 0;
+    int chunkCount = 0;
+
+    try {
+
+      while (reader.next(relPath, fileStatus)) {
+        if (recordCounter % (nChunksOpenAtOnce*numEntriesPerChunk) == 0) {
+          // All chunks full. Create new chunk-set.
+          closeAll(openChunks);
+          chunksFinal.addAll(openChunks);
+
+          openChunks = createChunks(
+                  configuration, chunkCount, nChunksTotal, nChunksOpenAtOnce);
+
+          chunkCount += openChunks.size();
+
+          nChunksOpenAtOnce = openChunks.size();
+          recordCounter = 0;
+        }
+
+        // Shuffle into open chunks.
+        openChunks.get(recordCounter%nChunksOpenAtOnce).write(relPath, fileStatus);
+        ++recordCounter;
+      }
+
+    } finally {
+      closeAll(openChunks);
+      chunksFinal.addAll(openChunks);
+      IOUtils.closeStream(reader);
+    }
+
+    LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size()); 
+    return chunksFinal;
+  }
+
+  private static void validateNumChunksUsing(int splitRatio, int numMaps)
+                                              throws IOException {
+    if (splitRatio * numMaps > MAX_CHUNKS_TOLERABLE)
+      throw new IOException("Too many chunks created with splitRatio:"
+                 + splitRatio + ", numMaps:" + numMaps
+                 + ". Reduce numMaps or decrease split-ratio to proceed.");
+  }
+
+  private static void closeAll(List<DynamicInputChunk> chunks) {
+    for (DynamicInputChunk chunk: chunks)
+      chunk.close();
+  }
+
+  private static List<DynamicInputChunk> createChunks(Configuration config,
+                      int chunkCount, int nChunksTotal, int nChunksOpenAtOnce)
+                                          throws IOException {
+    List<DynamicInputChunk> chunks = new ArrayList<DynamicInputChunk>();
+    int chunkIdUpperBound
+            = Math.min(nChunksTotal, chunkCount + nChunksOpenAtOnce);
+
+    // If there will be fewer than nChunksOpenAtOnce chunks left after
+    // the current batch of chunks, fold the remaining chunks into
+    // the current batch.
+    if (nChunksTotal - chunkIdUpperBound < nChunksOpenAtOnce)
+      chunkIdUpperBound = nChunksTotal;
+
+    for (int i=chunkCount; i < chunkIdUpperBound; ++i)
+      chunks.add(createChunk(i, config));
+    return chunks;
+  }
+
+  private static DynamicInputChunk createChunk(int chunkId, Configuration config)
+                                              throws IOException {
+    return DynamicInputChunk.createChunkForWrite(String.format("%05d", chunkId),
+                                              config);
+  }
+
+
+  private static Path getListingFilePath(Configuration configuration) {
+    String listingFilePathString = configuration.get(
+            DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
+
+    assert !listingFilePathString.equals("") : "Listing file not found.";
+
+    Path listingFilePath = new Path(listingFilePathString);
+    try {
+      assert listingFilePath.getFileSystem(configuration)
+              .exists(listingFilePath) : "Listing file: " + listingFilePath +
+                                          " not found.";
+    } catch (IOException e) {
+      assert false :   "Listing file: " + listingFilePath
+                    + " couldn't be accessed. " + e.getMessage();
+    }
+    return listingFilePath;
+  }
+
+  private static int getNumberOfRecords(Configuration configuration) {
+    return DistCpUtils.getInt(configuration,
+                              DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS);
+  }
+
+  private static int getNumMapTasks(Configuration configuration) {
+    return DistCpUtils.getInt(configuration,
+                              JobContext.NUM_MAPS);
+  }
+
+  private static int getListingSplitRatio(Configuration configuration,
+                                            int numMaps, int numPaths) {
+    return configuration.getInt(
+            CONF_LABEL_LISTING_SPLIT_RATIO,
+            getSplitRatio(numMaps, numPaths));
+  }
+
+  private static final int MAX_CHUNKS_TOLERABLE = 400;
+  private static final int MAX_CHUNKS_IDEAL     = 100;
+  private static final int MIN_RECORDS_PER_CHUNK = 5;
+  private static final int SPLIT_RATIO_DEFAULT  = 2;
+
+  /**
+   * Package private, for testability.
+   * @param nMaps The number of maps requested.
+   * @param nRecords The number of records to be copied.
+   * @return The number of splits each map should handle, ideally.
+   */
+  static int getSplitRatio(int nMaps, int nRecords) {
+    if (nMaps == 1) {
+      LOG.warn("nMaps == 1. Why use DynamicInputFormat?");
+      return 1;
+    }
+
+    if (nMaps > MAX_CHUNKS_IDEAL)
+      return SPLIT_RATIO_DEFAULT;
+
+    int nPickups = (int)Math.ceil((float)MAX_CHUNKS_IDEAL/nMaps);
+    int nRecordsPerChunk = (int)Math.ceil((float)nRecords/(nMaps*nPickups));
+
+    return nRecordsPerChunk < MIN_RECORDS_PER_CHUNK ?
+              SPLIT_RATIO_DEFAULT : nPickups;
+  }
+
+  static int getNumEntriesPerChunk(Configuration configuration) {
+    return DistCpUtils.getInt(configuration,
+                              CONF_LABEL_NUM_ENTRIES_PER_CHUNK);
+  }
+
+
+  /**
+   * Implementation of InputFormat::createRecordReader().
+   * @param inputSplit The split for which the RecordReader is required.
+   * @param taskAttemptContext TaskAttemptContext for the current attempt.
+   * @return DynamicRecordReader instance.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public RecordReader<K, V> createRecordReader(
+          InputSplit inputSplit,
+          TaskAttemptContext taskAttemptContext)
+          throws IOException, InterruptedException {
+    return new DynamicRecordReader<K, V>();
+  }
+}
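
To make the chunking arithmetic above concrete, the following illustrative sketch (not part of the patch) reproduces the calculations of splitCopyListingIntoChunksWithShuffle() and getSplitRatio() for a hypothetical listing of 10,000 records copied with 40 maps:

    // Illustrative only: mirrors DynamicInputFormat's chunk arithmetic for
    // hypothetical inputs (10,000 listing records, 40 requested maps).
    public class ChunkMathSketch {
      public static void main(String[] args) {
        int numRecords = 10000, numMaps = 40;

        // getSplitRatio(): 40 <= MAX_CHUNKS_IDEAL (100), so
        // nPickups = ceil(100/40) = 3 and nRecordsPerChunk = ceil(10000/120) = 84,
        // which is >= MIN_RECORDS_PER_CHUNK (5); the ratio is therefore 3.
        int splitRatio = 3;

        int numEntriesPerChunk =
            (int) Math.ceil((float) numRecords / (splitRatio * numMaps)); // 84
        int nChunksTotal =
            (int) Math.ceil((float) numRecords / numEntriesPerChunk);     // 120

        // 120 chunks across 40 maps: roughly 3 pickups per map, well under
        // MAX_CHUNKS_TOLERABLE (400), so validateNumChunksUsing() passes.
        System.out.println(numEntriesPerChunk + " records/chunk, "
            + nChunksTotal + " chunks");
      }
    }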

+ 203 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicRecordReader.java

@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred.lib;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * The DynamicRecordReader is used in conjunction with the DynamicInputFormat
+ * to implement the "Worker pattern" for DistCp.
+ * The DynamicRecordReader is responsible for:
+ * 1. Presenting the contents of each chunk to DistCp's mapper.
+ * 2. Acquiring a new chunk when the current chunk has been completely consumed,
+ *    transparently.
+ */
+public class DynamicRecordReader<K, V> extends RecordReader<K, V> {
+  private static final Log LOG = LogFactory.getLog(DynamicRecordReader.class);
+  private TaskAttemptContext taskAttemptContext;
+  private Configuration configuration;
+  private DynamicInputChunk<K, V> chunk;
+  private TaskID taskId;
+
+  // Data required for progress indication.
+  private int numRecordsPerChunk; // Constant per job.
+  private int totalNumRecords;    // Constant per job.
+  private int numRecordsProcessedByThisMap = 0;
+  private long timeOfLastChunkDirScan = 0;
+  private boolean isChunkDirAlreadyScanned = false;
+
+  private static final long TIME_THRESHOLD_FOR_DIR_SCANS = TimeUnit.MINUTES.toMillis(5);
+
+  /**
+   * Implementation for RecordReader::initialize(). Initializes the internal
+   * RecordReader to read from chunks.
+   * @param inputSplit The InputSplit for the map. Ignored entirely.
+   * @param taskAttemptContext The AttemptContext.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public void initialize(InputSplit inputSplit,
+                         TaskAttemptContext taskAttemptContext)
+                         throws IOException, InterruptedException {
+    numRecordsPerChunk = DynamicInputFormat.getNumEntriesPerChunk(
+            taskAttemptContext.getConfiguration());
+    this.taskAttemptContext = taskAttemptContext;
+    configuration = taskAttemptContext.getConfiguration();
+    taskId = taskAttemptContext.getTaskAttemptID().getTaskID();
+    chunk = DynamicInputChunk.acquire(this.taskAttemptContext);
+    timeOfLastChunkDirScan = System.currentTimeMillis();
+    isChunkDirAlreadyScanned = false;
+
+    totalNumRecords = getTotalNumRecords();
+
+  }
+
+  private int getTotalNumRecords() {
+    return DistCpUtils.getInt(configuration,
+                              DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS);
+  }
+
+  /**
+   * Implementation of RecordReader::nextKeyValue().
+   * Reads the contents of the current chunk and returns them. When a chunk has
+   * been completely exhausted, a new chunk is acquired and read,
+   * transparently.
+   * @return True, if another record could be read. False, otherwise.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public boolean nextKeyValue()
+      throws IOException, InterruptedException {
+
+    if (chunk == null) {
+      if (LOG.isDebugEnabled())
+        LOG.debug(taskId + ": RecordReader is null. No records to be read.");
+      return false;
+    }
+
+    if (chunk.getReader().nextKeyValue()) {
+      ++numRecordsProcessedByThisMap;
+      return true;
+    }
+
+    if (LOG.isDebugEnabled())
+      LOG.debug(taskId + ": Current chunk exhausted. " +
+                         "Attempting to pick up a new one.");
+
+    chunk.release();
+    timeOfLastChunkDirScan = System.currentTimeMillis();
+    isChunkDirAlreadyScanned = false;
+    
+    chunk = DynamicInputChunk.acquire(taskAttemptContext);
+
+    if (chunk == null) return false;
+
+    if (chunk.getReader().nextKeyValue()) {
+      ++numRecordsProcessedByThisMap;
+      return true;
+    }
+    else {
+      return false;
+    }
+  }
+
+  /**
+   * Implementation of RecordReader::getCurrentKey().
+   * @return The key of the current record (i.e. the relative path of the source file).
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public K getCurrentKey()
+      throws IOException, InterruptedException {
+    return chunk.getReader().getCurrentKey();
+  }
+
+  /**
+   * Implementation of RecordReader::getCurrentValue().
+   * @return The value of the current record (i.e. the FileStatus of the source file).
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public V getCurrentValue()
+      throws IOException, InterruptedException {
+    return chunk.getReader().getCurrentValue();
+  }
+
+  /**
+   * Implementation of RecordReader::getProgress().
+   * @return A fraction [0.0,1.0] indicating the progress of a DistCp mapper.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public float getProgress()
+      throws IOException, InterruptedException {
+    final int numChunksLeft = getNumChunksLeft();
+    if (numChunksLeft < 0) {// Un-initialized. i.e. Before 1st dir-scan.
+      assert numRecordsProcessedByThisMap <= numRecordsPerChunk
+              : "numRecordsProcessedByThisMap:" + numRecordsProcessedByThisMap +
+                " exceeds numRecordsPerChunk:" + numRecordsPerChunk;
+      return ((float) numRecordsProcessedByThisMap) / totalNumRecords;
+      // Conservative estimate, till the first directory scan.
+    }
+
+    return ((float) numRecordsProcessedByThisMap)
+            /(numRecordsProcessedByThisMap + numRecordsPerChunk*numChunksLeft);
+  }
+
+  private int getNumChunksLeft() throws IOException {
+    long now = System.currentTimeMillis();
+    boolean tooLongSinceLastDirScan
+                  = now - timeOfLastChunkDirScan > TIME_THRESHOLD_FOR_DIR_SCANS;
+
+    if (tooLongSinceLastDirScan
+            || (!isChunkDirAlreadyScanned &&
+                    numRecordsProcessedByThisMap%numRecordsPerChunk
+                              > numRecordsPerChunk/2)) {
+      DynamicInputChunk.getListOfChunkFiles();
+      isChunkDirAlreadyScanned = true;
+      timeOfLastChunkDirScan = now;
+    }
+
+    return DynamicInputChunk.getNumChunksLeft();
+  }
+  /**
+   * Implementation of RecordReader::close().
+   * Closes the RecordReader.
+   * @throws IOException, on failure.
+   */
+  @Override
+  public void close()
+      throws IOException {
+    if (chunk != null)
+        chunk.close();
+  }
+}
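
The reader above is driven by the standard Hadoop RecordReader contract; the chunk hand-off is invisible to the caller. A rough sketch of that consumption loop follows (normally the MapReduce framework performs it; the inputSplit and taskAttemptContext objects are assumed to be supplied by the framework):

    // Illustrative only: the framework-style read loop over a DynamicRecordReader.
    RecordReader<Text, FileStatus> reader = new DynamicRecordReader<Text, FileStatus>();
    reader.initialize(inputSplit, taskAttemptContext);  // acquires the first chunk
    while (reader.nextKeyValue()) {                     // moves across chunks transparently
      Text relativePath = reader.getCurrentKey();       // relative path from the listing
      FileStatus sourceStatus = reader.getCurrentValue();
      // ... the CopyMapper would process (relativePath, sourceStatus) here ...
    }
    reader.close();                                     // closes the last held chunk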

+ 343 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java

@@ -0,0 +1,343 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileChecksum;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.mapreduce.InputFormat;
+
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Locale;
+import java.text.DecimalFormat;
+import java.net.URI;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+/**
+ * Utility functions used in DistCp.
+ */
+public class DistCpUtils {
+
+  private static final Log LOG = LogFactory.getLog(DistCpUtils.class);
+
+  /**
+   * Retrieves size of the file at the specified path.
+   * @param path The path of the file whose size is sought.
+   * @param configuration Configuration, to retrieve the appropriate FileSystem.
+   * @return The file-size, in number of bytes.
+   * @throws IOException, on failure.
+   */
+  public static long getFileSize(Path path, Configuration configuration)
+                                            throws IOException {
+    if (LOG.isDebugEnabled())
+      LOG.debug("Retrieving file size for: " + path);
+    return path.getFileSystem(configuration).getFileStatus(path).getLen();
+  }
+
+  /**
+   * Utility to publish a value to a configuration.
+   * @param configuration The Configuration to which the value must be written.
+   * @param label The label for the value being published.
+   * @param value The value being published.
+   * @param <T> The type of the value.
+   */
+  public static <T> void publish(Configuration configuration,
+                                 String label, T value) {
+    configuration.set(label, String.valueOf(value));
+  }
+
+  /**
+   * Utility to retrieve a specified key from a Configuration. Fails an
+   * assertion if the key is not found (i.e. the value is negative).
+   * @param configuration The Configuration in which the key is sought.
+   * @param label The key being sought.
+   * @return Integer value of the key.
+   */
+  public static int getInt(Configuration configuration, String label) {
+    int value = configuration.getInt(label, -1);
+    assert value >= 0 : "Couldn't find " + label;
+    return value;
+  }
+
+  /**
+   * Utility to retrieve a specified key from a Configuration. Fails an
+   * assertion if the key is not found (i.e. the value is negative).
+   * @param configuration The Configuration in which the key is sought.
+   * @param label The key being sought.
+   * @return Long value of the key.
+   */
+  public static long getLong(Configuration configuration, String label) {
+    long value = configuration.getLong(label, -1);
+    assert value >= 0 : "Couldn't find " + label;
+    return value;
+  }
+
+  /**
+   * Returns the class that implements a copy strategy. Looks up the implementation for
+   * a particular strategy from distcp-default.xml
+   *
+   * @param conf - Configuration object
+   * @param options - Handle to input options
+   * @return Class implementing the strategy specified in options.
+   */
+  public static Class<? extends InputFormat> getStrategy(Configuration conf,
+                                                                 DistCpOptions options) {
+    String confLabel = "distcp." +
+        options.getCopyStrategy().toLowerCase(Locale.getDefault()) + ".strategy.impl";
+    return conf.getClass(confLabel, UniformSizeInputFormat.class, InputFormat.class);
+  }
+
+  /**
+   * Gets the relative path of a child path with respect to a root path.
+   * For example, if childPath = /tmp/abc/xyz/file and
+   *                 sourceRootPath = /tmp/abc,
+   * the relative path would be /xyz/file.
+   *              If childPath = /file and
+   *                 sourceRootPath = /,
+   * the relative path would be /file.
+   * @param sourceRootPath - Source root path
+   * @param childPath - Path for which the relative path is required
+   * @return - Relative portion of the child path (always prefixed with /,
+   *           unless it is empty)
+   */
+  public static String getRelativePath(Path sourceRootPath, Path childPath) {
+    String childPathString = childPath.toUri().getPath();
+    String sourceRootPathString = sourceRootPath.toUri().getPath();
+    return sourceRootPathString.equals("/") ? childPathString :
+        childPathString.substring(sourceRootPathString.length());
+  }
+
+  /**
+   * Pack file preservation attributes into a string, containing
+   * just the first character of each preservation attribute
+   * @param attributes - Attribute set to preserve
+   * @return - String containing first letters of each attribute to preserve
+   */
+  public static String packAttributes(EnumSet<FileAttribute> attributes) {
+    StringBuilder buffer = new StringBuilder(5);
+    for (FileAttribute attribute : attributes) {
+      buffer.append(attribute.name().charAt(0));
+    }
+    return buffer.toString();
+  }
+
+  /**
+   * Unpacks a preservation-attribute string (containing the first character of
+   * each preservation attribute) back into a set of attributes to preserve.
+   * @param attributes - Attribute string
+   * @return - Attribute set
+   */
+  public static EnumSet<FileAttribute> unpackAttributes(String attributes) {
+    EnumSet<FileAttribute> retValue = EnumSet.noneOf(FileAttribute.class);
+
+    if (attributes != null) {
+      for (int index = 0; index < attributes.length(); index++) {
+        retValue.add(FileAttribute.getAttribute(attributes.charAt(index)));
+      }
+    }
+
+    return retValue;
+  }
+
+  /**
+   * Preserves the requested attributes on the file at the given path, to match
+   * those of the source file status passed as argument. Barring the block size,
+   * all the other attributes can be preserved by this function.
+   *
+   * @param targetFS - File system
+   * @param path - Path that needs to preserve original file status
+   * @param srcFileStatus - Original file status
+   * @param attributes - Attribute set that need to be preserved
+   * @throws IOException - Exception if any (particularly relating to group/owner
+   *                       change or any transient error)
+   */
+  public static void preserve(FileSystem targetFS, Path path,
+                              FileStatus srcFileStatus,
+                              EnumSet<FileAttribute> attributes) throws IOException {
+
+    FileStatus targetFileStatus = targetFS.getFileStatus(path);
+    String group = targetFileStatus.getGroup();
+    String user = targetFileStatus.getOwner();
+    boolean chown = false;
+
+    if (attributes.contains(FileAttribute.PERMISSION) &&
+      !srcFileStatus.getPermission().equals(targetFileStatus.getPermission())) {
+      targetFS.setPermission(path, srcFileStatus.getPermission());
+    }
+
+    if (attributes.contains(FileAttribute.REPLICATION) && ! targetFileStatus.isDirectory() &&
+        srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
+      targetFS.setReplication(path, srcFileStatus.getReplication());
+    }
+
+    if (attributes.contains(FileAttribute.GROUP) &&
+            !group.equals(srcFileStatus.getGroup())) {
+      group = srcFileStatus.getGroup();
+      chown = true;
+    }
+
+    if (attributes.contains(FileAttribute.USER) &&
+            !user.equals(srcFileStatus.getOwner())) {
+      user = srcFileStatus.getOwner();
+      chown = true;
+    }
+
+    if (chown) {
+      targetFS.setOwner(path, user, group);
+    }
+  }
+
+  /**
+   * Sorts a sequence file containing Text and FileStatus as key and value respectively.
+   *
+   * @param fs - File System
+   * @param conf - Configuration
+   * @param sourceListing - Source listing file
+   * @return Path of the sorted file (the source listing, with "_sorted" appended to the name)
+   * @throws IOException - Any exception during sort.
+   */
+  public static Path sortListing(FileSystem fs, Configuration conf, Path sourceListing)
+      throws IOException {
+    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, Text.class, FileStatus.class, conf);
+    Path output = new Path(sourceListing.toString() +  "_sorted");
+
+    if (fs.exists(output)) {
+      fs.delete(output, false);
+    }
+
+    sorter.sort(sourceListing, output);
+    return output;
+  }
+
+  /**
+   * String utility to convert a number-of-bytes to human readable format.
+   */
+  private static ThreadLocal<DecimalFormat> FORMATTER
+                        = new ThreadLocal<DecimalFormat>() {
+    @Override
+    protected DecimalFormat initialValue() {
+      return new DecimalFormat("0.0");
+    }
+  };
+
+  public static DecimalFormat getFormatter() {
+    return FORMATTER.get();
+  }
+
+  public static String getStringDescriptionFor(long nBytes) {
+
+    char units [] = {'B', 'K', 'M', 'G', 'T', 'P'};
+
+    double current = nBytes;
+    double prev    = current;
+    int index = 0;
+
+    while ((current = current/1024) >= 1) {
+      prev = current;
+      ++index;
+    }
+
+    assert index < units.length : "Too large a number.";
+
+    return getFormatter().format(prev) + units[index];
+  }
+
+  /**
+   * Utility to compare checksums for the paths specified.
+   *
+   * If a checksum can't be retrieved, the comparison does not fail.
+   * The only time the comparison fails is when checksums are available
+   * for both paths, and they don't match.
+   *
+   * @param sourceFS FileSystem for the source path.
+   * @param source The source path.
+   * @param targetFS FileSystem for the target path.
+   * @param target The target path.
+   * @return If either checksum couldn't be retrieved, the function returns
+   * false. If checksums are retrieved, the function returns true if they match,
+   * and false otherwise.
+   * @throws IOException if there's an exception while retrieving checksums.
+   */
+  public static boolean checksumsAreEqual(FileSystem sourceFS, Path source,
+                                   FileSystem targetFS, Path target)
+                                   throws IOException {
+    FileChecksum sourceChecksum = null;
+    FileChecksum targetChecksum = null;
+    try {
+      sourceChecksum = sourceFS.getFileChecksum(source);
+      targetChecksum = targetFS.getFileChecksum(target);
+    } catch (IOException e) {
+      LOG.error("Unable to retrieve checksum for " + source + " or " + target, e);
+    }
+    return (sourceChecksum == null || targetChecksum == null ||
+            sourceChecksum.equals(targetChecksum));
+  }
+
+  /**
+   * Checks whether two file systems refer to the same location (i.e. share the
+   * same scheme, canonical host and port).
+   * @param srcFs The source FileSystem.
+   * @param destFs The destination FileSystem.
+   * @return True if both point to the same file-system; false otherwise.
+   */
+  public static boolean compareFs(FileSystem srcFs, FileSystem destFs) {
+    URI srcUri = srcFs.getUri();
+    URI dstUri = destFs.getUri();
+    if (srcUri.getScheme() == null) {
+      return false;
+    }
+    if (!srcUri.getScheme().equals(dstUri.getScheme())) {
+      return false;
+    }
+    String srcHost = srcUri.getHost();
+    String dstHost = dstUri.getHost();
+    if ((srcHost != null) && (dstHost != null)) {
+      try {
+        srcHost = InetAddress.getByName(srcHost).getCanonicalHostName();
+        dstHost = InetAddress.getByName(dstHost).getCanonicalHostName();
+      } catch(UnknownHostException ue) {
+        if (LOG.isDebugEnabled())
+          LOG.debug("Could not compare file-systems. Unknown host: ", ue);
+        return false;
+      }
+      if (!srcHost.equals(dstHost)) {
+        return false;
+      }
+    }
+    else if (srcHost == null && dstHost != null) {
+      return false;
+    }
+    else if (srcHost != null) {
+      return false;
+    }
+
+    //check for ports
+
+    return srcUri.getPort() == dstUri.getPort();
+  }
+}
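
A brief usage sketch of two of the utilities above, assuming the imports already used by DistCpUtils; the paths are hypothetical, and the packed string shown in the comment assumes the enum's declaration order:

    // Illustrative only: relative-path and attribute-packing round trips.
    Path sourceRoot = new Path("/tmp/abc");
    Path child      = new Path("/tmp/abc/xyz/file");
    String relative = DistCpUtils.getRelativePath(sourceRoot, child);   // "/xyz/file"

    EnumSet<FileAttribute> toPreserve =
        EnumSet.of(FileAttribute.USER, FileAttribute.GROUP, FileAttribute.PERMISSION);
    String packed = DistCpUtils.packAttributes(toPreserve);             // e.g. "UGP"
    EnumSet<FileAttribute> unpacked = DistCpUtils.unpackAttributes(packed);
    assert unpacked.equals(toPreserve);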

+ 106 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java

@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.retry.RetryPolicy;
+import org.apache.hadoop.io.retry.RetryPolicies;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * This class represents commands that can be retried on failure, in a
+ * configurable manner.
+ */
+public abstract class RetriableCommand {
+
+  private static final Log LOG = LogFactory.getLog(RetriableCommand.class);
+
+  private static final long DELAY_MILLISECONDS = 500;
+  private static final int  MAX_RETRIES        = 3;
+
+  private RetryPolicy retryPolicy = RetryPolicies.
+      exponentialBackoffRetry(MAX_RETRIES, DELAY_MILLISECONDS, TimeUnit.MILLISECONDS);
+  protected String description;
+
+  /**
+   * Constructor.
+   * @param description The human-readable description of the command.
+   */
+  public RetriableCommand(String description) {
+    this.description = description;
+  }
+
+  /**
+   * Constructor.
+   * @param description The human-readable description of the command.
+   * @param retryPolicy The RetryPolicy to be used to compute retries.
+   */
+  public RetriableCommand(String description, RetryPolicy retryPolicy) {
+    this(description);
+    setRetryPolicy(retryPolicy);
+  }
+
+  /**
+   * Implement this method to define the command-logic that will be
+   * retried on failure (i.e. on Exception).
+   * @param arguments Argument-list to the command.
+   * @return Generic "Object".
+   * @throws Exception Throws Exception on complete failure.
+   */
+  protected abstract Object doExecute(Object... arguments) throws Exception;
+
+  /**
+   * The execute() method invokes doExecute() until either:
+   *  1. doExecute() succeeds, or
+   *  2. the command may no longer be retried (e.g. runs out of retry-attempts).
+   * @param arguments The list of arguments for the command.
+   * @return Generic "Object" from doExecute(), on success.
+   * @throws Exception On complete failure (i.e. once retries are exhausted),
+   *                   an IOException wrapping the latest failure.
+   */
+  public Object execute(Object... arguments) throws Exception {
+    Exception latestException;
+    int counter = 0;
+    do {
+      try {
+        return doExecute(arguments);
+      } catch(Exception exception) {
+        LOG.error("Failure in Retriable command: " + description, exception);
+        latestException = exception;
+      }
+      counter++;
+    } while (retryPolicy.shouldRetry(latestException, counter, 0, true).equals(RetryPolicy.RetryAction.RETRY));
+
+    throw new IOException("Couldn't run retriable-command: " + description,
+                          latestException);
+  }
+
+  /**
+   * Fluent-interface to change the RetryPolicy.
+   * @param retryHandler The new RetryPolicy instance to be used.
+   * @return Self.
+   */
+  public RetriableCommand setRetryPolicy(RetryPolicy retryHandler) {
+    this.retryPolicy = retryHandler;
+    return this;
+  }
+}
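
A minimal sketch of a RetriableCommand subclass; the class name and the delete operation are hypothetical, and the default policy above (3 attempts with exponential back-off) applies unless setRetryPolicy() is called:

    // Illustrative only: a command whose doExecute() is retried on failure.
    public class RetriableDelete extends RetriableCommand {
      public RetriableDelete() {
        super("Delete path");
      }

      @Override
      protected Object doExecute(Object... arguments) throws Exception {
        FileSystem fs = (FileSystem) arguments[0];
        Path path    = (Path) arguments[1];
        return fs.delete(path, true);   // re-attempted if this throws
      }
    }

    // Usage: new RetriableDelete().execute(fileSystem, somePath);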

+ 139 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java

@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * The ThrottledInputStream provides bandwidth throttling on a specified
+ * InputStream. It is implemented as a wrapper on top of another InputStream
+ * instance.
+ * The throttling works by examining the number of bytes read from the underlying
+ * InputStream since the beginning, and sleep()ing for a time interval if
+ * the byte-transfer is found to exceed the specified tolerable maximum.
+ * (Thus, while the read-rate might exceed the maximum for a given short interval,
+ * the average tends towards the specified maximum, overall.)
+ */
+public class ThrottledInputStream extends InputStream {
+
+  private final InputStream rawStream;
+  private final long maxBytesPerSec;
+  private final long startTime = System.currentTimeMillis();
+
+  private long bytesRead = 0;
+  private long totalSleepTime = 0;
+
+  private static final long SLEEP_DURATION_MS = 50;
+
+  public ThrottledInputStream(InputStream rawStream) {
+    this(rawStream, Long.MAX_VALUE);
+  }
+
+  public ThrottledInputStream(InputStream rawStream, long maxBytesPerSec) {
+    assert maxBytesPerSec > 0 : "Bandwidth " + maxBytesPerSec + " is invalid"; 
+    this.rawStream = rawStream;
+    this.maxBytesPerSec = maxBytesPerSec;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public int read() throws IOException {
+    throttle();
+    int data = rawStream.read();
+    if (data != -1) {
+      bytesRead++;
+    }
+    return data;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public int read(byte[] b) throws IOException {
+    throttle();
+    int readLen = rawStream.read(b);
+    if (readLen != -1) {
+      bytesRead += readLen;
+    }
+    return readLen;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public int read(byte[] b, int off, int len) throws IOException {
+    throttle();
+    int readLen = rawStream.read(b, off, len);
+    if (readLen != -1) {
+      bytesRead += readLen;
+    }
+    return readLen;
+  }
+
+  private void throttle() throws IOException {
+    if (getBytesPerSec() > maxBytesPerSec) {
+      try {
+        Thread.sleep(SLEEP_DURATION_MS);
+        totalSleepTime += SLEEP_DURATION_MS;
+      } catch (InterruptedException e) {
+        throw new IOException("Thread aborted", e);
+      }
+    }
+  }
+
+  /**
+   * Getter for the number of bytes read from this stream, since creation.
+   * @return The number of bytes.
+   */
+  public long getTotalBytesRead() {
+    return bytesRead;
+  }
+
+  /**
+   * Getter for the read-rate from this stream, since creation.
+   * Calculated as bytesRead/elapsedTimeSinceStart.
+   * @return Read rate, in bytes/sec.
+   */
+  public long getBytesPerSec() {
+    long elapsed = (System.currentTimeMillis() - startTime) / 1000;
+    if (elapsed == 0) {
+      return bytesRead;
+    } else {
+      return bytesRead / elapsed;
+    }
+  }
+
+  /**
+   * Getter for the total time spent in sleep.
+   * @return Number of milliseconds spent in sleep.
+   */
+  public long getTotalSleepTime() {
+    return totalSleepTime;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public String toString() {
+    return "ThrottledInputStream{" +
+        "bytesRead=" + bytesRead +
+        ", maxBytesPerSec=" + maxBytesPerSec +
+        ", bytesPerSec=" + getBytesPerSec() +
+        ", totalSleepTime=" + totalSleepTime +
+        '}';
+  }
+}
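
A usage sketch for the stream above, capping a copy's read-rate at roughly 1 MB/s; the FileSystem handle, source path, and destination stream ('out') are hypothetical:

    // Illustrative only: throttled copy loop.
    InputStream in = new ThrottledInputStream(
        fs.open(new Path("/source/file")), 1024 * 1024 /* maxBytesPerSec */);
    try {
      byte[] buffer = new byte[8192];
      int bytesRead;
      while ((bytesRead = in.read(buffer)) != -1) {
        out.write(buffer, 0, bytesRead);   // 'out' is any destination OutputStream
      }
    } finally {
      in.close();
    }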

+ 41 - 0
hadoop-tools/hadoop-distcp/src/main/resources/distcp-default.xml

@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Do not modify this file directly. Anything that needs to be overridden
+     should be done so through -D switches or a customized configuration. -->
+
+<configuration>
+
+    <property>
+        <name>distcp.dynamic.strategy.impl</name>
+        <value>org.apache.hadoop.tools.mapred.lib.DynamicInputFormat</value>
+        <description>Implementation of dynamic input format</description>
+    </property>
+
+    <property>
+        <name>distcp.static.strategy.impl</name>
+        <value>org.apache.hadoop.tools.mapred.UniformSizeInputFormat</value>
+        <description>Implementation of static input format</description>
+    </property>
+
+    <property>
+        <name>mapred.job.map.memory.mb</name>
+        <value>1024</value>
+    </property>
+
+    <property>
+        <name>mapred.job.reduce.memory.mb</name>
+        <value>1024</value>
+    </property>
+
+    <property>
+        <name>mapred.reducer.new-api</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>mapreduce.reduce.class</name>
+        <value>org.apache.hadoop.mapreduce.Reducer</value>
+    </property>
+
+</configuration>
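
The two strategy keys above are what DistCpUtils.getStrategy() (shown earlier) resolves against: the -strategy value is lower-cased and embedded into the key name, with UniformSizeInputFormat as the fallback. A rough sketch of that lookup for the "dynamic" strategy, assuming a Configuration instance 'conf' is in scope:

    // Illustrative only: how a strategy name is mapped to an InputFormat class.
    String strategy  = "dynamic";                       // e.g. from "-strategy dynamic"
    String confLabel = "distcp."
        + strategy.toLowerCase(Locale.getDefault())
        + ".strategy.impl";                             // "distcp.dynamic.strategy.impl"
    Class<? extends InputFormat> inputFormatClass =
        conf.getClass(confLabel, UniformSizeInputFormat.class, InputFormat.class);
    // With distcp-default.xml on the classpath, this resolves to DynamicInputFormat.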

+ 98 - 0
hadoop-tools/hadoop-distcp/src/site/fml/faq.fml

@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="ISO-8859-1" ?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<faqs xmlns="http://maven.apache.org/FML/1.0.1"
+      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+      xsi:schemaLocation="http://maven.apache.org/FML/1.0.1 http://maven.apache.org/xsd/fml-1.0.1.xsd"
+      title="Frequently Asked Questions">
+  <part id="General">
+    <title>General</title>
+
+    <faq id="Update">
+      <question>Why does -update not create the parent source-directory under
+      a pre-existing target directory?</question>
+      <answer>The behaviour of <code>-update</code> and <code>-overwrite</code>
+      is described in detail in the Usage section of this document. In short,
+      if either option is used with a pre-existing destination directory, the
+      <strong>contents</strong> of each source directory are copied over, rather
+      than the source-directory itself.
+      This behaviour is consistent with the legacy DistCp implementation as well.
+      </answer>
+    </faq>
+
+    <faq id="Deviation">
+      <question>How does the new DistCp differ in semantics from the Legacy
+      DistCp?</question>
+      <answer>
+          <ul>
+              <li>Files that are skipped during copy used to also have their
+              file-attributes (permissions, owner/group info, etc.) unchanged,
+              when copied with Legacy DistCp. These are now updated, even if
+              the file-copy is skipped.</li>
+              <li>Empty root directories among the source-path inputs were not
+              created at the target, in Legacy DistCp. These are now created.</li>
+          </ul>
+      </answer>
+    </faq>
+
+    <faq id="nMaps">
+      <question>Why does the new DistCp use more maps than legacy DistCp?</question>
+      <answer>
+          <p>Legacy DistCp works by figuring out what files need to be actually
+      copied to target <strong>before</strong> the copy-job is launched, and then
+      launching as many maps as required for copy. So if a majority of the files
+      need to be skipped (because they already exist, for example), fewer maps
+      will be needed. As a consequence, the time spent in setup (i.e. before the
+      M/R job) is higher.</p>
+          <p>The new DistCp calculates only the contents of the source-paths. It
+      doesn't try to filter out what files can be skipped. That decision is
+      put off till the M/R job runs. This is much faster (vis-a-vis execution-time),
+      but the number of maps launched will be as specified in the <code>-m</code>
+      option, or 20 (default) if unspecified.</p>
+      </answer>
+    </faq>
+
+    <faq id="more_maps">
+      <question>Why does DistCp not run faster when more maps are specified?</question>
+      <answer>
+          <p>At present, the smallest unit of work for DistCp is a file. i.e.,
+          a file is processed by only one map. Increasing the number of maps to
+          a value exceeding the number of files would yield no performance
+          benefit. The number of maps launched would equal the number of files.</p>
+      </answer>
+    </faq>
+
+    <faq id="client_mem">
+      <question>Why does DistCp run out of memory?</question>
+      <answer>
+          <p>If the number of individual files/directories being copied from
+      the source path(s) is extremely large (e.g. 1,000,000 paths), DistCp might
+      run out of memory while determining the list of paths for copy. This is
+      not unique to the new DistCp implementation.</p>
+          <p>To get around this, consider changing the <code>-Xmx</code> JVM
+      heap-size parameters, as follows:</p>
+          <p><code>bash$ export HADOOP_CLIENT_OPTS="-Xms64m -Xmx1024m"</code></p>
+          <p><code>bash$ hadoop distcp /source /target</code></p>
+      </answer>
+    </faq>
+
+  </part>
+</faqs>

+ 47 - 0
hadoop-tools/hadoop-distcp/src/site/pdf.xml

@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<!-- START SNIPPET: docDescriptor -->
+<document xmlns="http://maven.apache.org/DOCUMENT/1.0.1"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/DOCUMENT/1.0.1 http://maven.apache.org/xsd/document-1.0.1.xsd"
+  outputName="distcp">
+
+  <meta>
+    <title>${project.name}</title>
+  </meta>
+
+  <toc name="Table of Contents">
+    <item name="Introduction" ref="index.xml"/>
+    <item name="Usage" ref="usage.xml"/>
+    <item name="Command Line Reference" ref="cli.xml"/>
+    <item name="Architecture" ref="architecture.xml"/>
+    <item name="Appendix" ref="appendix.xml"/>
+    <item name="FAQ" ref="faq.fml"/>
+  </toc>
+  <cover>
+    <coverTitle>${project.name}</coverTitle>
+    <coverSubTitle>v. ${project.version}</coverSubTitle>
+    <coverType>User Guide</coverType>
+    <projectName>${project.name}</projectName>
+    <companyName>Apache Hadoop</companyName>
+  </cover>
+</document>

+ 125 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/appendix.xml

@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <head>
+    <title>Appendix</title>
+  </head>
+  <body>
+    <section name="Map sizing">
+ 
+      <p> By default, DistCp makes an attempt to size each map comparably so
+      that each copies roughly the same number of bytes. Note that files are the
+      finest level of granularity, so increasing the number of simultaneous
+      copiers (i.e. maps) may not always increase the number of
+      simultaneous copies nor the overall throughput.</p>
+
+      <p> The new DistCp also provides a strategy to "dynamically" size maps,
+      allowing faster data-nodes to copy more bytes than slower nodes. Using
+      <code>-strategy dynamic</code> (explained in the Architecture), rather
+      than assigning a fixed set of source-files to each map-task, files are
+      instead split into several sets (or "chunks"). The number of chunks exceeds
+      the number of maps, usually by a factor of 2-3. Each map picks up and
+      copies all files listed in a chunk. When a chunk is exhausted, a new chunk
+      is acquired and processed, until no more chunks remain.</p>
+
+      <p> By not assigning a source-path to a fixed map, faster map-tasks (i.e.
+      data-nodes) are able to consume more chunks, and thus copy more data,
+      than slower nodes. While this distribution isn't uniform, it is
+      <strong>fair</strong> with regard to each mapper's capacity.</p>
+
+      <p>The dynamic-strategy is implemented by the DynamicInputFormat. It
+      provides superior performance under most conditions. </p>
+
+      <p>Tuning the number of maps to the size of the source and
+      destination clusters, the size of the copy, and the available
+      bandwidth is recommended for long-running and regularly run jobs.</p>
+
+   </section>
+
+   <section name="Copying between versions of HDFS">
+
+        <p>For copying between two different versions of Hadoop, one will
+        usually use HftpFileSystem. This is a read-only FileSystem, so DistCp
+        must be run on the destination cluster (more specifically, on
+        TaskTrackers that can write to the destination cluster). Each source is
+        specified as <code>hftp://&lt;dfs.http.address&gt;/&lt;path&gt;</code>
+        (the default <code>dfs.http.address</code> is
+        &lt;namenode&gt;:50070).</p>
+
+   </section>
+
+   <section name="Map/Reduce and other side-effects">
+
+        <p>As has been mentioned in the preceding, should a map fail to copy
+        one of its inputs, there will be several side-effects.</p>
+
+        <ul>
+
+          <li>Unless <code>-overwrite</code> is specified, files successfully
+          copied by a previous map on a re-execution will be marked as
+          &quot;skipped&quot;.</li>
+
+          <li>If a map fails <code>mapred.map.max.attempts</code> times, the
+          remaining map tasks will be killed (unless <code>-i</code> is
+          set).</li>
+
+          <li>If <code>mapred.speculative.execution</code> is set
+          <code>final</code> and <code>true</code>, the result of the copy is
+          undefined.</li>
+
+        </ul>
+
+   </section>
+
+   <section name="SSL Configurations for HSFTP sources:">
+
+       <p>To use an HSFTP source (i.e. using the hsftp protocol), a Map-Red SSL
+       configuration file needs to be specified (via the <code>-mapredSslConf</code>
+       option). This must specify 3 parameters:</p>
+
+       <ul>
+           <li><code>ssl.client.truststore.location</code>: The local-filesystem
+            location of the trust-store file, containing the certificate for
+            the namenode.</li>
+
+           <li><code>ssl.client.truststore.type</code>: (Optional) The format of
+           the trust-store file.</li>
+
+           <li><code>ssl.client.truststore.password</code>: (Optional) Password
+           for the trust-store file.</li>
+
+       </ul>
+
+       <p>The following is an example of the contents of
+       a Map-Red SSL Configuration file:</p>
+
+           <p> <br/> <code> &lt;configuration&gt; </code> </p>
+
+           <p> <br/> <code>&lt;property&gt; </code> </p>
+           <p> <code>&lt;name&gt;ssl.client.truststore.location&lt;/name&gt; </code> </p>
+           <p> <code>&lt;value&gt;/work/keystore.jks&lt;/value&gt; </code> </p>
+           <p> <code>&lt;description&gt;Truststore to be used by clients like distcp. Must be specified. &lt;/description&gt;</code> </p>
+           <p> <br/> <code>&lt;/property&gt; </code> </p>
+
+           <p><code> &lt;property&gt; </code> </p>
+           <p> <code>&lt;name&gt;ssl.client.truststore.password&lt;/name&gt; </code> </p>
+           <p> <code>&lt;value&gt;changeme&lt;/value&gt; </code> </p>
+           <p> <code>&lt;description&gt;Optional. Default value is "". &lt;/description&gt;  </code> </p>
+           <p> <code>&lt;/property&gt; </code>  </p>
+
+           <p> <br/> <code> &lt;property&gt; </code> </p>
+           <p> <code> &lt;name&gt;ssl.client.truststore.type&lt;/name&gt;</code>  </p>
+           <p> <code> &lt;value&gt;jks&lt;/value&gt;</code>  </p>
+           <p> <code> &lt;description&gt;Optional. Default value is "jks". &lt;/description&gt;</code>  </p>
+           <p> <code> &lt;/property&gt; </code> </p>
+
+           <p> <code> <br/> &lt;/configuration&gt; </code> </p>
+
+       <p><br/>The SSL configuration file must be in the class-path of the 
+       DistCp program.</p>
+
+   </section>
+
+  </body>
+</document>

+ 200 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/architecture.xml

@@ -0,0 +1,200 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+    <head>
+        <title>Architecture of DistCp</title>
+    </head>
+    <body>
+      <section name="Architecture">
+
+        <p>The components of the new DistCp may be classified into the following
+           categories: </p>
+
+        <ul>
+
+          <li>DistCp Driver</li>
+          <li>Copy-listing generator</li>
+          <li>Input-formats and Map-Reduce components</li>
+
+        </ul>
+
+        <subsection name="DistCp Driver">
+          <p>The DistCp Driver components are responsible for:</p>
+
+          <ul>
+            <li>Parsing the arguments passed to the DistCp command on the
+                command-line, via:
+              <ul>
+                <li>OptionsParser, and</li>
+                <li>DistCpOptionsSwitch</li>
+              </ul>
+            </li>
+            <li>Assembling the command arguments into an appropriate
+                DistCpOptions object, and initializing DistCp. These arguments
+                include:
+              <ul>
+                <li>Source-paths</li>
+                <li>Target location</li>
+                <li>Copy options (e.g. whether to update-copy, overwrite, which
+                    file-attributes to preserve, etc.)</li>
+              </ul>
+            </li>
+            <li>Orchestrating the copy operation by:
+              <ul>
+                <li>Invoking the copy-listing-generator to create the list of
+                    files to be copied.</li>
+                <li>Setting up and launching the Hadoop Map-Reduce Job to carry
+                    out the copy.</li>
+                <li>Based on the options, either returning a handle to the
+                    Hadoop MR Job immediately, or waiting till completion.</li>
+              </ul>
+            </li>
+          </ul>
+          <br/>
+
+          <p>The parser-elements are exercised only from the command-line (or if
+             DistCp::run() is invoked). The DistCp class may also be used
+             programmatically, by constructing the DistCpOptions object, and
+             initializing a DistCp object appropriately.</p>
+
+        </subsection>
+
+        <subsection name="Copy-listing generator">
+
+          <p>The copy-listing-generator classes are responsible for creating the
+             list of files/directories to be copied from source. They examine
+             the contents of the source-paths (files/directories, including
+             wild-cards), and record all paths that need copy into a sequence-
+             file, for consumption by the DistCp Hadoop Job. The main classes in
+             this module include:</p>
+
+          <ol>
+
+            <li>CopyListing: The interface that should be implemented by any 
+                copy-listing-generator implementation. Also provides the factory
+                method by which the concrete CopyListing implementation is
+                chosen.</li>
+
+            <li>SimpleCopyListing: An implementation of CopyListing that accepts
+                multiple source paths (files/directories), and recursively lists
+                all the individual files and directories under each, for
+                copy.</li>
+
+            <li>GlobbedCopyListing: Another implementation of CopyListing that
+                expands wild-cards in the source paths.</li>
+
+            <li>FileBasedCopyListing: An implementation of CopyListing that
+                reads the source-path list from a specified file.</li>
+
+          </ol>
+          <p/>
+
+          <p>Based on whether a source-file-list is specified in the
+             DistCpOptions, the source-listing is generated in one of the
+             following ways:</p>
+
+          <ol>
+
+            <li>If there's no source-file-list, the GlobbedCopyListing is used.
+                All wild-cards are expanded, and all the expansions are
+                forwarded to the SimpleCopyListing, which in turn constructs the
+                listing (via recursive descent of each path). </li>
+
+            <li>If a source-file-list is specified, the FileBasedCopyListing is
+                used. Source-paths are read from the specified file, and then
+                forwarded to the GlobbedCopyListing. The listing is then
+                constructed as described above.</li>
+
+          </ol>
+
+          <br/>
+
+          <p>One may customize the method by which the copy-listing is
+             constructed by providing a custom implementation of the CopyListing
+             interface. The behaviour of DistCp differs here from the legacy
+             DistCp, in how paths are considered for copy. </p>
+
+          <p>The legacy implementation only lists those paths that must
+             definitely be copied on to target.
+             E.g. if a file already exists at the target (and -overwrite isn't
+             specified), the file isn't even considered in the Map-Reduce Copy
+             Job. Determining this during setup (i.e. before the Map-Reduce Job)
+             involves file-size and checksum-comparisons that are potentially
+             time-consuming.</p>
+
+          <p>The new DistCp postpones such checks until the Map-Reduce Job, thus
+             reducing setup time. Performance is enhanced further since these
+             checks are parallelized across multiple maps.</p>
+
+        </subsection>
+
+        <subsection name="Input-formats and Map-Reduce components">
+
+          <p> The Input-formats and Map-Reduce components are responsible for
+              the actual copy of files and directories from the source to the
+              destination path. The listing-file created during copy-listing
+              generation is consumed at this point, when the copy is carried
+              out. The classes of interest here include:</p>
+
+          <ul>
+            <li><strong>UniformSizeInputFormat:</strong> This implementation of
+                org.apache.hadoop.mapreduce.InputFormat provides equivalence
+                with Legacy DistCp in balancing load across maps.
+                The aim of the UniformSizeInputFormat is to make each map copy
+                roughly the same number of bytes. To that end, the listing file is
+                split into groups of paths, such that the sum of file-sizes in
+                each InputSplit is nearly equal to that of every other split. The splitting
+                isn't always perfect, but its trivial implementation keeps the
+                setup-time low.</li>
+
+            <li><strong>DynamicInputFormat and DynamicRecordReader:</strong>
+                <p> The DynamicInputFormat implements org.apache.hadoop.mapreduce.InputFormat,
+                and is new to DistCp. The listing-file is split into several
+                "chunk-files", the exact number of chunk-files being a multiple
+                of the number of maps requested for in the Hadoop Job. Each map
+                task is "assigned" one of the chunk-files (by renaming the chunk
+                to the task's id), before the Job is launched.</p>
+
+                <p>Paths are read from each chunk using the DynamicRecordReader,
+                and processed in the CopyMapper. After all the paths in a chunk
+                are processed, the current chunk is deleted and a new chunk is
+                acquired. The process continues until no more chunks are
+                available.</p>
+                <p>This "dynamic" approach allows faster map-tasks to consume
+                more paths than slower ones, thus speeding up the DistCp job
+                overall. </p>
+            </li>
+
+            <li><strong>CopyMapper:</strong> This class implements the physical
+                file-copy. The input-paths are checked against the input-options
+                (specified in the Job's Configuration), to determine whether a
+                file needs copy. A file will be copied only if at least one of
+                the following is true:
+              <ul>
+                <li>A file with the same name doesn't exist at target.</li>
+                <li>A file with the same name exists at target, but has a
+                    different file size.</li>
+                <li>A file with the same name exists at target, but has a
+                    different checksum, and -skipcrccheck isn't mentioned.</li>
+                <li>A file with the same name exists at target, but -overwrite
+                    is specified.</li>
+                <li>A file with the same name exists at target, but differs in
+                    block-size (and block-size needs to be preserved).</li>
+              </ul>
+            </li>
+
+            <li><strong>CopyCommitter:</strong>
+                This class is responsible for the commit-phase of the DistCp
+                job, including:
+              <ul>
+                <li>Preservation of directory-permissions (if specified in the
+                    options)</li>
+                <li>Clean-up of temporary-files, work-directories, etc.</li>
+              </ul>
+            </li>
+          </ul>
+        </subsection>
+      </section>
+    </body>
+</document>
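
As the "DistCp Driver" subsection notes, DistCp may also be used programmatically by constructing a DistCpOptions object. A rough sketch of such an invocation follows; the DistCpOptions constructor and setter names are assumed to match this version of the code, and the cluster paths are hypothetical:

    // Illustrative only: programmatic DistCp invocation using the dynamic strategy.
    Configuration conf = new Configuration();
    List<Path> sources = Arrays.asList(new Path("hdfs://nn1:8020/source/dir"));
    Path target        = new Path("hdfs://nn2:8020/target/dir");

    DistCpOptions options = new DistCpOptions(sources, target);
    options.setCopyStrategy("dynamic");   // resolved via distcp.dynamic.strategy.impl

    DistCp distCp = new DistCp(conf, options);
    distCp.execute();                     // launches the copy job and waits for it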

+ 123 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/cli.xml

@@ -0,0 +1,123 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <head>
+    <title>Command Line Options</title>
+  </head>
+  <body>
+      <section name="Options Index"> 
+        <table>
+          <tr><th> Flag </th><th> Description </th><th> Notes </th></tr>
+
+          <tr><td><code>-p[rbugp]</code></td>
+              <td>Preserve<br/>
+                  r: replication number<br/>
+                  b: block size<br/>
+                  u: user<br/>
+                  g: group<br/>
+                  p: permission<br/></td>
+              <td>Modification times are not preserved. Also, when
+              <code>-update</code> is specified, status updates will
+              <strong>not</strong> be synchronized unless the file sizes
+              also differ (i.e. unless the file is re-created).
+              </td></tr>
+          <tr><td><code>-i</code></td>
+              <td>Ignore failures</td>
+              <td>As explained in the Appendix, this option
+              will keep more accurate statistics about the copy than the
+              default case. It also preserves logs from failed copies, which
+              can be valuable for debugging. Finally, a failing map will not
+              cause the job to fail before all splits are attempted.
+              </td></tr>
+          <tr><td><code>-log &lt;logdir&gt;</code></td>
+              <td>Write logs to &lt;logdir&gt;</td>
+              <td>DistCp keeps logs of each file it attempts to copy as map
+              output. If a map fails, the log output will not be retained if
+              it is re-executed.
+              </td></tr>
+          <tr><td><code>-m &lt;num_maps&gt;</code></td>
+              <td>Maximum number of simultaneous copies</td>
+              <td>Specify the number of maps to copy data. Note that more maps
+              may not necessarily improve throughput.
+              </td></tr>
+          <tr><td><code>-overwrite</code></td>
+              <td>Overwrite destination</td>
+              <td>If a map fails and <code>-i</code> is not specified, all the
+              files in the split, not only those that failed, will be recopied.
+              As discussed in the Usage documentation, it also changes
+              the semantics for generating destination paths, so users should
+              use this carefully.
+              </td></tr>
+          <tr><td><code>-update</code></td>
+              <td>Overwrite if src size different from dst size</td>
+              <td>As noted in the preceding, this is not a &quot;sync&quot;
+              operation. The only criterion examined is the source and
+              destination file sizes; if they differ, the source file
+              replaces the destination file. As discussed in the
+              Usage documentation, it also changes the semantics for
+              generating destination paths, so users should use this carefully.
+              </td></tr>
+          <tr><td><code>-f &lt;urilist_uri&gt;</code></td>
+              <td>Use list at &lt;urilist_uri&gt; as src list</td>
+              <td>This is equivalent to listing each source on the command
+              line. The <code>urilist_uri</code> list should be a fully
+              qualified URI.
+              </td></tr>
+          <tr><td><code>-filelimit &lt;n&gt;</code></td>
+              <td>Limit the total number of files to be &lt;= n</td>
+              <td><strong>Deprecated!</strong> Ignored in the new DistCp.
+              </td></tr>
+          <tr><td><code>-sizelimit &lt;n&gt;</code></td>
+              <td>Limit the total size to be &lt;= n bytes</td>
+              <td><strong>Deprecated!</strong> Ignored in the new DistCp.
+              </td></tr>
+          <tr><td><code>-delete</code></td>
+              <td>Delete the files existing in the dst but not in src</td>
+              <td>The deletion is done by FS Shell, so the trash will be used
+                  if it is enabled.
+              </td></tr>
+          <tr><td><code>-strategy {dynamic|uniformsize}</code></td>
+              <td>Choose the copy-strategy to be used in DistCp.</td>
+              <td>By default, uniformsize is used. (i.e. Maps are balanced on the
+                  total size of files copied by each map. Similar to legacy.)
+                  If "dynamic" is specified, <code>DynamicInputFormat</code> is
+                  used instead. (This is described in the Architecture section,
+                  under InputFormats.)
+              </td></tr>
+          <tr><td><code>-bandwidth</code></td>
+                <td>Specify bandwidth per map, in MB/second.</td>
+                <td>Each map will be restricted to consume only the specified
+                    bandwidth. This is not always exact. The map throttles back
+                    its bandwidth consumption during a copy, such that the
+                    <strong>net</strong> bandwidth used tends towards the
+                    specified value.
+                </td></tr>
+          <tr><td><code>-atomic {-tmp &lt;tmp_dir&gt;}</code></td>
+                <td>Specify atomic commit, with optional tmp directory.</td>
+                <td><code>-atomic</code> instructs DistCp to copy the source
+                    data to a temporary target location, and then move the
+                    temporary target to the final-location atomically. Data will
+                    either be available at final target in a complete and consistent
+                    form, or not at all.
+                    Optionally, <code>-tmp</code> may be used to specify the
+                    location of the tmp-target. If not specified, a default is
+                    chosen. <strong>Note:</strong> tmp_dir must be on the final
+                    target cluster.
+                </td></tr>
+            <tr><td><code>-mapredSslConf &lt;ssl_conf_file&gt;</code></td>
+                  <td>Specify SSL Config file, to be used with HSFTP source</td>
+                  <td>When using the hsftp protocol with a source, the security-
+                      related properties may be specified in a config-file and
+                      passed to DistCp. &lt;ssl_conf_file&gt; needs to be in
+                      the classpath.
+                  </td></tr>
+            <tr><td><code>-async</code></td>
+                  <td>Run DistCp asynchronously. Quits as soon as the Hadoop
+                  Job is launched.</td>
+                  <td>The Hadoop Job-id is logged, for tracking.
+                  </td></tr>
+        </table>
+      </section>
+  </body>
+</document>
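
Many of the flags in this table correspond to DistCpOptions setters that the new tests in this change call directly (setSyncFolder, setCopyStrategy, setAtomicCommit, setAtomicWorkPath, setBlocking). A hedged sketch of exercising the same options programmatically follows; the flag-to-setter pairing is inferred from those tests rather than stated by this documentation, so treat it as an assumption.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;

public class ProgrammaticDistCpSketch {
  public static void main(String[] args) throws Exception {
    // Source and target URIs reuse the placeholders from the usage examples.
    List<Path> sources = Arrays.asList(new Path("hdfs://nn1:8020/foo/bar"));
    Path target = new Path("hdfs://nn2:8020/bar/foo");

    DistCpOptions options = new DistCpOptions(sources, target);
    options.setSyncFolder(true);                              // assumed: -update
    options.setCopyStrategy("dynamic");                       // assumed: -strategy dynamic
    options.setAtomicCommit(true);                            // assumed: -atomic
    options.setAtomicWorkPath(new Path("/tmp/distcp-work"));  // assumed: -atomic -tmp
    options.setBlocking(false);                               // assumed: -async

    // execute() submits the Hadoop Job; with blocking disabled it returns
    // immediately, as the tests in this change rely on.
    Job job = new DistCp(new Configuration(), options).execute();
    System.out.println("Submitted " + job.getJobID());
    job.waitForCompletion(true);
  }
}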

+ 32 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/index.xml

@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <head>
+    <title>DistCp</title>
+  </head>
+  <body>
+    <section name="Overview">
+      <p>
+        DistCp (distributed copy) is a tool used for large inter/intra-cluster
+      copying. It uses Map/Reduce to effect its distribution, error
+      handling and recovery, and reporting. It expands a list of files and
+      directories into input to map tasks, each of which will copy a partition
+      of the files specified in the source list.
+      </p>
+      <p>
+       The erstwhile implementation of DistCp has its share of quirks and
+       drawbacks, both in its usage and in its extensibility and
+       performance. The purpose of the DistCp refactor was to fix these shortcomings,
+       enabling it to be used and extended programmatically. New paradigms have
+       been introduced to improve runtime and setup performance, while simultaneously
+       retaining the legacy behaviour as default.
+      </p>
+      <p>
+       This document aims to describe the design of the new DistCp, its spanking
+       new features, their optimal use, and any deviations from the legacy
+       implementation.
+      </p>
+    </section>
+  </body>
+</document>

+ 147 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/usage.xml

@@ -0,0 +1,147 @@
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <head>
+    <title>Usage </title>
+  </head>
+  <body>
+    <section name="Basic Usage">
+        <p>The most common invocation of DistCp is an inter-cluster copy:</p>
+        <p><code>bash$ hadoop jar hadoop-distcp.jar hdfs://nn1:8020/foo/bar \</code><br/>
+           <code>                    hdfs://nn2:8020/bar/foo</code></p>
+
+        <p>This will expand the namespace under <code>/foo/bar</code> on nn1
+        into a temporary file, partition its contents among a set of map
+        tasks, and start a copy on each TaskTracker from nn1 to nn2.</p>
+
+        <p>One can also specify multiple source directories on the command
+        line:</p>
+        <p><code>bash$ hadoop jar hadoop-distcp.jar hdfs://nn1:8020/foo/a \</code><br/>
+           <code> hdfs://nn1:8020/foo/b \</code><br/>
+           <code> hdfs://nn2:8020/bar/foo</code></p>
+
+        <p>Or, equivalently, from a file using the <code>-f</code> option:<br/>
+        <code>bash$ hadoop jar hadoop-distcp.jar -f hdfs://nn1:8020/srclist \</code><br/>
+        <code> hdfs://nn2:8020/bar/foo</code><br/></p>
+
+        <p>Where <code>srclist</code> contains<br/>
+        <code>hdfs://nn1:8020/foo/a</code><br/>
+        <code>hdfs://nn1:8020/foo/b</code></p>
+
+        <p>When copying from multiple sources, DistCp will abort the copy with
+        an error message if two sources collide, but collisions at the
+        destination are resolved per the <a href="#options">options</a>
+        specified. By default, files already existing at the destination are
+        skipped (i.e. not replaced by the source file). A count of skipped
+        files is reported at the end of each job, but it may be inaccurate if a
+        copier failed for some subset of its files, but succeeded on a later
+        attempt.</p>
+
+        <p>It is important that each TaskTracker can reach and communicate with
+        both the source and destination file systems. For HDFS, both the source
+        and destination must be running the same version of the protocol or use
+        a backwards-compatible protocol (see <a href="#cpver">Copying Between
+        Versions</a>).</p>
+
+        <p>After a copy, it is recommended that one generates and cross-checks
+        a listing of the source and destination to verify that the copy was
+        truly successful. Since DistCp employs both Map/Reduce and the
+        FileSystem API, issues in or between any of the three could adversely
+        and silently affect the copy. Some have had success running with
+        <code>-update</code> enabled to perform a second pass, but users should
+        be acquainted with its semantics before attempting this.</p>
+
+        <p>It's also worth noting that if another client is still writing to a
+        source file, the copy will likely fail. Attempting to overwrite a file
+        being written at the destination should also fail on HDFS. If a source
+        file is (re)moved before it is copied, the copy will fail with a
+        FileNotFoundException.</p>
+
+        <p>Please refer to the detailed Command Line Reference for information
+        on all the options available in DistCp.</p>
+        
+    </section>
+    <section name="Update and Overwrite">
+
+        <p><code>-update</code> is used to copy files from source that don't
+        exist at the target, or have different contents. <code>-overwrite</code>
+        overwrites files at the target even if they already exist there
+        with the same contents.</p>
+
+        <p><br/>Update and Overwrite options warrant special attention, since their
+        handling of source-paths varies from the defaults in a very subtle manner.
+        Consider a copy from <code>/source/first/</code> and
+        <code>/source/second/</code> to <code>/target/</code>, where the source
+        paths have the following contents:</p>
+
+        <p><code>hdfs://nn1:8020/source/first/1</code><br/>
+           <code>hdfs://nn1:8020/source/first/2</code><br/>
+           <code>hdfs://nn1:8020/source/second/10</code><br/>
+           <code>hdfs://nn1:8020/source/second/20</code><br/></p>
+
+        <p><br/>When DistCp is invoked without <code>-update</code> or
+        <code>-overwrite</code>, the DistCp defaults would create directories
+        <code>first/</code> and <code>second/</code>, under <code>/target</code>.
+        Thus:<br/></p>
+
+        <p><code>distcp hdfs://nn1:8020/source/first hdfs://nn1:8020/source/second hdfs://nn2:8020/target</code></p>
+        <p><br/>would yield the following contents in <code>/target</code>: </p>
+
+        <p><code>hdfs://nn2:8020/target/first/1</code><br/>
+           <code>hdfs://nn2:8020/target/first/2</code><br/>
+           <code>hdfs://nn2:8020/target/second/10</code><br/>
+           <code>hdfs://nn2:8020/target/second/20</code><br/></p>
+
+        <p><br/>When either <code>-update</code> or <code>-overwrite</code> is
+            specified, the <strong>contents</strong> of the source-directories
+            are copied to target, and not the source directories themselves. Thus: </p>
+
+        <p><code>distcp -update hdfs://nn1:8020/source/first hdfs://nn1:8020/source/second hdfs://nn2:8020/target</code></p>
+
+        <p><br/>would yield the following contents in <code>/target</code>: </p>
+
+        <p><code>hdfs://nn2:8020/target/1</code><br/>
+           <code>hdfs://nn2:8020/target/2</code><br/>
+           <code>hdfs://nn2:8020/target/10</code><br/>
+           <code>hdfs://nn2:8020/target/20</code><br/></p>
+
+        <p><br/>By extension, if both source folders contained a file with the same
+        name (say, <code>0</code>), then both sources would map an entry to
+        <code>/target/0</code> at the destination. Rather than permit this
+        conflict, DistCp will abort.</p>
+
+        <p><br/>Now, consider the following copy operation:</p>
+
+        <p><code>distcp -update hdfs://nn1:8020/source/first hdfs://nn1:8020/source/second hdfs://nn2:8020/target</code></p>
+
+        <p><br/>With sources/sizes:</p>
+
+        <p><code>hdfs://nn1:8020/source/first/1     32</code><br/>
+           <code>hdfs://nn1:8020/source/first/2     32</code><br/>
+           <code>hdfs://nn1:8020/source/second/10   64</code><br/>
+           <code>hdfs://nn1:8020/source/second/20   32</code><br/></p>
+
+        <p><br/>And destination/sizes:</p>
+
+        <p><code>hdfs://nn2:8020/target/1   32</code><br/>
+           <code>hdfs://nn2:8020/target/10  32</code><br/>
+           <code>hdfs://nn2:8020/target/20  64</code><br/></p>
+
+        <p><br/>Will effect: </p>
+
+        <p><code>hdfs://nn2:8020/target/1   32</code><br/>
+           <code>hdfs://nn2:8020/target/2   32</code><br/>
+           <code>hdfs://nn2:8020/target/10  64</code><br/>
+           <code>hdfs://nn2:8020/target/20  32</code><br/></p>
+
+        <p><br/><code>1</code> is skipped because the file-length and contents match.
+        <code>2</code> is copied because it doesn't exist at the target.
+        <code>10</code> and <code>20</code> are overwritten since the contents
+        don't match the source. </p>
+
+        <p>If <code>-overwrite</code> is used, <code>1</code> is overwritten as well.</p>
+
+    </section>
+  </body>
+
+</document>
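
The path-mapping difference described in the Update and Overwrite section can be restated as a tiny helper. This is only a sketch of the documented behaviour, with made-up names; it is not how DistCp computes target paths internally.

// Illustrative sketch only: mirrors the documented difference between the
// default path mapping and the -update / -overwrite mapping. Names are made up.
public class TargetPathSketch {

  static String targetPathFor(String sourceRoot, String sourceFile,
                              String targetRoot, boolean updateOrOverwrite) {
    String relative = sourceFile.substring(sourceRoot.length()); // e.g. "/1"
    if (updateOrOverwrite) {
      // -update / -overwrite: the contents of the source dir land under target
      return targetRoot + relative;
    }
    // default: the source directory itself is re-created under the target
    String dirName = sourceRoot.substring(sourceRoot.lastIndexOf('/')); // e.g. "/first"
    return targetRoot + dirName + relative;
  }

  public static void main(String[] args) {
    String src = "hdfs://nn1:8020/source/first";
    String file = src + "/1";
    System.out.println(targetPathFor(src, file, "hdfs://nn2:8020/target", false));
    // hdfs://nn2:8020/target/first/1
    System.out.println(targetPathFor(src, file, "hdfs://nn2:8020/target", true));
    // hdfs://nn2:8020/target/1
  }
}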

+ 139 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/StubContext.java

@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.MapContextImpl;
+import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.conf.Configuration;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.io.IOException;
+
+public class StubContext {
+
+  private StubStatusReporter reporter = new StubStatusReporter();
+  private RecordReader<Text, FileStatus> reader;
+  private StubInMemoryWriter writer = new StubInMemoryWriter();
+  private Mapper<Text, FileStatus, Text, Text>.Context mapperContext;
+
+  public StubContext(Configuration conf, RecordReader<Text, FileStatus> reader,
+                     int taskId) throws IOException, InterruptedException {
+
+    WrappedMapper<Text, FileStatus, Text, Text> wrappedMapper
+            = new WrappedMapper<Text, FileStatus, Text, Text>();
+
+    MapContextImpl<Text, FileStatus, Text, Text> contextImpl
+            = new MapContextImpl<Text, FileStatus, Text, Text>(conf,
+            getTaskAttemptID(taskId), reader, writer,
+            null, reporter, null);
+
+    this.reader = reader;
+    this.mapperContext = wrappedMapper.getMapContext(contextImpl);
+  }
+
+  public Mapper<Text, FileStatus, Text, Text>.Context getContext() {
+    return mapperContext;
+  }
+
+  public StatusReporter getReporter() {
+    return reporter;
+  }
+
+  public RecordReader<Text, FileStatus> getReader() {
+    return reader;
+  }
+
+  public StubInMemoryWriter getWriter() {
+    return writer;
+  }
+
+  public static class StubStatusReporter extends StatusReporter {
+
+    private Counters counters = new Counters();
+
+    public StubStatusReporter() {
+	    /*
+      final CounterGroup counterGroup
+              = new CounterGroup("FileInputFormatCounters",
+                                 "FileInputFormatCounters");
+      counterGroup.addCounter(new Counter("BYTES_READ",
+                                          "BYTES_READ",
+                                          0));
+      counters.addGroup(counterGroup);
+      */
+    }
+
+    @Override
+    public Counter getCounter(Enum<?> name) {
+      return counters.findCounter(name);
+    }
+
+    @Override
+    public Counter getCounter(String group, String name) {
+      return counters.findCounter(group, name);
+    }
+
+    @Override
+    public void progress() {}
+
+    @Override
+    public float getProgress() {
+      return 0F;
+    }
+
+    @Override
+    public void setStatus(String status) {}
+  }
+
+
+  public static class StubInMemoryWriter extends RecordWriter<Text, Text> {
+
+    List<Text> keys = new ArrayList<Text>();
+
+    List<Text> values = new ArrayList<Text>();
+
+    @Override
+    public void write(Text key, Text value) throws IOException, InterruptedException {
+      keys.add(key);
+      values.add(value);
+    }
+
+    @Override
+    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+    }
+
+    public List<Text> keys() {
+      return keys;
+    }
+
+    public List<Text> values() {
+      return values;
+    }
+
+  }
+
+  public static TaskAttemptID getTaskAttemptID(int taskId) {
+    return new TaskAttemptID("", 0, TaskType.MAP, taskId, 0);
+  }
+}

+ 252 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java

@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.tools.util.TestDistCpUtils;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+import java.util.ArrayList;
+
+public class TestCopyListing extends SimpleCopyListing {
+  private static final Log LOG = LogFactory.getLog(TestCopyListing.class);
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+  private static final Configuration config = new Configuration();
+  private static MiniDFSCluster cluster;
+
+  @BeforeClass
+  public static void create() throws IOException {
+    cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).format(true)
+                                                .build();
+  }
+
+  @AfterClass
+  public static void destroy() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+  
+  public TestCopyListing() {
+    super(config, CREDENTIALS);
+  }
+
+  protected TestCopyListing(Configuration configuration) {
+    super(configuration, CREDENTIALS);
+  }
+
+  @Override
+  protected long getBytesToCopy() {
+    return 0;
+  }
+
+  @Override
+  protected long getNumberOfPaths() {
+    return 0;
+  }
+
+  @Test
+  public void testMultipleSrcToFile() {
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(getConf());
+      List<Path> srcPaths = new ArrayList<Path>();
+      srcPaths.add(new Path("/tmp/in/1"));
+      srcPaths.add(new Path("/tmp/in/2"));
+      Path target = new Path("/tmp/out/1");
+      TestDistCpUtils.createFile(fs, "/tmp/in/1");
+      TestDistCpUtils.createFile(fs, "/tmp/in/2");
+      fs.mkdirs(target);
+      DistCpOptions options = new DistCpOptions(srcPaths, target);
+      validatePaths(options);
+      TestDistCpUtils.delete(fs, "/tmp");
+      //No errors
+
+      target = new Path("/tmp/out/1");
+      fs.create(target).close();
+      options = new DistCpOptions(srcPaths, target);
+      try {
+        validatePaths(options);
+        Assert.fail("Invalid inputs accepted");
+      } catch (InvalidInputException ignore) { }
+      TestDistCpUtils.delete(fs, "/tmp");
+
+      srcPaths.clear();
+      srcPaths.add(new Path("/tmp/in/1"));
+      fs.mkdirs(new Path("/tmp/in/1"));
+      target = new Path("/tmp/out/1");
+      fs.create(target).close();
+      options = new DistCpOptions(srcPaths, target);
+      try {
+        validatePaths(options);
+        Assert.fail("Invalid inputs accepted");
+      } catch (InvalidInputException ignore) { }
+      TestDistCpUtils.delete(fs, "/tmp");
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test input validation failed");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testDuplicates() {
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(getConf());
+      List<Path> srcPaths = new ArrayList<Path>();
+      srcPaths.add(new Path("/tmp/in/*/*"));
+      TestDistCpUtils.createFile(fs, "/tmp/in/1.txt");
+      TestDistCpUtils.createFile(fs, "/tmp/in/src/1.txt");
+      Path target = new Path("/tmp/out");
+      Path listingFile = new Path("/tmp/list");
+      DistCpOptions options = new DistCpOptions(srcPaths, target);
+      CopyListing listing = CopyListing.getCopyListing(getConf(), CREDENTIALS, options);
+      try {
+        listing.buildListing(listingFile, options);
+        Assert.fail("Duplicates not detected");
+      } catch (DuplicateFileException ignore) {
+      }
+    } catch (IOException e) {
+      LOG.error("Exception encountered in test", e);
+      Assert.fail("Test failed " + e.getMessage());
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testBuildListing() {
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(getConf());
+      List<Path> srcPaths = new ArrayList<Path>();
+      Path p1 = new Path("/tmp/in/1");
+      Path p2 = new Path("/tmp/in/2");
+      Path p3 = new Path("/tmp/in2/2");
+      Path target = new Path("/tmp/out/1");
+      srcPaths.add(p1.getParent());
+      srcPaths.add(p3.getParent());
+      TestDistCpUtils.createFile(fs, "/tmp/in/1");
+      TestDistCpUtils.createFile(fs, "/tmp/in/2");
+      TestDistCpUtils.createFile(fs, "/tmp/in2/2");
+      fs.mkdirs(target);
+      OutputStream out = fs.create(p1);
+      out.write("ABC".getBytes());
+      out.close();
+
+      out = fs.create(p2);
+      out.write("DEF".getBytes());
+      out.close();
+
+      out = fs.create(p3);
+      out.write("GHIJ".getBytes());
+      out.close();
+
+      Path listingFile = new Path("/tmp/file");
+
+      DistCpOptions options = new DistCpOptions(srcPaths, target);
+      options.setSyncFolder(true);
+      CopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS);
+      try {
+        listing.buildListing(listingFile, options);
+        Assert.fail("Duplicates not detected");
+      } catch (DuplicateFileException ignore) {
+      }
+      Assert.assertEquals(listing.getBytesToCopy(), 10);
+      Assert.assertEquals(listing.getNumberOfPaths(), 3);
+      TestDistCpUtils.delete(fs, "/tmp");
+
+      try {
+        listing.buildListing(listingFile, options);
+        Assert.fail("Invalid input not detected");
+      } catch (InvalidInputException ignore) {
+      }
+      TestDistCpUtils.delete(fs, "/tmp");
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test build listing failed");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testBuildListingForSingleFile() {
+    FileSystem fs = null;
+    String testRootString = "/singleFileListing";
+    Path testRoot = new Path(testRootString);
+    SequenceFile.Reader reader = null;
+    try {
+      fs = FileSystem.get(getConf());
+      if (fs.exists(testRoot))
+        TestDistCpUtils.delete(fs, testRootString);
+
+      Path sourceFile = new Path(testRoot, "/source/foo/bar/source.txt");
+      Path decoyFile  = new Path(testRoot, "/target/moo/source.txt");
+      Path targetFile = new Path(testRoot, "/target/moo/target.txt");
+
+      TestDistCpUtils.createFile(fs, sourceFile.toString());
+      TestDistCpUtils.createFile(fs, decoyFile.toString());
+      TestDistCpUtils.createFile(fs, targetFile.toString());
+
+      List<Path> srcPaths = new ArrayList<Path>();
+      srcPaths.add(sourceFile);
+
+      DistCpOptions options = new DistCpOptions(srcPaths, targetFile);
+      CopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS);
+
+      final Path listFile = new Path(testRoot, "/tmp/fileList.seq");
+      listing.buildListing(listFile, options);
+
+      reader = new SequenceFile.Reader(fs, listFile, getConf());
+      FileStatus fileStatus = new FileStatus();
+      Text relativePath = new Text();
+      Assert.assertTrue(reader.next(relativePath, fileStatus));
+      Assert.assertTrue(relativePath.toString().equals(""));
+    }
+    catch (Exception e) {
+      LOG.error("Unexpected exception: ", e);
+      Assert.fail("Unexpected exception encountered.");
+    }
+    finally {
+      TestDistCpUtils.delete(fs, testRootString);
+      IOUtils.closeStream(reader);
+    }
+  }
+}

+ 275 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCp.java

@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobSubmissionFiles;
+import org.apache.hadoop.mapreduce.Cluster;
+import org.apache.hadoop.tools.mapred.CopyOutputFormat;
+import org.junit.*;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.io.*;
+
+@Ignore
+public class TestDistCp {
+  private static final Log LOG = LogFactory.getLog(TestDistCp.class);
+  private static List<Path> pathList = new ArrayList<Path>();
+  private static final int FILE_SIZE = 1024;
+
+  private static Configuration configuration;
+  private static MiniDFSCluster cluster;
+  private static MiniMRCluster mrCluster;
+
+  private static final String SOURCE_PATH = "/tmp/source";
+  private static final String TARGET_PATH = "/tmp/target";
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    configuration = getConfigurationForCluster();
+    cluster = new MiniDFSCluster.Builder(configuration).numDataNodes(1)
+                    .format(true).build();
+    System.setProperty("org.apache.hadoop.mapred.TaskTracker", "target/tmp");
+    configuration.set("org.apache.hadoop.mapred.TaskTracker", "target/tmp");
+    System.setProperty("hadoop.log.dir", "target/tmp");
+    configuration.set("hadoop.log.dir", "target/tmp");
+    mrCluster = new MiniMRCluster(1, cluster.getFileSystem().getUri().toString(), 1);
+    Configuration mrConf = mrCluster.createJobConf();
+    final String mrJobTracker = mrConf.get("mapred.job.tracker");
+    configuration.set("mapred.job.tracker", mrJobTracker);
+    final String mrJobTrackerAddress
+            = mrConf.get("mapred.job.tracker.http.address");
+    configuration.set("mapred.job.tracker.http.address", mrJobTrackerAddress);
+  }
+
+  @AfterClass
+  public static void cleanup() {
+    if (mrCluster != null) mrCluster.shutdown();
+    if (cluster != null) cluster.shutdown();
+  }
+
+  private static Configuration getConfigurationForCluster() throws IOException {
+    Configuration configuration = new Configuration();
+    System.setProperty("test.build.data", "target/build/TEST_DISTCP/data");
+    configuration.set("hadoop.log.dir", "target/tmp");
+
+    LOG.debug("fs.default.name  == " + configuration.get("fs.default.name"));
+    LOG.debug("dfs.http.address == " + configuration.get("dfs.http.address"));
+    return configuration;
+  }
+
+  private static void createSourceData() throws Exception {
+    mkdirs(SOURCE_PATH + "/1");
+    mkdirs(SOURCE_PATH + "/2");
+    mkdirs(SOURCE_PATH + "/2/3/4");
+    mkdirs(SOURCE_PATH + "/2/3");
+    mkdirs(SOURCE_PATH + "/5");
+    touchFile(SOURCE_PATH + "/5/6");
+    mkdirs(SOURCE_PATH + "/7");
+    mkdirs(SOURCE_PATH + "/7/8");
+    touchFile(SOURCE_PATH + "/7/8/9");
+  }
+
+  private static void mkdirs(String path) throws Exception {
+    FileSystem fileSystem = cluster.getFileSystem();
+    final Path qualifiedPath = new Path(path).makeQualified(fileSystem.getUri(),
+                                  fileSystem.getWorkingDirectory());
+    pathList.add(qualifiedPath);
+    fileSystem.mkdirs(qualifiedPath);
+  }
+
+  private static void touchFile(String path) throws Exception {
+    FileSystem fs;
+    DataOutputStream outputStream = null;
+    try {
+      fs = cluster.getFileSystem();
+      final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
+                                            fs.getWorkingDirectory());
+      final long blockSize = fs.getDefaultBlockSize() * 2;
+      outputStream = fs.create(qualifiedPath, true, 0,
+              (short)(fs.getDefaultReplication()*2),
+              blockSize);
+      outputStream.write(new byte[FILE_SIZE]);
+      pathList.add(qualifiedPath);
+    }
+    finally {
+      IOUtils.cleanup(null, outputStream);
+    }
+  }
+
+  private static void clearState() throws Exception {
+    pathList.clear();
+    cluster.getFileSystem().delete(new Path(TARGET_PATH), true);
+    createSourceData();
+  }
+
+//  @Test
+  public void testUniformSizeDistCp() throws Exception {
+    try {
+      clearState();
+      final FileSystem fileSystem = cluster.getFileSystem();
+      Path sourcePath = new Path(SOURCE_PATH)
+              .makeQualified(fileSystem.getUri(),
+                             fileSystem.getWorkingDirectory());
+      List<Path> sources = new ArrayList<Path>();
+      sources.add(sourcePath);
+
+      Path targetPath = new Path(TARGET_PATH)
+              .makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
+      DistCpOptions options = new DistCpOptions(sources, targetPath);
+      options.setAtomicCommit(true);
+      options.setBlocking(false);
+      Job job = new DistCp(configuration, options).execute();
+      Path workDir = CopyOutputFormat.getWorkingDirectory(job);
+      Path finalDir = CopyOutputFormat.getCommitDirectory(job);
+
+      while (!job.isComplete()) {
+        if (cluster.getFileSystem().exists(workDir)) {
+          break;
+        }
+      }
+      job.waitForCompletion(true);
+      Assert.assertFalse(cluster.getFileSystem().exists(workDir));
+      Assert.assertTrue(cluster.getFileSystem().exists(finalDir));
+      Assert.assertFalse(cluster.getFileSystem().exists(
+          new Path(job.getConfiguration().get(DistCpConstants.CONF_LABEL_META_FOLDER))));
+      verifyResults();
+    }
+    catch (Exception e) {
+      LOG.error("Exception encountered", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+
+//  @Test
+  public void testCleanup() {
+    try {
+      clearState();
+      Path sourcePath = new Path("noscheme:///file");
+      List<Path> sources = new ArrayList<Path>();
+      sources.add(sourcePath);
+
+      final FileSystem fs = cluster.getFileSystem();
+      Path targetPath = new Path(TARGET_PATH)
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      DistCpOptions options = new DistCpOptions(sources, targetPath);
+
+      Path stagingDir = JobSubmissionFiles.getStagingDir(
+              new Cluster(configuration), configuration);
+      stagingDir.getFileSystem(configuration).mkdirs(stagingDir);
+
+      try {
+        new DistCp(configuration, options).execute();
+      } catch (Throwable t) {
+        Assert.assertEquals(stagingDir.getFileSystem(configuration).
+            listStatus(stagingDir).length, 0);
+      }
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("testCleanup failed " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testRootPath() throws Exception {
+    try {
+      clearState();
+      List<Path> sources = new ArrayList<Path>();
+      final FileSystem fs = cluster.getFileSystem();
+      sources.add(new Path("/a")
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory()));
+      sources.add(new Path("/b")
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory()));
+      touchFile("/a/a.txt");
+      touchFile("/b/b.txt");
+
+      Path targetPath = new Path("/c")
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      DistCpOptions options = new DistCpOptions(sources, targetPath);
+      new DistCp(configuration, options).execute();
+      Assert.assertTrue(fs.exists(new Path("/c/a/a.txt")));
+      Assert.assertTrue(fs.exists(new Path("/c/b/b.txt")));
+    }
+    catch (Exception e) {
+      LOG.error("Exception encountered", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testDynamicDistCp() throws Exception {
+    try {
+      clearState();
+      final FileSystem fs = cluster.getFileSystem();
+      Path sourcePath = new Path(SOURCE_PATH)
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      List<Path> sources = new ArrayList<Path>();
+      sources.add(sourcePath);
+
+      Path targetPath = new Path(TARGET_PATH)
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      DistCpOptions options = new DistCpOptions(sources, targetPath);
+      options.setCopyStrategy("dynamic");
+
+      options.setAtomicCommit(true);
+      options.setAtomicWorkPath(new Path("/work"));
+      options.setBlocking(false);
+      Job job = new DistCp(configuration, options).execute();
+      Path workDir = CopyOutputFormat.getWorkingDirectory(job);
+      Path finalDir = CopyOutputFormat.getCommitDirectory(job);
+
+      while (!job.isComplete()) {
+        if (fs.exists(workDir)) {
+          break;
+        }
+      }
+      job.waitForCompletion(true);
+      Assert.assertFalse(fs.exists(workDir));
+      Assert.assertTrue(fs.exists(finalDir));
+
+      verifyResults();
+    }
+    catch (Exception e) {
+      LOG.error("Exception encountered", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+
+  private static void verifyResults() throws Exception {
+    for (Path path : pathList) {
+      FileSystem fs = cluster.getFileSystem();
+
+      Path sourcePath = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      Path targetPath
+              = new Path(sourcePath.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
+
+      Assert.assertTrue(fs.exists(targetPath));
+      Assert.assertEquals(fs.isFile(sourcePath), fs.isFile(targetPath));
+    }
+  }
+}

+ 542 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestFileBasedCopyListing.java

@@ -0,0 +1,542 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.tools.util.TestDistCpUtils;
+import org.apache.hadoop.security.Credentials;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+public class TestFileBasedCopyListing {
+  private static final Log LOG = LogFactory.getLog(TestFileBasedCopyListing.class);
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+  private static final Configuration config = new Configuration();
+  private static MiniDFSCluster cluster;
+  private static FileSystem fs;
+
+  @BeforeClass
+  public static void create() throws IOException {
+    cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).format(true)
+                                                .build();
+    fs = cluster.getFileSystem();
+    buildExpectedValuesMap();
+  }
+
+  @AfterClass
+  public static void destroy() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  private static Map<String, String> map = new HashMap<String, String>();
+
+  private static void buildExpectedValuesMap() {
+    map.put("/file1", "/tmp/singlefile1/file1");
+    map.put("/file2", "/tmp/singlefile2/file2");
+    map.put("/file3", "/tmp/multifile/file3");
+    map.put("/file4", "/tmp/multifile/file4");
+    map.put("/file5", "/tmp/multifile/file5");
+    map.put("/multifile/file3", "/tmp/multifile/file3");
+    map.put("/multifile/file4", "/tmp/multifile/file4");
+    map.put("/multifile/file5", "/tmp/multifile/file5");
+    map.put("/Ufile3", "/tmp/Umultifile/Ufile3");
+    map.put("/Ufile4", "/tmp/Umultifile/Ufile4");
+    map.put("/Ufile5", "/tmp/Umultifile/Ufile5");
+    map.put("/dir1", "/tmp/singledir/dir1");
+    map.put("/singledir/dir1", "/tmp/singledir/dir1");
+    map.put("/dir2", "/tmp/singledir/dir2");
+    map.put("/singledir/dir2", "/tmp/singledir/dir2");
+    map.put("/Udir1", "/tmp/Usingledir/Udir1");
+    map.put("/Udir2", "/tmp/Usingledir/Udir2");
+    map.put("/dir2/file6", "/tmp/singledir/dir2/file6");
+    map.put("/singledir/dir2/file6", "/tmp/singledir/dir2/file6");
+    map.put("/file7", "/tmp/singledir1/dir3/file7");
+    map.put("/file8", "/tmp/singledir1/dir3/file8");
+    map.put("/file9", "/tmp/singledir1/dir3/file9");
+    map.put("/dir3/file7", "/tmp/singledir1/dir3/file7");
+    map.put("/dir3/file8", "/tmp/singledir1/dir3/file8");
+    map.put("/dir3/file9", "/tmp/singledir1/dir3/file9");
+    map.put("/Ufile7", "/tmp/Usingledir1/Udir3/Ufile7");
+    map.put("/Ufile8", "/tmp/Usingledir1/Udir3/Ufile8");
+    map.put("/Ufile9", "/tmp/Usingledir1/Udir3/Ufile9");
+  }
+
+  @Test
+  public void testSingleFileMissingTarget() {
+    caseSingleFileMissingTarget(false);
+    caseSingleFileMissingTarget(true);
+  }
+
+  private void caseSingleFileMissingTarget(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singlefile1/file1");
+      createFiles("/tmp/singlefile1/file1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 0);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testSingleFileTargetFile() {
+    caseSingleFileTargetFile(false);
+    caseSingleFileTargetFile(true);
+  }
+
+  private void caseSingleFileTargetFile(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singlefile1/file1");
+      createFiles("/tmp/singlefile1/file1", target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 0);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testSingleFileTargetDir() {
+    caseSingleFileTargetDir(false);
+    caseSingleFileTargetDir(true);
+  }
+
+  private void caseSingleFileTargetDir(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singlefile2/file2");
+      createFiles("/tmp/singlefile2/file2");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testSingleDirTargetMissing() {
+    caseSingleDirTargetMissing(false);
+    caseSingleDirTargetMissing(true);
+  }
+
+  private void caseSingleDirTargetMissing(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singledir");
+      mkdirs("/tmp/singledir/dir1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testSingleDirTargetPresent() {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singledir");
+      mkdirs("/tmp/singledir/dir1");
+      mkdirs(target.toString());
+
+      runTest(listFile, target);
+
+      checkResult(listFile, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testUpdateSingleDirTargetPresent() {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/Usingledir");
+      mkdirs("/tmp/Usingledir/Udir1");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, true);
+
+      checkResult(listFile, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testMultiFileTargetPresent() {
+    caseMultiFileTargetPresent(false);
+    caseMultiFileTargetPresent(true);
+  }
+
+  private void caseMultiFileTargetPresent(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 3);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testMultiFileTargetMissing() {
+    caseMultiFileTargetMissing(false);
+    caseMultiFileTargetMissing(true);
+  }
+
+  private void caseMultiFileTargetMissing(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 3);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testMultiDirTargetPresent() {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/multifile", "/tmp/singledir");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      mkdirs(target.toString(), "/tmp/singledir/dir1");
+
+      runTest(listFile, target);
+
+      checkResult(listFile, 4);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testUpdateMultiDirTargetPresent() {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/Umultifile", "/tmp/Usingledir");
+      createFiles("/tmp/Umultifile/Ufile3", "/tmp/Umultifile/Ufile4", "/tmp/Umultifile/Ufile5");
+      mkdirs(target.toString(), "/tmp/Usingledir/Udir1");
+
+      runTest(listFile, target, true);
+
+      checkResult(listFile, 4);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testMultiDirTargetMissing() {
+    caseMultiDirTargetMissing(false);
+    caseMultiDirTargetMissing(true);
+  }
+
+  private void caseMultiDirTargetMissing(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/multifile", "/tmp/singledir");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      mkdirs("/tmp/singledir/dir1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 4);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testGlobTargetMissingSingleLevel() {
+    caseGlobTargetMissingSingleLevel(false);
+    caseGlobTargetMissingSingleLevel(true);
+  }
+
+  private void caseGlobTargetMissingSingleLevel(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp1/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/*");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/singledir/dir2/file6");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 5);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+  }
+
+  @Test
+  public void testGlobTargetMissingMultiLevel() {
+    caseGlobTargetMissingMultiLevel(false);
+    caseGlobTargetMissingMultiLevel(true);
+  }
+
+  private void caseGlobTargetMissingMultiLevel(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp1/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/*/*");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/singledir1/dir3/file7", "/tmp/singledir1/dir3/file8",
+          "/tmp/singledir1/dir3/file9");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 6);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+  }
+
+  @Test
+  public void testGlobTargetDirMultiLevel() {
+
+    try {
+      Path listFile = new Path("/tmp1/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/*/*");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/singledir1/dir3/file7", "/tmp/singledir1/dir3/file8",
+          "/tmp/singledir1/dir3/file9");
+      mkdirs(target.toString());
+
+      runTest(listFile, target);
+
+      checkResult(listFile, 6);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+  }
+
+  @Test
+  public void testUpdateGlobTargetDirMultiLevel() {
+
+    try {
+      Path listFile = new Path("/tmp1/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/*/*");
+      createFiles("/tmp/Umultifile/Ufile3", "/tmp/Umultifile/Ufile4", "/tmp/Umultifile/Ufile5");
+      createFiles("/tmp/Usingledir1/Udir3/Ufile7", "/tmp/Usingledir1/Udir3/Ufile8",
+          "/tmp/Usingledir1/Udir3/Ufile9");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, true);
+
+      checkResult(listFile, 6);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+  }
+
+  private void addEntries(Path listFile, String... entries) throws IOException {
+    OutputStream out = fs.create(listFile);
+    try {
+      for (String entry : entries){
+        out.write(entry.getBytes());
+        out.write("\n".getBytes());
+      }
+    } finally {
+      out.close();
+    }
+  }
+
+  private void createFiles(String... entries) throws IOException {
+    for (String entry : entries){
+      OutputStream out = fs.create(new Path(entry));
+      try {
+        out.write(entry.getBytes());
+        out.write("\n".getBytes());
+      } finally {
+        out.close();
+      }
+    }
+  }
+
+  private void mkdirs(String... entries) throws IOException {
+    for (String entry : entries){
+      fs.mkdirs(new Path(entry));
+    }
+  }
+
+  private void runTest(Path listFile, Path target) throws IOException {
+    runTest(listFile, target, true);
+  }
+
+  private void runTest(Path listFile, Path target, boolean sync) throws IOException {
+    CopyListing listing = new FileBasedCopyListing(config, CREDENTIALS);
+    DistCpOptions options = new DistCpOptions(listFile, target);
+    options.setSyncFolder(sync);
+    listing.buildListing(listFile, options);
+  }
+
+  private void checkResult(Path listFile, int count) throws IOException {
+    if (count == 0) {
+      return;
+    }
+
+    int recCount = 0;
+    SequenceFile.Reader reader = new SequenceFile.Reader(config,
+                                            SequenceFile.Reader.file(listFile));
+    try {
+      Text relPath = new Text();
+      FileStatus fileStatus = new FileStatus();
+      while (reader.next(relPath, fileStatus)) {
+        Assert.assertEquals(fileStatus.getPath().toUri().getPath(), map.get(relPath.toString()));
+        recCount++;
+      }
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+    Assert.assertEquals(recCount, count);
+  }
+
+}

+ 135 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestGlobbedCopyListing.java

@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.security.Credentials;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.DataOutputStream;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+public class TestGlobbedCopyListing {
+
+  private static MiniDFSCluster cluster;
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+  public static Map<String, String> expectedValues = new HashMap<String, String>();
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    cluster = new MiniDFSCluster(new Configuration(), 1, true, null);
+    createSourceData();
+  }
+
+  private static void createSourceData() throws Exception {
+    mkdirs("/tmp/source/1");
+    mkdirs("/tmp/source/2");
+    mkdirs("/tmp/source/2/3");
+    mkdirs("/tmp/source/2/3/4");
+    mkdirs("/tmp/source/5");
+    touchFile("/tmp/source/5/6");
+    mkdirs("/tmp/source/7");
+    mkdirs("/tmp/source/7/8");
+    touchFile("/tmp/source/7/8/9");
+  }
+
+  private static void mkdirs(String path) throws Exception {
+    FileSystem fileSystem = null;
+    try {
+      fileSystem = cluster.getFileSystem();
+      fileSystem.mkdirs(new Path(path));
+      recordInExpectedValues(path);
+    }
+    finally {
+      IOUtils.cleanup(null, fileSystem);
+    }
+  }
+
+  private static void touchFile(String path) throws Exception {
+    FileSystem fileSystem = null;
+    DataOutputStream outputStream = null;
+    try {
+      fileSystem = cluster.getFileSystem();
+      outputStream = fileSystem.create(new Path(path), true, 0);
+      recordInExpectedValues(path);
+    }
+    finally {
+      IOUtils.cleanup(null, fileSystem, outputStream);
+    }
+  }
+
+  private static void recordInExpectedValues(String path) throws Exception {
+    FileSystem fileSystem = cluster.getFileSystem();
+    Path sourcePath = new Path(fileSystem.getUri().toString() + path);
+    expectedValues.put(sourcePath.toString(), DistCpUtils.getRelativePath(
+        new Path("/tmp/source"), sourcePath));
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    cluster.shutdown();
+  }
+
+  @Test
+  public void testRun() throws Exception {
+    final URI uri = cluster.getFileSystem().getUri();
+    final String pathString = uri.toString();
+    Path fileSystemPath = new Path(pathString);
+    Path source = new Path(fileSystemPath.toString() + "/tmp/source");
+    Path target = new Path(fileSystemPath.toString() + "/tmp/target");
+    Path listingPath = new Path(fileSystemPath.toString() + "/tmp/META/fileList.seq");
+    DistCpOptions options = new DistCpOptions(Arrays.asList(source), target);
+
+    new GlobbedCopyListing(new Configuration(), CREDENTIALS).buildListing(listingPath, options);
+
+    verifyContents(listingPath);
+  }
+
+  private void verifyContents(Path listingPath) throws Exception {
+    SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem(),
+                                              listingPath, new Configuration());
+    Text key   = new Text();
+    FileStatus value = new FileStatus();
+    Map<String, String> actualValues = new HashMap<String, String>();
+    while (reader.next(key, value)) {
+      actualValues.put(value.getPath().toString(), key.toString());
+    }
+
+    Assert.assertEquals(expectedValues.size(), actualValues.size());
+    for (Map.Entry<String, String> entry : actualValues.entrySet()) {
+      Assert.assertEquals(entry.getValue(), expectedValues.get(entry.getKey()));
+    }
+  }
+}
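
A minimal sketch (not part of this patch; the class name and paths are illustrative, and a reachable file system is assumed) of the flow testRun() above exercises: GlobbedCopyListing expands the source paths, globs included, and writes the relative-path/FileStatus records to the listing file:

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.GlobbedCopyListing;

public class GlobbedListingSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path source  = new Path("hdfs://localhost:8020/tmp/source");            // may contain globs
    Path target  = new Path("hdfs://localhost:8020/tmp/target");
    Path listing = new Path("hdfs://localhost:8020/tmp/META/fileList.seq");
    DistCpOptions options = new DistCpOptions(Arrays.asList(source), target);
    // Expands the sources and writes the copy listing consumed by the map phase.
    new GlobbedCopyListing(conf, new Credentials()).buildListing(listing, options);
  }
}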

+ 466 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java

@@ -0,0 +1,466 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.util.TestDistCpUtils;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+public class TestIntegration {
+  private static final Log LOG = LogFactory.getLog(TestIntegration.class);
+
+  private static FileSystem fs;
+
+  private static Path listFile;
+  private static Path target;
+  private static String root;
+
+  private static Configuration getConf() {
+    Configuration conf = new Configuration();
+    conf.set("fs.default.name", "file:///");
+    conf.set("mapred.job.tracker", "local");
+    return conf;
+  }
+
+  @BeforeClass
+  public static void setup() {
+    try {
+      fs = FileSystem.get(getConf());
+      listFile = new Path("target/tmp/listing").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory());
+      target = new Path("target/tmp/target").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory());
+      root = new Path("target/tmp").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory()).toString();
+      TestDistCpUtils.delete(fs, root);
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+    }
+  }
+
+  @Test
+  public void testSingleFileMissingTarget() {
+    caseSingleFileMissingTarget(false);
+    caseSingleFileMissingTarget(true);
+  }
+
+  private void caseSingleFileMissingTarget(boolean sync) {
+
+    try {
+      addEntries(listFile, "singlefile1/file1");
+      createFiles("singlefile1/file1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testSingleFileTargetFile() {
+    caseSingleFileTargetFile(false);
+    caseSingleFileTargetFile(true);
+  }
+
+  private void caseSingleFileTargetFile(boolean sync) {
+
+    try {
+      addEntries(listFile, "singlefile1/file1");
+      createFiles("singlefile1/file1", target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testSingleFileTargetDir() {
+    caseSingleFileTargetDir(false);
+    caseSingleFileTargetDir(true);
+  }
+
+  private void caseSingleFileTargetDir(boolean sync) {
+
+    try {
+      addEntries(listFile, "singlefile2/file2");
+      createFiles("singlefile2/file2");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 1, "file2");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testSingleDirTargetMissing() {
+    caseSingleDirTargetMissing(false);
+    caseSingleDirTargetMissing(true);
+  }
+
+  private void caseSingleDirTargetMissing(boolean sync) {
+
+    try {
+      addEntries(listFile, "singledir");
+      mkdirs(root + "/singledir/dir1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 1, "dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testSingleDirTargetPresent() {
+
+    try {
+      addEntries(listFile, "singledir");
+      mkdirs(root + "/singledir/dir1");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 1, "singledir/dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testUpdateSingleDirTargetPresent() {
+
+    try {
+      addEntries(listFile, "Usingledir");
+      mkdirs(root + "/Usingledir/Udir1");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 1, "Udir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testMultiFileTargetPresent() {
+    caseMultiFileTargetPresent(false);
+    caseMultiFileTargetPresent(true);
+  }
+
+  private void caseMultiFileTargetPresent(boolean sync) {
+
+    try {
+      addEntries(listFile, "multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 3, "file3", "file4", "file5");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testMultiFileTargetMissing() {
+    caseMultiFileTargetMissing(false);
+    caseMultiFileTargetMissing(true);
+  }
+
+  private void caseMultiFileTargetMissing(boolean sync) {
+
+    try {
+      addEntries(listFile, "multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 3, "file3", "file4", "file5");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testMultiDirTargetPresent() {
+
+    try {
+      addEntries(listFile, "multifile", "singledir");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      mkdirs(target.toString(), root + "/singledir/dir1");
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 2, "multifile/file3", "multifile/file4", "multifile/file5", "singledir/dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testUpdateMultiDirTargetPresent() {
+
+    try {
+      addEntries(listFile, "Umultifile", "Usingledir");
+      createFiles("Umultifile/Ufile3", "Umultifile/Ufile4", "Umultifile/Ufile5");
+      mkdirs(target.toString(), root + "/Usingledir/Udir1");
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 4, "Ufile3", "Ufile4", "Ufile5", "Udir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testMultiDirTargetMissing() {
+
+    try {
+      addEntries(listFile, "multifile", "singledir");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      mkdirs(root + "/singledir/dir1");
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 2, "multifile/file3", "multifile/file4",
+          "multifile/file5", "singledir/dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testUpdateMultiDirTargetMissing() {
+
+    try {
+      addEntries(listFile, "multifile", "singledir");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      mkdirs(root + "/singledir/dir1");
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 4, "file3", "file4", "file5", "dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testGlobTargetMissingSingleLevel() {
+
+    try {
+      Path listFile = new Path("target/tmp1/listing").makeQualified(fs.getUri(),
+                                fs.getWorkingDirectory());
+      addEntries(listFile, "*");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("singledir/dir2/file6");
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 2, "multifile/file3", "multifile/file4", "multifile/file5",
+          "singledir/dir2/file6");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+      TestDistCpUtils.delete(fs, "target/tmp1");
+    }
+  }
+
+  @Test
+  public void testUpdateGlobTargetMissingSingleLevel() {
+
+    try {
+      Path listFile = new Path("target/tmp1/listing").makeQualified(fs.getUri(),
+                                  fs.getWorkingDirectory());
+      addEntries(listFile, "*");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("singledir/dir2/file6");
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 4, "file3", "file4", "file5", "dir2/file6");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+      TestDistCpUtils.delete(fs, "target/tmp1");
+    }
+  }
+
+  @Test
+  public void testGlobTargetMissingMultiLevel() {
+
+    try {
+      Path listFile = new Path("target/tmp1/listing").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory());
+      addEntries(listFile, "*/*");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("singledir1/dir3/file7", "singledir1/dir3/file8",
+          "singledir1/dir3/file9");
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 4, "file3", "file4", "file5",
+          "dir3/file7", "dir3/file8", "dir3/file9");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+      TestDistCpUtils.delete(fs, "target/tmp1");
+    }
+  }
+
+  @Test
+  public void testUpdateGlobTargetMissingMultiLevel() {
+
+    try {
+      Path listFile = new Path("target/tmp1/listing").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory());
+      addEntries(listFile, "*/*");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("singledir1/dir3/file7", "singledir1/dir3/file8",
+          "singledir1/dir3/file9");
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 6, "file3", "file4", "file5",
+          "file7", "file8", "file9");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+      TestDistCpUtils.delete(fs, "target/tmp1");
+    }
+  }
+
+  private void addEntries(Path listFile, String... entries) throws IOException {
+    OutputStream out = fs.create(listFile);
+    try {
+      for (String entry : entries){
+        out.write((root + "/" + entry).getBytes());
+        out.write("\n".getBytes());
+      }
+    } finally {
+      out.close();
+    }
+  }
+
+  private void createFiles(String... entries) throws IOException {
+    for (String entry : entries){
+      OutputStream out = fs.create(new Path(root + "/" + entry));
+      try {
+        out.write((root + "/" + entry).getBytes());
+        out.write("\n".getBytes());
+      } finally {
+        out.close();
+      }
+    }
+  }
+
+  private void mkdirs(String... entries) throws IOException {
+    for (String entry : entries){
+      fs.mkdirs(new Path(entry));
+    }
+  }
+
+  private void runTest(Path listFile, Path target, boolean sync) throws IOException {
+    DistCpOptions options = new DistCpOptions(listFile, target);
+    options.setSyncFolder(sync);
+    try {
+      new DistCp(getConf(), options).execute();
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      throw new IOException(e);
+    }
+  }
+
+  private void checkResult(Path target, int count, String... relPaths) throws IOException {
+    Assert.assertEquals(count, fs.listStatus(target).length);
+    if (relPaths == null || relPaths.length == 0) {
+      Assert.assertTrue(target.toString(), fs.exists(target));
+      return;
+    }
+    for (String relPath : relPaths) {
+      Assert.assertTrue(new Path(target, relPath).toString(), fs.exists(new Path(target, relPath)));
+    }
+  }
+
+}
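
A minimal sketch (not part of this patch; the class name and paths are illustrative) of driving DistCp programmatically with a file-based source listing, the same way runTest() above does:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;

public class DistCpDriverSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");        // copy within the local file system
    conf.set("mapred.job.tracker", "local");        // run the MR job in local mode
    Path listFile = new Path("target/tmp/listing"); // text file with one source path per line
    Path target   = new Path("target/tmp/target");
    DistCpOptions options = new DistCpOptions(listFile, target);
    options.setSyncFolder(true);                    // programmatic equivalent of -update
    new DistCp(conf, options).execute();            // builds the listing and runs the copy job
  }
}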

+ 497 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java

@@ -0,0 +1,497 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.DistCpOptions.*;
+import org.apache.hadoop.conf.Configuration;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+public class TestOptionsParser {
+
+  @Test
+  public void testParseIgnoreFailure() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldIgnoreFailures());
+
+    options = OptionsParser.parse(new String[] {
+        "-i",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldIgnoreFailures());
+  }
+
+  @Test
+  public void testParseOverwrite() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldOverwrite());
+
+    options = OptionsParser.parse(new String[] {
+        "-overwrite",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldOverwrite());
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-update",
+          "-overwrite",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Update and overwrite aren't allowed together");
+    } catch (IllegalArgumentException ignore) {
+    }
+  }
+
+  @Test
+  public void testLogPath() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertNull(options.getLogPath());
+
+    options = OptionsParser.parse(new String[] {
+        "-log",
+        "hdfs://localhost:8020/logs",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getLogPath(), new Path("hdfs://localhost:8020/logs"));
+  }
+
+  @Test
+  public void testParseBlocking() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldBlock());
+
+    options = OptionsParser.parse(new String[] {
+        "-async",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldBlock());
+  }
+
+  @Test
+  public void testParseBandwidth() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getMapBandwidth(), DistCpConstants.DEFAULT_BANDWIDTH_MB);
+
+    options = OptionsParser.parse(new String[] {
+        "-bandwidth",
+        "11",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getMapBandwidth(), 11);
+  }
+
+  @Test
+  public void testParseSkipCRC() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldSkipCRC());
+
+    options = OptionsParser.parse(new String[] {
+        "-update",
+        "-skipcrccheck",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldSyncFolder());
+    Assert.assertTrue(options.shouldSkipCRC());
+  }
+
+  @Test
+  public void testParseAtomicCommit() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldAtomicCommit());
+
+    options = OptionsParser.parse(new String[] {
+        "-atomic",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldAtomicCommit());
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-atomic",
+          "-update",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Atomic and sync folders were allowed");
+    } catch (IllegalArgumentException ignore) { }
+  }
+
+  @Test
+  public void testParseWorkPath() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertNull(options.getAtomicWorkPath());
+
+    options = OptionsParser.parse(new String[] {
+        "-atomic",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertNull(options.getAtomicWorkPath());
+
+    options = OptionsParser.parse(new String[] {
+        "-atomic",
+        "-tmp",
+        "hdfs://localhost:8020/work",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getAtomicWorkPath(), new Path("hdfs://localhost:8020/work"));
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-tmp",
+          "hdfs://localhost:8020/work",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("work path was allowed without -atomic switch");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testParseSyncFolders() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldSyncFolder());
+
+    options = OptionsParser.parse(new String[] {
+        "-update",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldSyncFolder());
+  }
+
+  @Test
+  public void testParseDeleteMissing() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldDeleteMissing());
+
+    options = OptionsParser.parse(new String[] {
+        "-update",
+        "-delete",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldSyncFolder());
+    Assert.assertTrue(options.shouldDeleteMissing());
+
+    options = OptionsParser.parse(new String[] {
+        "-overwrite",
+        "-delete",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldOverwrite());
+    Assert.assertTrue(options.shouldDeleteMissing());
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-atomic",
+          "-delete",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Atomic and delete folders were allowed");
+    } catch (IllegalArgumentException ignore) { }
+  }
+
+  @Test
+  public void testParseSSLConf() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertNull(options.getSslConfigurationFile());
+
+    options = OptionsParser.parse(new String[] {
+        "-mapredSslConf",
+        "/tmp/ssl-client.xml",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getSslConfigurationFile(), "/tmp/ssl-client.xml");
+  }
+
+  @Test
+  public void testParseMaps() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getMaxMaps(), DistCpConstants.DEFAULT_MAPS);
+
+    options = OptionsParser.parse(new String[] {
+        "-m",
+        "1",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getMaxMaps(), 1);
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-m",
+          "hello",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Non numberic map parsed");
+    } catch (IllegalArgumentException ignore) { }
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-mapredXslConf",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Non numberic map parsed");
+    } catch (IllegalArgumentException ignore) { }
+  }
+
+  @Test
+  public void testSourceListing() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getSourceFileListing(),
+        new Path("hdfs://localhost:8020/source/first"));
+  }
+
+  @Test
+  public void testSourceListingAndSourcePath() {
+    try {
+      OptionsParser.parse(new String[] {
+          "-f",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Both source listing & source paths allowed");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testMissingSourceInfo() {
+    try {
+      OptionsParser.parse(new String[] {
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Neither source listing not source paths present");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testMissingTarget() {
+    try {
+      OptionsParser.parse(new String[] {
+          "-f", "hdfs://localhost:8020/source"});
+      Assert.fail("Missing target allowed");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testInvalidArgs() {
+    try {
+      OptionsParser.parse(new String[] {
+          "-m", "-f", "hdfs://localhost:8020/source"});
+      Assert.fail("Missing map value");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testToString() {
+    DistCpOptions option = new DistCpOptions(new Path("abc"), new Path("xyz"));
+    String val = "DistCpOptions{atomicCommit=false, syncFolder=false, deleteMissing=false, " +
+        "ignoreFailures=false, maxMaps=20, sslConfigurationFile='null', copyStrategy='uniformsize', " +
+        "sourceFileListing=abc, sourcePaths=null, targetPath=xyz}";
+    Assert.assertEquals(val, option.toString());
+    Assert.assertNotSame(DistCpOptionSwitch.ATOMIC_COMMIT.toString(),
+        DistCpOptionSwitch.ATOMIC_COMMIT.name());
+  }
+
+  @Test
+  public void testCopyStrategy() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-strategy",
+        "dynamic",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getCopyStrategy(), "dynamic");
+
+    options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getCopyStrategy(), DistCpConstants.UNIFORMSIZE);
+  }
+
+  @Test
+  public void testTargetPath() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getTargetPath(), new Path("hdfs://localhost:8020/target/"));
+  }
+
+  @Test
+  public void testPreserve() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-p",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-p",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-pbr",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-pbrgup",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-p",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    int i = 0;
+    Iterator<FileAttribute> attribIterator = options.preserveAttributes();
+    while (attribIterator.hasNext()) {
+      attribIterator.next();
+      i++;
+    }
+    Assert.assertEquals(i, 5);
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-pabc",
+          "-f",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target"});
+      Assert.fail("Invalid preserve attribute");
+    }
+    catch (IllegalArgumentException ignore) {}
+    catch (NoSuchElementException ignore) {}
+
+    options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
+    options.preserve(FileAttribute.PERMISSION);
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+
+    options.preserve(FileAttribute.PERMISSION);
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+  }
+
+  @Test
+  public void testOptionsSwitchAddToConf() {
+    Configuration conf = new Configuration();
+    Assert.assertNull(conf.get(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel()));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.ATOMIC_COMMIT);
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel(), false));
+  }
+
+  @Test
+  public void testOptionsAppendToConf() {
+    Configuration conf = new Configuration();
+    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false));
+    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel(), false));
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-atomic",
+        "-i",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    options.appendToConf(conf);
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false));
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel(), false));
+    Assert.assertEquals(conf.getInt(DistCpOptionSwitch.BANDWIDTH.getConfigLabel(), -1),
+        DistCpConstants.DEFAULT_BANDWIDTH_MB);
+
+    conf = new Configuration();
+    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false));
+    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.DELETE_MISSING.getConfigLabel(), false));
+    Assert.assertEquals(conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel()), null);
+    options = OptionsParser.parse(new String[] {
+        "-update",
+        "-delete",
+        "-pu",
+        "-bandwidth",
+        "11",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    options.appendToConf(conf);
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false));
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.DELETE_MISSING.getConfigLabel(), false));
+    Assert.assertEquals(conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel()), "U");
+    Assert.assertEquals(conf.getInt(DistCpOptionSwitch.BANDWIDTH.getConfigLabel(), -1), 11);
+  }
+}
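
A minimal sketch (not part of this patch; the class name is illustrative and the URIs are placeholders) of the command-line path these tests cover: OptionsParser.parse() turns argv into a DistCpOptions instance, and appendToConf() pushes the chosen switches into the job Configuration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpOptionSwitch;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.OptionsParser;

public class OptionsParserSketch {
  public static void main(String[] args) {
    DistCpOptions options = OptionsParser.parse(new String[] {
        "-update", "-delete", "-pu", "-bandwidth", "11",
        "hdfs://localhost:8020/source/first",
        "hdfs://localhost:8020/target/"});

    Configuration conf = new Configuration();
    options.appendToConf(conf);                     // mirror the parsed switches into the job conf

    System.out.println("sync folder : " + options.shouldSyncFolder());
    System.out.println("bandwidth MB: "
        + conf.getInt(DistCpOptionSwitch.BANDWIDTH.getConfigLabel(), -1));
  }
}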

+ 419 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java

@@ -0,0 +1,419 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.tools.CopyListing;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.GlobbedCopyListing;
+import org.apache.hadoop.tools.util.TestDistCpUtils;
+import org.apache.hadoop.security.Credentials;
+import org.junit.*;
+
+import java.io.IOException;
+import java.util.*;
+
+public class TestCopyCommitter {
+  private static final Log LOG = LogFactory.getLog(TestCopyCommitter.class);
+
+  private static final Random rand = new Random();
+
+  private static final Credentials CREDENTIALS = new Credentials();
+  public static final int PORT = 39737;
+
+
+  private static Configuration config;
+  private static MiniDFSCluster cluster;
+
+  private static Job getJobForClient() throws IOException {
+    Job job = Job.getInstance(new Configuration());
+    job.getConfiguration().set("mapred.job.tracker", "localhost:" + PORT);
+    job.setInputFormatClass(NullInputFormat.class);
+    job.setOutputFormatClass(NullOutputFormat.class);
+    job.setNumReduceTasks(0);
+    return job;
+  }
+
+  @BeforeClass
+  public static void create() throws IOException {
+    config = getJobForClient().getConfiguration();
+    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 0);
+    cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).format(true)
+                      .build();
+  }
+
+  @AfterClass
+  public static void destroy() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Before
+  public void createMetaFolder() {
+    config.set(DistCpConstants.CONF_LABEL_META_FOLDER, "/meta");
+    Path meta = new Path("/meta");
+    try {
+      cluster.getFileSystem().mkdirs(meta);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while creating meta folder", e);
+      Assert.fail("Unable to create meta folder");
+    }
+  }
+
+  @After
+  public void cleanupMetaFolder() {
+    Path meta = new Path("/meta");
+    try {
+      if (cluster.getFileSystem().exists(meta)) {
+        cluster.getFileSystem().delete(meta, true);
+        Assert.fail("Expected meta folder to be deleted");
+      }
+    } catch (IOException e) {
+      LOG.error("Exception encountered while cleaning up folder", e);
+      Assert.fail("Unable to clean up meta folder");
+    }
+  }
+
+  @Test
+  public void testNoCommitAction() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      committer.commitJob(jobContext);
+      Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Commit failed");
+    }
+  }
+
+  @Test
+  public void testPreserveStatus() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+
+    String sourceBase;
+    String targetBase;
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      FsPermission sourcePerm = new FsPermission((short) 511);
+      FsPermission initialPerm = new FsPermission((short) 448);
+      sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
+      targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);
+
+      DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
+          new Path("/out"));
+      options.preserve(FileAttribute.PERMISSION);
+      options.appendToConf(conf);
+
+      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
+      listing.buildListing(listingFile, options);
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
+
+      committer.commitJob(jobContext);
+      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
+        Assert.fail("Permission don't match");
+      }
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
+        Assert.fail("Permission don't match");
+      }
+
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing for preserve status", e);
+      Assert.fail("Preserve status failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+
+  }
+
+  @Test
+  public void testDeleteMissing() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+    String sourceBase;
+    String targetBase;
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
+      targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
+      String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
+      fs.rename(new Path(targetBaseAdd), new Path(targetBase));
+
+      DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
+          new Path("/out"));
+      options.setSyncFolder(true);
+      options.setDeleteMissing(true);
+      options.appendToConf(conf);
+
+      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
+      listing.buildListing(listingFile, options);
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
+
+      committer.commitJob(jobContext);
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+    } catch (Throwable e) {
+      LOG.error("Exception encountered while testing for delete missing", e);
+      Assert.fail("Delete missing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp1");
+      conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
+    }
+  }
+
+  @Test
+  public void testDeleteMissingFlatInterleavedFiles() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+
+    String sourceBase;
+    String targetBase;
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      sourceBase = "/tmp1/" + String.valueOf(rand.nextLong());
+      targetBase = "/tmp1/" + String.valueOf(rand.nextLong());
+      TestDistCpUtils.createFile(fs, sourceBase + "/1");
+      TestDistCpUtils.createFile(fs, sourceBase + "/3");
+      TestDistCpUtils.createFile(fs, sourceBase + "/4");
+      TestDistCpUtils.createFile(fs, sourceBase + "/5");
+      TestDistCpUtils.createFile(fs, sourceBase + "/7");
+      TestDistCpUtils.createFile(fs, sourceBase + "/8");
+      TestDistCpUtils.createFile(fs, sourceBase + "/9");
+
+      TestDistCpUtils.createFile(fs, targetBase + "/2");
+      TestDistCpUtils.createFile(fs, targetBase + "/4");
+      TestDistCpUtils.createFile(fs, targetBase + "/5");
+      TestDistCpUtils.createFile(fs, targetBase + "/7");
+      TestDistCpUtils.createFile(fs, targetBase + "/9");
+      TestDistCpUtils.createFile(fs, targetBase + "/A");
+
+      DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), 
+          new Path("/out"));
+      options.setSyncFolder(true);
+      options.setDeleteMissing(true);
+      options.appendToConf(conf);
+
+      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
+      listing.buildListing(listingFile, options);
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
+
+      committer.commitJob(jobContext);
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+      Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+      Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing for delete missing", e);
+      Assert.fail("Delete missing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp1");
+      conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
+    }
+
+  }
+
+  @Test
+  public void testAtomicCommitMissingFinal() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
+    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      fs.mkdirs(new Path(workPath));
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
+      conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
+
+      Assert.assertTrue(fs.exists(new Path(workPath)));
+      Assert.assertFalse(fs.exists(new Path(finalPath)));
+      committer.commitJob(jobContext);
+      Assert.assertFalse(fs.exists(new Path(workPath)));
+      Assert.assertTrue(fs.exists(new Path(finalPath)));
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      Assert.assertFalse(fs.exists(new Path(workPath)));
+      Assert.assertTrue(fs.exists(new Path(finalPath)));
+
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing for preserve status", e);
+      Assert.fail("Atomic commit failure");
+    } finally {
+      TestDistCpUtils.delete(fs, workPath);
+      TestDistCpUtils.delete(fs, finalPath);
+    }
+  }
+
+  @Test
+  public void testAtomicCommitExistingFinal() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+
+    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
+    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      fs.mkdirs(new Path(workPath));
+      fs.mkdirs(new Path(finalPath));
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
+      conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
+
+      Assert.assertTrue(fs.exists(new Path(workPath)));
+      Assert.assertTrue(fs.exists(new Path(finalPath)));
+      try {
+        committer.commitJob(jobContext);
+        Assert.fail("Should not be able to atomic-commit to pre-existing path.");
+      } catch(Exception exception) {
+        Assert.assertTrue(fs.exists(new Path(workPath)));
+        Assert.assertTrue(fs.exists(new Path(finalPath)));
+        LOG.info("Atomic-commit Test pass.");
+      }
+
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing for atomic commit.", e);
+      Assert.fail("Atomic commit failure");
+    } finally {
+      TestDistCpUtils.delete(fs, workPath);
+      TestDistCpUtils.delete(fs, finalPath);
+    }
+  }
+
+  private TaskAttemptContext getTaskAttemptContext(Configuration conf) {
+    return new TaskAttemptContextImpl(conf,
+        new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
+  }
+
+  private boolean checkDirectoryPermissions(FileSystem fs, String targetBase,
+                                            FsPermission sourcePerm) throws IOException {
+    Path base = new Path(targetBase);
+
+    Stack<Path> stack = new Stack<Path>();
+    stack.push(base);
+    while (!stack.isEmpty()) {
+      Path file = stack.pop();
+      if (!fs.exists(file)) continue;
+      FileStatus[] fStatus = fs.listStatus(file);
+      if (fStatus == null || fStatus.length == 0) continue;
+
+      for (FileStatus status : fStatus) {
+        if (status.isDirectory()) {
+          stack.push(status.getPath());
+          Assert.assertEquals(status.getPermission(), sourcePerm);
+        }
+      }
+    }
+    return true;
+  }
+
+  private static class NullInputFormat extends InputFormat {
+    @Override
+    public List getSplits(JobContext context)
+        throws IOException, InterruptedException {
+      return Collections.EMPTY_LIST;
+    }
+
+    @Override
+    public RecordReader createRecordReader(InputSplit split,
+                                           TaskAttemptContext context)
+        throws IOException, InterruptedException {
+      return null;
+    }
+  }
+}
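
A minimal sketch (not part of this patch; the class name and paths are illustrative) of the configuration contract these committer tests rely on: CopyCommitter reads the work path, the final path, and the atomic flag from the job configuration before commitJob() runs:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpConstants;

public class CommitterConfSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, "/tmp1/work");    // where the maps wrote data
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, "/tmp1/final");  // where commitJob() renames to
    conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);          // request the atomic rename
    System.out.println("atomic copy requested: "
        + conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false));
  }
}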

+ 826 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java

@@ -0,0 +1,826 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptionSwitch;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.StubContext;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.security.PrivilegedAction;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
+public class TestCopyMapper {
+  private static final Log LOG = LogFactory.getLog(TestCopyMapper.class);
+  private static List<Path> pathList = new ArrayList<Path>();
+  private static int nFiles = 0;
+  private static final int FILE_SIZE = 1024;
+
+  private static MiniDFSCluster cluster;
+
+  private static final String SOURCE_PATH = "/tmp/source";
+  private static final String TARGET_PATH = "/tmp/target";
+
+  private static Configuration configuration;
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    configuration = getConfigurationForCluster();
+    cluster = new MiniDFSCluster.Builder(configuration)
+                .numDataNodes(1)
+                .format(true)
+                .build();
+  }
+
+  private static Configuration getConfigurationForCluster() throws IOException {
+    Configuration configuration = new Configuration();
+    System.setProperty("test.build.data", "target/tmp/build/TEST_COPY_MAPPER/data");
+    configuration.set("hadoop.log.dir", "target/tmp");
+    LOG.debug("fs.default.name  == " + configuration.get("fs.default.name"));
+    LOG.debug("dfs.http.address == " + configuration.get("dfs.http.address"));
+    return configuration;
+  }
+
+  private static Configuration getConfiguration() throws IOException {
+    Configuration configuration = getConfigurationForCluster();
+    final FileSystem fs = cluster.getFileSystem();
+    Path workPath = new Path(TARGET_PATH)
+            .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+    configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
+            workPath.toString());
+    configuration.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
+            workPath.toString());
+    configuration.setBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(),
+            false);
+    configuration.setBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(),
+            true);
+    configuration.setBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(),
+            true);
+    configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
+            "br");
+    return configuration;
+  }
+
+  private static void createSourceData() throws Exception {
+    mkdirs(SOURCE_PATH + "/1");
+    mkdirs(SOURCE_PATH + "/2");
+    mkdirs(SOURCE_PATH + "/2/3/4");
+    mkdirs(SOURCE_PATH + "/2/3");
+    mkdirs(SOURCE_PATH + "/5");
+    touchFile(SOURCE_PATH + "/5/6");
+    mkdirs(SOURCE_PATH + "/7");
+    mkdirs(SOURCE_PATH + "/7/8");
+    touchFile(SOURCE_PATH + "/7/8/9");
+  }
+
+  private static void mkdirs(String path) throws Exception {
+    FileSystem fileSystem = cluster.getFileSystem();
+    final Path qualifiedPath = new Path(path).makeQualified(fileSystem.getUri(),
+                                              fileSystem.getWorkingDirectory());
+    pathList.add(qualifiedPath);
+    fileSystem.mkdirs(qualifiedPath);
+  }
+
+  private static void touchFile(String path) throws Exception {
+    FileSystem fs;
+    DataOutputStream outputStream = null;
+    try {
+      fs = cluster.getFileSystem();
+      final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
+                                                      fs.getWorkingDirectory());
+      final long blockSize = fs.getDefaultBlockSize() * 2;
+      outputStream = fs.create(qualifiedPath, true, 0,
+              (short)(fs.getDefaultReplication()*2),
+              blockSize);
+      outputStream.write(new byte[FILE_SIZE]);
+      pathList.add(qualifiedPath);
+      ++nFiles;
+
+      FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
+      System.out.println(fileStatus.getBlockSize());
+      System.out.println(fileStatus.getReplication());
+    }
+    finally {
+      IOUtils.cleanup(null, outputStream);
+    }
+  }
+
+  @Test
+  public void testRun() {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+      copyMapper.setup(context);
+
+      for (Path path: pathList) {
+        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                fs.getFileStatus(path), context);
+      }
+
+      // Check that the maps worked.
+      for (Path path : pathList) {
+        final Path targetPath = new Path(path.toString()
+                .replaceAll(SOURCE_PATH, TARGET_PATH));
+        Assert.assertTrue(fs.exists(targetPath));
+        Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
+        Assert.assertEquals(fs.getFileStatus(path).getReplication(),
+                fs.getFileStatus(targetPath).getReplication());
+        Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
+                fs.getFileStatus(targetPath).getBlockSize());
+        Assert.assertTrue(!fs.isFile(targetPath) ||
+                fs.getFileChecksum(targetPath).equals(
+                        fs.getFileChecksum(path)));
+      }
+
+      Assert.assertEquals(pathList.size(),
+              stubContext.getReporter().getCounter(CopyMapper.Counter.COPY).getValue());
+      Assert.assertEquals(nFiles * FILE_SIZE,
+              stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
+
+      testCopyingExistingFiles(fs, copyMapper, context);
+      for (Text value : stubContext.getWriter().values()) {
+        Assert.assertTrue(value.toString() + " is not skipped", value.toString().startsWith("SKIP:"));
+      }
+    }
+    catch (Exception e) {
+      LOG.error("Unexpected exception: ", e);
+      Assert.assertTrue(false);
+    }
+  }
+
+  private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
+                                        Mapper<Text, FileStatus, Text, Text>.Context context) {
+
+    try {
+      for (Path path : pathList) {
+        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                fs.getFileStatus(path), context);
+      }
+
+      Assert.assertEquals(nFiles,
+              context.getCounter(CopyMapper.Counter.SKIP).getValue());
+    }
+    catch (Exception exception) {
+      Assert.assertTrue("Caught unexpected exception:" + exception.getMessage(),
+              false);
+    }
+  }
+
+  @Test
+  public void testMakeDirFailure() {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      Configuration configuration = context.getConfiguration();
+      String workPath = new Path("hftp://localhost:1234/*/*/*/?/")
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString();
+      configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
+              workPath);
+      copyMapper.setup(context);
+
+      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), pathList.get(0))),
+              fs.getFileStatus(pathList.get(0)), context);
+
+      Assert.assertTrue("There should have been an exception.", false);
+    }
+    catch (Exception ignore) {
+    }
+  }
+
+  @Test
+  public void testIgnoreFailures() {
+    doTestIgnoreFailures(true);
+    doTestIgnoreFailures(false);
+  }
+
+  @Test
+  public void testDirToFile() {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      mkdirs(SOURCE_PATH + "/src/file");
+      touchFile(TARGET_PATH + "/src/file");
+      try {
+        copyMapper.setup(context);
+        copyMapper.map(new Text("/src/file"),
+            fs.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+            context);
+      } catch (IOException e) {
+        Assert.assertTrue(e.getMessage().startsWith("Can't replace"));
+      }
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testPreserve() {
+    try {
+      deleteState();
+      createSourceData();
+
+      UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
+
+      final CopyMapper copyMapper = new CopyMapper();
+      
+      final Mapper<Text, FileStatus, Text, Text>.Context context =  tmpUser.
+          doAs(new PrivilegedAction<Mapper<Text, FileStatus, Text, Text>.Context>() {
+        @Override
+        public Mapper<Text, FileStatus, Text, Text>.Context run() {
+          try {
+            StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+            return stubContext.getContext();
+          } catch (Exception e) {
+            LOG.error("Exception encountered ", e);
+            throw new RuntimeException(e);
+          }
+        }
+      });
+
+      EnumSet<DistCpOptions.FileAttribute> preserveStatus =
+          EnumSet.allOf(DistCpOptions.FileAttribute.class);
+
+      context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
+        DistCpUtils.packAttributes(preserveStatus));
+
+      touchFile(SOURCE_PATH + "/src/file");
+      mkdirs(TARGET_PATH);
+      cluster.getFileSystem().setPermission(new Path(TARGET_PATH), new FsPermission((short)511));
+
+      final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
+        @Override
+        public FileSystem run() {
+          try {
+            return FileSystem.get(configuration);
+          } catch (IOException e) {
+            LOG.error("Exception encountered ", e);
+            Assert.fail("Test failed: " + e.getMessage());
+            throw new RuntimeException("Test ought to fail here");
+          }
+        }
+      });
+
+      tmpUser.doAs(new PrivilegedAction<Integer>() {
+        @Override
+        public Integer run() {
+          try {
+            copyMapper.setup(context);
+            copyMapper.map(new Text("/src/file"),
+                tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+                context);
+            Assert.fail("Expected copy to fail");
+          } catch (AccessControlException e) {
+            // Expected: the "guest" user cannot preserve attributes on the target.
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+          return null;
+        }
+      });
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testCopyReadableFiles() {
+    try {
+      deleteState();
+      createSourceData();
+
+      UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
+
+      final CopyMapper copyMapper = new CopyMapper();
+
+      final Mapper<Text, FileStatus, Text, Text>.Context context =  tmpUser.
+          doAs(new PrivilegedAction<Mapper<Text, FileStatus, Text, Text>.Context>() {
+        @Override
+        public Mapper<Text, FileStatus, Text, Text>.Context run() {
+          try {
+            StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+            return stubContext.getContext();
+          } catch (Exception e) {
+            LOG.error("Exception encountered ", e);
+            throw new RuntimeException(e);
+          }
+        }
+      });
+
+      touchFile(SOURCE_PATH + "/src/file");
+      mkdirs(TARGET_PATH);
+      cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+      cluster.getFileSystem().setPermission(new Path(TARGET_PATH), new FsPermission((short)511));
+
+      final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
+        @Override
+        public FileSystem run() {
+          try {
+            return FileSystem.get(configuration);
+          } catch (IOException e) {
+            LOG.error("Exception encountered ", e);
+            Assert.fail("Test failed: " + e.getMessage());
+            throw new RuntimeException("Test ought to fail here");
+          }
+        }
+      });
+
+      tmpUser.doAs(new PrivilegedAction<Integer>() {
+        @Override
+        public Integer run() {
+          try {
+            copyMapper.setup(context);
+            copyMapper.map(new Text("/src/file"),
+                tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+                context);
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+          return null;
+        }
+      });
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testSkipCopyNoPerms() {
+    try {
+      deleteState();
+      createSourceData();
+
+      UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
+
+      final CopyMapper copyMapper = new CopyMapper();
+
+      final StubContext stubContext =  tmpUser.
+          doAs(new PrivilegedAction<StubContext>() {
+        @Override
+        public StubContext run() {
+          try {
+            return new StubContext(getConfiguration(), null, 0);
+          } catch (Exception e) {
+            LOG.error("Exception encountered ", e);
+            throw new RuntimeException(e);
+          }
+        }
+      });
+
+      final Mapper<Text, FileStatus, Text, Text>.Context context = stubContext.getContext();
+      EnumSet<DistCpOptions.FileAttribute> preserveStatus =
+          EnumSet.allOf(DistCpOptions.FileAttribute.class);
+
+      context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
+        DistCpUtils.packAttributes(preserveStatus));
+
+      touchFile(SOURCE_PATH + "/src/file");
+      touchFile(TARGET_PATH + "/src/file");
+      cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+      cluster.getFileSystem().setPermission(new Path(TARGET_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+
+      final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
+        @Override
+        public FileSystem run() {
+          try {
+            return FileSystem.get(configuration);
+          } catch (IOException e) {
+            LOG.error("Exception encountered ", e);
+            Assert.fail("Test failed: " + e.getMessage());
+            throw new RuntimeException("Test ought to fail here");
+          }
+        }
+      });
+
+      tmpUser.doAs(new PrivilegedAction<Integer>() {
+        @Override
+        public Integer run() {
+          try {
+            copyMapper.setup(context);
+            copyMapper.map(new Text("/src/file"),
+                tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+                context);
+            Assert.assertEquals(stubContext.getWriter().values().size(), 1);
+            Assert.assertTrue(stubContext.getWriter().values().get(0).toString().startsWith("SKIP"));
+            Assert.assertTrue(stubContext.getWriter().values().get(0).toString().
+                contains(SOURCE_PATH + "/src/file"));
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+          return null;
+        }
+      });
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testFailCopyWithAccessControlException() {
+    try {
+      deleteState();
+      createSourceData();
+
+      UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
+
+      final CopyMapper copyMapper = new CopyMapper();
+
+      final StubContext stubContext =  tmpUser.
+          doAs(new PrivilegedAction<StubContext>() {
+        @Override
+        public StubContext run() {
+          try {
+            return new StubContext(getConfiguration(), null, 0);
+          } catch (Exception e) {
+            LOG.error("Exception encountered ", e);
+            throw new RuntimeException(e);
+          }
+        }
+      });
+
+      EnumSet<DistCpOptions.FileAttribute> preserveStatus =
+          EnumSet.allOf(DistCpOptions.FileAttribute.class);
+
+      final Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+      
+      context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
+        DistCpUtils.packAttributes(preserveStatus));
+
+      touchFile(SOURCE_PATH + "/src/file");
+      OutputStream out = cluster.getFileSystem().create(new Path(TARGET_PATH + "/src/file"));
+      out.write("hello world".getBytes());
+      out.close();
+      cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+      cluster.getFileSystem().setPermission(new Path(TARGET_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+
+      final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
+        @Override
+        public FileSystem run() {
+          try {
+            return FileSystem.get(configuration);
+          } catch (IOException e) {
+            LOG.error("Exception encountered ", e);
+            Assert.fail("Test failed: " + e.getMessage());
+            throw new RuntimeException("Test ought to fail here");
+          }
+        }
+      });
+
+      tmpUser.doAs(new PrivilegedAction<Integer>() {
+        @Override
+        public Integer run() {
+          try {
+            copyMapper.setup(context);
+            copyMapper.map(new Text("/src/file"),
+                tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+                context);
+            Assert.fail("Didn't expect the file to be copied");
+          } catch (AccessControlException ignore) {
+          } catch (Exception e) {
+            if (e.getCause() == null || !(e.getCause() instanceof AccessControlException)) {
+              throw new RuntimeException(e);
+            }
+          }
+          return null;
+        }
+      });
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testFileToDir() {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      touchFile(SOURCE_PATH + "/src/file");
+      mkdirs(TARGET_PATH + "/src/file");
+      try {
+        copyMapper.setup(context);
+        copyMapper.map(new Text("/src/file"),
+            fs.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+            context);
+      } catch (IOException e) {
+        Assert.assertTrue(e.getMessage().startsWith("Can't replace"));
+      }
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
+  private void doTestIgnoreFailures(boolean ignoreFailures) {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      Configuration configuration = context.getConfiguration();
+      configuration.setBoolean(
+              DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), ignoreFailures);
+      configuration.setBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(),
+              true);
+      configuration.setBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(),
+              true);
+      copyMapper.setup(context);
+
+      for (Path path : pathList) {
+        final FileStatus fileStatus = fs.getFileStatus(path);
+        if (!fileStatus.isDirectory()) {
+          fs.delete(path, true);
+          copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                  fileStatus, context);
+        }
+      }
+      if (ignoreFailures) {
+        for (Text value : stubContext.getWriter().values()) {
+          Assert.assertTrue(value.toString() + " is not marked as failed", value.toString().startsWith("FAIL:"));
+        }
+      }
+      Assert.assertTrue("There should have been an exception.", ignoreFailures);
+    }
+    catch (Exception e) {
+      LOG.error("Exception encountered: ", e);
+      Assert.assertTrue("Unexpected exception: " + e.getMessage(),
+              !ignoreFailures);
+    }
+  }
+
+  private static void deleteState() throws IOException {
+    pathList.clear();
+    nFiles = 0;
+    cluster.getFileSystem().delete(new Path(SOURCE_PATH), true);
+    cluster.getFileSystem().delete(new Path(TARGET_PATH), true);
+  }
+
+  @Test
+  public void testPreserveBlockSizeAndReplication() {
+    testPreserveBlockSizeAndReplicationImpl(true);
+    testPreserveBlockSizeAndReplicationImpl(false);
+  }
+
+  private void testPreserveBlockSizeAndReplicationImpl(boolean preserve){
+    try {
+
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      Configuration configuration = context.getConfiguration();
+      EnumSet<DistCpOptions.FileAttribute> fileAttributes
+              = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
+      if (preserve) {
+        fileAttributes.add(DistCpOptions.FileAttribute.BLOCKSIZE);
+        fileAttributes.add(DistCpOptions.FileAttribute.REPLICATION);
+      }
+      configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
+              DistCpUtils.packAttributes(fileAttributes));
+
+      copyMapper.setup(context);
+
+      for (Path path : pathList) {
+        final FileStatus fileStatus = fs.getFileStatus(path);
+        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                fileStatus, context);
+      }
+
+      // Check that block-size and replication are preserved only when requested.
+      for (Path path : pathList) {
+        final Path targetPath = new Path(path.toString()
+                .replaceAll(SOURCE_PATH, TARGET_PATH));
+        final FileStatus source = fs.getFileStatus(path);
+        final FileStatus target = fs.getFileStatus(targetPath);
+        if (!source.isDirectory() ) {
+          Assert.assertTrue(preserve ||
+                  source.getBlockSize() != target.getBlockSize());
+          Assert.assertTrue(preserve ||
+                  source.getReplication() != target.getReplication());
+          Assert.assertTrue(!preserve ||
+                  source.getBlockSize() == target.getBlockSize());
+          Assert.assertTrue(!preserve ||
+                  source.getReplication() == target.getReplication());
+        }
+      }
+    }
+    catch (Exception e) {
+      LOG.error("Unexpected exception: ", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+
+  private static void changeUserGroup(String user, String group)
+          throws IOException {
+    FileSystem fs = cluster.getFileSystem();
+    FsPermission changedPermission = new FsPermission(
+            FsAction.ALL, FsAction.ALL, FsAction.ALL
+    );
+    for (Path path : pathList)
+      if (fs.isFile(path)) {
+        fs.setOwner(path, user, group);
+        fs.setPermission(path, changedPermission);
+      }
+  }
+
+  /**
+   * If a single file is being copied to a location where a file of the same
+   * name already exists, the copy is skipped when the target path is the
+   * parent directory, but the existing file is overwritten when the target
+   * path names the file itself.
+   */
+  @Test
+  public void testSingleFileCopy() {
+    try {
+      deleteState();
+      touchFile(SOURCE_PATH + "/1");
+      Path sourceFilePath = pathList.get(0);
+      Path targetFilePath = new Path(sourceFilePath.toString().replaceAll(
+              SOURCE_PATH, TARGET_PATH));
+      touchFile(targetFilePath.toString());
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      context.getConfiguration().set(
+              DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
+              targetFilePath.getParent().toString()); // Parent directory.
+      copyMapper.setup(context);
+
+      final FileStatus sourceFileStatus = fs.getFileStatus(sourceFilePath);
+
+      long before = fs.getFileStatus(targetFilePath).getModificationTime();
+      copyMapper.map(new Text(DistCpUtils.getRelativePath(
+              new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
+      long after = fs.getFileStatus(targetFilePath).getModificationTime();
+
+      Assert.assertTrue("File should have been skipped", before == after);
+
+      context.getConfiguration().set(
+              DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
+              targetFilePath.toString()); // Specify the file path.
+      copyMapper.setup(context);
+
+      before = fs.getFileStatus(targetFilePath).getModificationTime();
+      try { Thread.sleep(2); } catch (Throwable ignore) {}
+      copyMapper.map(new Text(DistCpUtils.getRelativePath(
+              new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
+      after = fs.getFileStatus(targetFilePath).getModificationTime();
+
+      Assert.assertTrue("File should have been overwritten.", before < after);
+
+    } catch (Exception exception) {
+      LOG.error("Unexpected exception: ", exception);
+      Assert.fail("Unexpected exception: " + exception.getMessage());
+    }
+  }
+
+  @Test
+  public void testPreserveUserGroup() {
+    testPreserveUserGroupImpl(true);
+    testPreserveUserGroupImpl(false);
+  }
+
+  private void testPreserveUserGroupImpl(boolean preserve){
+    try {
+
+      deleteState();
+      createSourceData();
+      changeUserGroup("Michael", "Corleone");
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      Configuration configuration = context.getConfiguration();
+      EnumSet<DistCpOptions.FileAttribute> fileAttributes
+              = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
+      if (preserve) {
+        fileAttributes.add(DistCpOptions.FileAttribute.USER);
+        fileAttributes.add(DistCpOptions.FileAttribute.GROUP);
+        fileAttributes.add(DistCpOptions.FileAttribute.PERMISSION);
+      }
+
+      configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
+              DistCpUtils.packAttributes(fileAttributes));
+      copyMapper.setup(context);
+
+      for (Path path : pathList) {
+        final FileStatus fileStatus = fs.getFileStatus(path);
+        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                fileStatus, context);
+      }
+
+      // Check that the user/group/permission attributes are preserved
+      // only when preservation was requested.
+      for (Path path : pathList) {
+        final Path targetPath = new Path(path.toString()
+                .replaceAll(SOURCE_PATH, TARGET_PATH));
+        final FileStatus source = fs.getFileStatus(path);
+        final FileStatus target = fs.getFileStatus(targetPath);
+        if (!source.isDirectory()) {
+          Assert.assertTrue(!preserve || source.getOwner().equals(target.getOwner()));
+          Assert.assertTrue(!preserve || source.getGroup().equals(target.getGroup()));
+          Assert.assertTrue(!preserve || source.getPermission().equals(target.getPermission()));
+          Assert.assertTrue( preserve || !source.getOwner().equals(target.getOwner()));
+          Assert.assertTrue( preserve || !source.getGroup().equals(target.getGroup()));
+          Assert.assertTrue( preserve || !source.getPermission().equals(target.getPermission()));
+          Assert.assertTrue(source.isDirectory() ||
+                  source.getReplication() != target.getReplication());
+        }
+      }
+    }
+    catch (Exception e) {
+      LOG.error("Unexpected exception: ", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+}
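
For reference, the cases above all follow one pattern: build a StubContext around a Configuration, run CopyMapper.setup() and map() over the listed paths, then assert on the mapper's counters. A minimal sketch of that pattern, assuming the same MiniDFSCluster scaffolding and helpers used in TestCopyMapper (the helper name copyAndAssertAllCopied is hypothetical, not part of the patch):

    // Sketch only: map every listed path through CopyMapper via a StubContext,
    // then verify the COPY counter matches the number of paths.
    private void copyAndAssertAllCopied(FileSystem fs, Configuration conf,
                                        List<Path> sources) throws Exception {
      CopyMapper copyMapper = new CopyMapper();
      StubContext stubContext = new StubContext(conf, null, 0);
      Mapper<Text, FileStatus, Text, Text>.Context context = stubContext.getContext();
      copyMapper.setup(context);
      for (Path source : sources) {
        copyMapper.map(
            new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), source)),
            fs.getFileStatus(source), context);
      }
      // Every listed path should have been counted as copied exactly once.
      Assert.assertEquals(sources.size(),
          stubContext.getReporter().getCounter(CopyMapper.Counter.COPY).getValue());
    }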

+ 135 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyOutputFormat.java

@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.junit.Test;
+import org.junit.Assert;
+
+import java.io.IOException;
+
+public class TestCopyOutputFormat {
+  private static final Log LOG = LogFactory.getLog(TestCopyOutputFormat.class);
+
+  @Test
+  public void testSetCommitDirectory() {
+    try {
+      Job job = Job.getInstance(new Configuration());
+      Assert.assertEquals(null, CopyOutputFormat.getCommitDirectory(job));
+
+      job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, "");
+      Assert.assertEquals(null, CopyOutputFormat.getCommitDirectory(job));
+
+      Path directory = new Path("/tmp/test");
+      CopyOutputFormat.setCommitDirectory(job, directory);
+      Assert.assertEquals(directory, CopyOutputFormat.getCommitDirectory(job));
+      Assert.assertEquals(directory.toString(), job.getConfiguration().
+          get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running test", e);
+      Assert.fail("Failed while testing for set Commit Directory");
+    }
+  }
+
+  @Test
+  public void testSetWorkingDirectory() {
+    try {
+      Job job = Job.getInstance(new Configuration());
+      Assert.assertEquals(null, CopyOutputFormat.getWorkingDirectory(job));
+
+      job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, "");
+      Assert.assertEquals(null, CopyOutputFormat.getWorkingDirectory(job));
+
+      Path directory = new Path("/tmp/test");
+      CopyOutputFormat.setWorkingDirectory(job, directory);
+      Assert.assertEquals(directory, CopyOutputFormat.getWorkingDirectory(job));
+      Assert.assertEquals(directory.toString(), job.getConfiguration().
+          get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running test", e);
+      Assert.fail("Failed while testing for set Working Directory");
+    }
+  }
+
+  @Test
+  public void testGetOutputCommitter() {
+    try {
+      TaskAttemptContext context = new TaskAttemptContextImpl(new Configuration(),
+        new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
+      context.getConfiguration().set("mapred.output.dir", "/out");
+      Assert.assertTrue(new CopyOutputFormat().getOutputCommitter(context) instanceof CopyCommitter);
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Unable to get output committer");
+    }
+  }
+
+  @Test
+  public void testCheckOutputSpecs() {
+    try {
+      OutputFormat outputFormat = new CopyOutputFormat();
+      Job job = Job.getInstance(new Configuration());
+      JobID jobID = new JobID("200707121733", 1);
+
+      try {
+        JobContext context = new JobContextImpl(job.getConfiguration(), jobID);
+        outputFormat.checkOutputSpecs(context);
+        Assert.fail("No checking for invalid work/commit path");
+      } catch (IllegalStateException ignore) { }
+
+      CopyOutputFormat.setWorkingDirectory(job, new Path("/tmp/work"));
+      try {
+        JobContext context = new JobContextImpl(job.getConfiguration(), jobID);
+        outputFormat.checkOutputSpecs(context);
+        Assert.fail("No checking for invalid commit path");
+      } catch (IllegalStateException ignore) { }
+
+      job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, "");
+      CopyOutputFormat.setCommitDirectory(job, new Path("/tmp/commit"));
+      try {
+        JobContext context = new JobContextImpl(job.getConfiguration(), jobID);
+        outputFormat.checkOutputSpecs(context);
+        Assert.fail("No checking for invalid work path");
+      } catch (IllegalStateException ignore) { }
+
+      CopyOutputFormat.setWorkingDirectory(job, new Path("/tmp/work"));
+      CopyOutputFormat.setCommitDirectory(job, new Path("/tmp/commit"));
+      try {
+        JobContext context = new JobContextImpl(job.getConfiguration(), jobID);
+        outputFormat.checkOutputSpecs(context);
+      } catch (IllegalStateException ignore) {
+        Assert.fail("Output spec check failed.");
+      }
+
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing checkOutputSpecs", e);
+      Assert.fail("checkOutputSpecs failure");
+    } catch (InterruptedException e) {
+      LOG.error("Exception encountered while testing checkOutputSpecs", e);
+      Assert.fail("checkOutputSpecs failure");
+    }
+  }
+}
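
For context, the two static setters exercised above are what a driver would call to point CopyOutputFormat at its working and commit directories. A short sketch of that wiring, assuming a plain MapReduce Job; the /tmp paths are placeholders and Job.setOutputFormatClass is standard MapReduce API, not part of this patch:

    // Sketch: configure a job so CopyOutputFormat.checkOutputSpecs() passes.
    Job job = Job.getInstance(new Configuration());
    CopyOutputFormat.setWorkingDirectory(job, new Path("/tmp/work"));
    CopyOutputFormat.setCommitDirectory(job, new Path("/tmp/commit"));
    job.setOutputFormatClass(CopyOutputFormat.class);
    // With both directories set, checkOutputSpecs() no longer throws
    // IllegalStateException (compare testCheckOutputSpecs above).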

+ 254 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestUniformSizeInputFormat.java

@@ -0,0 +1,254 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.tools.CopyListing;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.StubContext;
+import org.apache.hadoop.security.Credentials;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+
+public class TestUniformSizeInputFormat {
+  private static final Log LOG
+                = LogFactory.getLog(TestUniformSizeInputFormat.class);
+
+  private static MiniDFSCluster cluster;
+  private static final int N_FILES = 20;
+  private static final int SIZEOF_EACH_FILE = 1024;
+  private static final Random random = new Random();
+  private static int totalFileSize = 0;
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    cluster = new MiniDFSCluster.Builder(new Configuration()).numDataNodes(1)
+                                          .format(true).build();
+    totalFileSize = 0;
+
+    for (int i=0; i<N_FILES; ++i)
+      totalFileSize += createFile("/tmp/source/" + String.valueOf(i), SIZEOF_EACH_FILE);
+  }
+
+  private static DistCpOptions getOptions(int nMaps) throws Exception {
+    Path sourcePath = new Path(cluster.getFileSystem().getUri().toString()
+                               + "/tmp/source");
+    Path targetPath = new Path(cluster.getFileSystem().getUri().toString()
+                               + "/tmp/target");
+
+    List<Path> sourceList = new ArrayList<Path>();
+    sourceList.add(sourcePath);
+    final DistCpOptions distCpOptions = new DistCpOptions(sourceList, targetPath);
+    distCpOptions.setMaxMaps(nMaps);
+    return distCpOptions;
+  }
+
+  private static int createFile(String path, int fileSize) throws Exception {
+    FileSystem fileSystem = null;
+    DataOutputStream outputStream = null;
+    try {
+      fileSystem = cluster.getFileSystem();
+      outputStream = fileSystem.create(new Path(path), true, 0);
+      int size = (int) Math.ceil(fileSize + (1 - random.nextFloat()) * fileSize);
+      outputStream.write(new byte[size]);
+      return size;
+    }
+    finally {
+      IOUtils.cleanup(null, fileSystem, outputStream);
+    }
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    cluster.shutdown();
+  }
+
+  public void testGetSplits(int nMaps) throws Exception {
+    DistCpOptions options = getOptions(nMaps);
+    Configuration configuration = new Configuration();
+    configuration.set("mapred.map.tasks",
+                      String.valueOf(options.getMaxMaps()));
+    Path listFile = new Path(cluster.getFileSystem().getUri().toString()
+        + "/tmp/testGetSplits_1/fileList.seq");
+    CopyListing.getCopyListing(configuration, CREDENTIALS, options).
+        buildListing(listFile, options);
+
+    JobContext jobContext = new JobContextImpl(configuration, new JobID());
+    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
+    List<InputSplit> splits
+            = uniformSizeInputFormat.getSplits(jobContext);
+
+    List<InputSplit> legacySplits = legacyGetSplits(listFile, nMaps);
+
+    int sizePerMap = totalFileSize/nMaps;
+
+    checkSplits(listFile, splits);
+    checkAgainstLegacy(splits, legacySplits);
+
+    int doubleCheckedTotalSize = 0;
+    int previousSplitSize = -1;
+    for (int i=0; i<splits.size(); ++i) {
+      InputSplit split = splits.get(i);
+      int currentSplitSize = 0;
+      RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(
+              split, null);
+      StubContext stubContext = new StubContext(jobContext.getConfiguration(),
+                                                recordReader, 0);
+      final TaskAttemptContext taskAttemptContext
+         = stubContext.getContext();
+      recordReader.initialize(split, taskAttemptContext);
+      while (recordReader.nextKeyValue()) {
+        Path sourcePath = recordReader.getCurrentValue().getPath();
+        FileSystem fs = sourcePath.getFileSystem(configuration);
+        FileStatus fileStatus [] = fs.listStatus(sourcePath);
+        Assert.assertEquals(fileStatus.length, 1);
+        currentSplitSize += fileStatus[0].getLen();
+      }
+      Assert.assertTrue(
+           previousSplitSize == -1
+               || Math.abs(currentSplitSize - previousSplitSize) < 0.1*sizePerMap
+               || i == splits.size()-1);
+
+      doubleCheckedTotalSize += currentSplitSize;
+    }
+
+    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
+  }
+
+  // From
+  // http://svn.apache.org/repos/asf/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java
+  private List<InputSplit> legacyGetSplits(Path listFile, int numSplits)
+      throws IOException {
+
+    FileSystem fs = cluster.getFileSystem();
+    FileStatus srcst = fs.getFileStatus(listFile);
+    Configuration conf = fs.getConf();
+
+    ArrayList<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
+    FileStatus value = new FileStatus();
+    Text key = new Text();
+    final long targetsize = totalFileSize / numSplits;
+    long pos = 0L;
+    long last = 0L;
+    long acc = 0L;
+    long cbrem = srcst.getLen();
+    SequenceFile.Reader sl = null;
+
+    LOG.info("Average bytes per map: " + targetsize +
+        ", Number of maps: " + numSplits + ", total size: " + totalFileSize);
+
+    try {
+      sl = new SequenceFile.Reader(conf, SequenceFile.Reader.file(listFile));
+      for (; sl.next(key, value); last = sl.getPosition()) {
+        // if adding this split would put this split past the target size,
+        // cut the last split and put this next file in the next split.
+        if (acc + value.getLen() > targetsize && acc != 0) {
+          long splitsize = last - pos;
+          FileSplit fileSplit = new FileSplit(listFile, pos, splitsize, null);
+          LOG.info ("Creating split : " + fileSplit + ", bytes in split: " + splitsize);
+          splits.add(fileSplit);
+          cbrem -= splitsize;
+          pos = last;
+          acc = 0L;
+        }
+        acc += value.getLen();
+      }
+    }
+    finally {
+      IOUtils.closeStream(sl);
+    }
+    if (cbrem != 0) {
+      FileSplit fileSplit = new FileSplit(listFile, pos, cbrem, null);
+      LOG.info ("Creating split : " + fileSplit + ", bytes in split: " + cbrem);
+      splits.add(fileSplit);
+    }
+
+    return splits;
+  }
+
+  private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
+    long lastEnd = 0;
+
+    // Verify that each split starts exactly where the previous split ended,
+    // so that no part of the listing file is missed.
+    for (InputSplit split : splits) {
+      FileSplit fileSplit = (FileSplit) split;
+      long start = fileSplit.getStart();
+      Assert.assertEquals(lastEnd, start);
+      lastEnd = start + fileSplit.getLength();
+    }
+
+    //Verify there is nothing more to read from the input file
+    SequenceFile.Reader reader
+            = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
+                    SequenceFile.Reader.file(listFile));
+
+    try {
+      reader.seek(lastEnd);
+      FileStatus srcFileStatus = new FileStatus();
+      Text srcRelPath = new Text();
+      Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+  }
+
+  private void checkAgainstLegacy(List<InputSplit> splits,
+                                  List<InputSplit> legacySplits)
+      throws IOException, InterruptedException {
+
+    Assert.assertEquals(legacySplits.size(), splits.size());
+    for (int index = 0; index < splits.size(); index++) {
+      FileSplit fileSplit = (FileSplit) splits.get(index);
+      FileSplit legacyFileSplit = (FileSplit) legacySplits.get(index);
+      Assert.assertEquals(fileSplit.getStart(), legacyFileSplit.getStart());
+    }
+  }
+
+  @Test
+  public void testGetSplits() throws Exception {
+    testGetSplits(9);
+    for (int i=1; i<N_FILES; ++i)
+      testGetSplits(i);
+  }
+}

+ 162 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java

@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred.lib;
+
+import org.junit.Assert;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
+import org.apache.hadoop.tools.CopyListing;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.StubContext;
+import org.apache.hadoop.security.Credentials;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.DataOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+public class TestDynamicInputFormat {
+  private static final Log LOG = LogFactory.getLog(TestDynamicInputFormat.class);
+  private static MiniDFSCluster cluster;
+  private static final int N_FILES = 1000;
+  private static final int NUM_SPLITS = 7;
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+  private static List<String> expectedFilePaths = new ArrayList<String>(N_FILES);
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    cluster = new MiniDFSCluster.Builder(getConfigurationForCluster())
+                  .numDataNodes(1).format(true).build();
+
+    for (int i=0; i<N_FILES; ++i)
+      createFile("/tmp/source/" + String.valueOf(i));
+
+  }
+
+  private static Configuration getConfigurationForCluster() {
+    Configuration configuration = new Configuration();
+    System.setProperty("test.build.data",
+                       "target/tmp/build/TEST_DYNAMIC_INPUT_FORMAT/data");
+    configuration.set("hadoop.log.dir", "target/tmp");
+    LOG.debug("fs.default.name  == " + configuration.get("fs.default.name"));
+    LOG.debug("dfs.http.address == " + configuration.get("dfs.http.address"));
+    return configuration;
+  }
+
+  private static DistCpOptions getOptions() throws Exception {
+    Path sourcePath = new Path(cluster.getFileSystem().getUri().toString()
+            + "/tmp/source");
+    Path targetPath = new Path(cluster.getFileSystem().getUri().toString()
+            + "/tmp/target");
+
+    List<Path> sourceList = new ArrayList<Path>();
+    sourceList.add(sourcePath);
+    DistCpOptions options = new DistCpOptions(sourceList, targetPath);
+    options.setMaxMaps(NUM_SPLITS);
+    return options;
+  }
+
+  private static void createFile(String path) throws Exception {
+    FileSystem fileSystem = null;
+    DataOutputStream outputStream = null;
+    try {
+      fileSystem = cluster.getFileSystem();
+      outputStream = fileSystem.create(new Path(path), true, 0);
+      expectedFilePaths.add(fileSystem.listStatus(
+                                    new Path(path))[0].getPath().toString());
+    }
+    finally {
+      IOUtils.cleanup(null, fileSystem, outputStream);
+    }
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    cluster.shutdown();
+  }
+
+  @Test
+  public void testGetSplits() throws Exception {
+    DistCpOptions options = getOptions();
+    Configuration configuration = new Configuration();
+    configuration.set("mapred.map.tasks",
+                      String.valueOf(options.getMaxMaps()));
+    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
+            new Path(cluster.getFileSystem().getUri().toString()
+                    +"/tmp/testDynInputFormat/fileList.seq"), options);
+
+    JobContext jobContext = new JobContextImpl(configuration, new JobID());
+    DynamicInputFormat<Text, FileStatus> inputFormat =
+        new DynamicInputFormat<Text, FileStatus>();
+    List<InputSplit> splits = inputFormat.getSplits(jobContext);
+
+    int nFiles = 0;
+    int taskId = 0;
+
+    for (InputSplit split : splits) {
+      RecordReader<Text, FileStatus> recordReader =
+           inputFormat.createRecordReader(split, null);
+      StubContext stubContext = new StubContext(jobContext.getConfiguration(),
+                                                recordReader, taskId);
+      final TaskAttemptContext taskAttemptContext
+         = stubContext.getContext();
+      
+      recordReader.initialize(splits.get(0), taskAttemptContext);
+      float previousProgressValue = 0f;
+      while (recordReader.nextKeyValue()) {
+        FileStatus fileStatus = recordReader.getCurrentValue();
+        String source = fileStatus.getPath().toString();
+        System.out.println(source);
+        Assert.assertTrue(expectedFilePaths.contains(source));
+        final float progress = recordReader.getProgress();
+        Assert.assertTrue(progress >= previousProgressValue);
+        Assert.assertTrue(progress >= 0.0f);
+        Assert.assertTrue(progress <= 1.0f);
+        previousProgressValue = progress;
+        ++nFiles;
+      }
+      Assert.assertTrue(recordReader.getProgress() == 1.0f);
+
+      ++taskId;
+    }
+
+    Assert.assertEquals(expectedFilePaths.size(), nFiles);
+  }
+
+  @Test
+  public void testGetSplitRatio() throws Exception {
+    Assert.assertEquals(1, DynamicInputFormat.getSplitRatio(1, 1000000000));
+    Assert.assertEquals(2, DynamicInputFormat.getSplitRatio(11000000, 10));
+    Assert.assertEquals(4, DynamicInputFormat.getSplitRatio(30, 700));
+    Assert.assertEquals(2, DynamicInputFormat.getSplitRatio(30, 200));
+  }
+}

+ 220 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java

@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+
+import java.util.EnumSet;
+import java.util.Random;
+import java.util.Stack;
+import java.io.IOException;
+import java.io.OutputStream;
+
+public class TestDistCpUtils {
+  private static final Log LOG = LogFactory.getLog(TestDistCpUtils.class);
+
+  private static final Configuration config = new Configuration();
+  private static MiniDFSCluster cluster;
+
+  @BeforeClass
+  public static void create() throws IOException {
+    cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).format(true)
+                                                .build(); 
+  }
+
+  @AfterClass
+  public static void destroy() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testGetRelativePathRoot() {
+    Path root = new Path("/tmp/abc");
+    Path child = new Path("/tmp/abc/xyz/file");
+    Assert.assertEquals(DistCpUtils.getRelativePath(root, child), "/xyz/file");
+
+    root = new Path("/");
+    child = new Path("/a");
+    Assert.assertEquals(DistCpUtils.getRelativePath(root, child), "/a");
+  }
+
+  @Test
+  public void testPackAttributes() {
+    EnumSet<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "");
+
+    attributes.add(FileAttribute.REPLICATION);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "R");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("R"));
+
+    attributes.add(FileAttribute.BLOCKSIZE);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "RB");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("RB"));
+
+    attributes.add(FileAttribute.USER);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "RBU");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("RBU"));
+
+    attributes.add(FileAttribute.GROUP);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "RBUG");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("RBUG"));
+
+    attributes.add(FileAttribute.PERMISSION);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "RBUGP");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("RBUGP"));
+  }
+
+  @Test
+  public void testPreserve() {
+    try {
+      FileSystem fs = FileSystem.get(config);
+      EnumSet<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);
+
+
+      Path path = new Path("/tmp/abc");
+      Path src = new Path("/tmp/src");
+      fs.mkdirs(path);
+      fs.mkdirs(src);
+      FileStatus srcStatus = fs.getFileStatus(src);
+
+      FsPermission noPerm = new FsPermission((short) 0);
+      fs.setPermission(path, noPerm);
+      fs.setOwner(path, "nobody", "nobody");
+
+      DistCpUtils.preserve(fs, path, srcStatus, attributes);
+      FileStatus target = fs.getFileStatus(path);
+      Assert.assertEquals(target.getPermission(), noPerm);
+      Assert.assertEquals(target.getOwner(), "nobody");
+      Assert.assertEquals(target.getGroup(), "nobody");
+
+      attributes.add(FileAttribute.PERMISSION);
+      DistCpUtils.preserve(fs, path, srcStatus, attributes);
+      target = fs.getFileStatus(path);
+      Assert.assertEquals(target.getPermission(), srcStatus.getPermission());
+      Assert.assertEquals(target.getOwner(), "nobody");
+      Assert.assertEquals(target.getGroup(), "nobody");
+
+      attributes.add(FileAttribute.GROUP);
+      attributes.add(FileAttribute.USER);
+      DistCpUtils.preserve(fs, path, srcStatus, attributes);
+      target = fs.getFileStatus(path);
+      Assert.assertEquals(target.getPermission(), srcStatus.getPermission());
+      Assert.assertEquals(target.getOwner(), srcStatus.getOwner());
+      Assert.assertEquals(target.getGroup(), srcStatus.getGroup());
+
+      fs.delete(path, true);
+      fs.delete(src, true);
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Preserve test failure");
+    }
+  }
+
+  private static Random rand = new Random();
+
+  public static String createTestSetup(FileSystem fs) throws IOException {
+    return createTestSetup("/tmp1", fs, FsPermission.getDefault());
+  }
+  
+  public static String createTestSetup(FileSystem fs,
+                                       FsPermission perm) throws IOException {
+    return createTestSetup("/tmp1", fs, perm);
+  }
+
+  public static String createTestSetup(String baseDir,
+                                       FileSystem fs,
+                                       FsPermission perm) throws IOException {
+    String base = getBase(baseDir);
+    fs.mkdirs(new Path(base + "/newTest/hello/world1"));
+    fs.mkdirs(new Path(base + "/newTest/hello/world2/newworld"));
+    fs.mkdirs(new Path(base + "/newTest/hello/world3/oldworld"));
+    fs.setPermission(new Path(base + "/newTest"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world1"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world2"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world2/newworld"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world3"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world3/oldworld"), perm);
+    createFile(fs, base + "/newTest/1");
+    createFile(fs, base + "/newTest/hello/2");
+    createFile(fs, base + "/newTest/hello/world3/oldworld/3");
+    createFile(fs, base + "/newTest/hello/world2/4");
+    return base;
+  }
+
+  private static String getBase(String base) {
+    String location = String.valueOf(rand.nextLong());
+    return base + "/" + location;
+  }
+
+  public static void delete(FileSystem fs, String path) {
+    try {
+      if (fs != null) {
+        if (path != null) {
+          fs.delete(new Path(path), true);
+        }
+      }
+    } catch (IOException e) {
+      LOG.warn("Exception encountered ", e);
+    }
+  }
+
+  public static void createFile(FileSystem fs, String filePath) throws IOException {
+    OutputStream out = fs.create(new Path(filePath));
+    IOUtils.closeStream(out);
+  }
+
+  public static boolean checkIfFoldersAreInSync(FileSystem fs, String targetBase, String sourceBase)
+      throws IOException {
+    Path base = new Path(targetBase);
+
+    Stack<Path> stack = new Stack<Path>();
+    stack.push(base);
+    while (!stack.isEmpty()) {
+      Path file = stack.pop();
+      if (!fs.exists(file)) continue;
+      FileStatus[] fStatus = fs.listStatus(file);
+      if (fStatus == null || fStatus.length == 0) continue;
+
+      for (FileStatus status : fStatus) {
+        if (status.isDirectory()) {
+          stack.push(status.getPath());
+        }
+        Assert.assertTrue(fs.exists(new Path(sourceBase + "/" +
+            DistCpUtils.getRelativePath(new Path(targetBase), status.getPath()))));
+      }
+    }
+    return true;
+  }
+}

+ 81 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestRetriableCommand.java

@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.hadoop.io.retry.RetryPolicy;
+import org.apache.hadoop.io.retry.RetryPolicies;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.concurrent.TimeUnit;
+
+public class TestRetriableCommand {
+
+  private static class MyRetriableCommand extends RetriableCommand {
+
+    private int succeedAfter;
+    private int retryCount = 0;
+
+    public MyRetriableCommand(int succeedAfter) {
+      super("MyRetriableCommand");
+      this.succeedAfter = succeedAfter;
+    }
+
+    public MyRetriableCommand(int succeedAfter, RetryPolicy retryPolicy) {
+      super("MyRetriableCommand", retryPolicy);
+      this.succeedAfter = succeedAfter;
+    }
+
+    @Override
+    protected Object doExecute(Object... arguments) throws Exception {
+      if (++retryCount < succeedAfter)
+        throw new Exception("Transient failure#" + retryCount);
+      return 0;
+    }
+  }
+
+  @Test
+  public void testRetriableCommand() {
+    try {
+      new MyRetriableCommand(5).execute(0);
+      Assert.fail("Command should have failed under the default retry-policy");
+    }
+    catch (Exception e) {
+      // Expected: the default retry-policy gives up before the command succeeds.
+    }
+
+    try {
+      new MyRetriableCommand(3).execute(0);
+    }
+    catch (Exception e) {
+      Assert.fail("Command should have succeeded within the default retries");
+    }
+
+    try {
+      new MyRetriableCommand(5, RetryPolicies.
+          retryUpToMaximumCountWithFixedSleep(5, 0, TimeUnit.MILLISECONDS)).execute(0);
+    }
+    catch (Exception e) {
+      Assert.fail("Command should have succeeded with 5 fixed-sleep retries");
+    }
+  }
+}

+ 157 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestThrottledInputStream.java

@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.IOUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.*;
+
+public class TestThrottledInputStream {
+  private static final Log LOG = LogFactory.getLog(TestThrottledInputStream.class);
+  private static final int BUFF_SIZE = 1024;
+
+  private enum CB {ONE_C, BUFFER, BUFF_OFFSET}
+
+  @Test
+  public void testRead() {
+    File tmpFile;
+    File outFile;
+    try {
+      tmpFile = createFile(1024);
+      outFile = createFile();
+
+      tmpFile.deleteOnExit();
+      outFile.deleteOnExit();
+
+      long maxBandwidth = copyAndAssert(tmpFile, outFile, 0, 1, -1, CB.BUFFER);
+
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 20, 0, CB.BUFFER);
+/*
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 10, 0, CB.BUFFER);
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 50, 0, CB.BUFFER);
+*/
+
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 20, 0, CB.BUFF_OFFSET);
+/*
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 10, 0, CB.BUFF_OFFSET);
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 50, 0, CB.BUFF_OFFSET);
+*/
+
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 20, 0, CB.ONE_C);
+/*
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 10, 0, CB.ONE_C);
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 50, 0, CB.ONE_C);
+*/
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Throttled-read test failure");
+    }
+  }
+
+  private long copyAndAssert(File tmpFile, File outFile,
+                             long maxBandwidth, float factor,
+                             int sleepTime, CB flag) throws IOException {
+    long bandwidth;
+    ThrottledInputStream in;
+    long maxBPS = (long) (maxBandwidth / factor);
+
+    if (maxBandwidth == 0) {
+      in = new ThrottledInputStream(new FileInputStream(tmpFile));
+    } else {
+      in = new ThrottledInputStream(new FileInputStream(tmpFile), maxBPS);
+    }
+    OutputStream out = new FileOutputStream(outFile);
+    try {
+      if (flag == CB.BUFFER) {
+        copyBytes(in, out, BUFF_SIZE);
+      } else if (flag == CB.BUFF_OFFSET){
+        copyBytesWithOffset(in, out, BUFF_SIZE);
+      } else {
+        copyByteByByte(in, out);
+      }
+
+      LOG.info(in);
+      bandwidth = in.getBytesPerSec();
+      Assert.assertEquals(in.getTotalBytesRead(), tmpFile.length());
+      Assert.assertTrue(in.getBytesPerSec() > maxBandwidth / (factor * 1.2));
+      Assert.assertTrue(in.getTotalSleepTime() >  sleepTime || in.getBytesPerSec() <= maxBPS);
+    } finally {
+      IOUtils.closeStream(in);
+      IOUtils.closeStream(out);
+    }
+    return bandwidth;
+  }
+
+  private static void copyBytesWithOffset(InputStream in, OutputStream out, int buffSize)
+    throws IOException {
+
+    byte buf[] = new byte[buffSize];
+    int bytesRead = in.read(buf, 0, buffSize);
+    while (bytesRead >= 0) {
+      out.write(buf, 0, bytesRead);
+      bytesRead = in.read(buf, 0, buffSize);
+    }
+  }
+
+  private static void copyByteByByte(InputStream in, OutputStream out)
+    throws IOException {
+
+    int ch = in.read();
+    while (ch >= 0) {
+      out.write(ch);
+      ch = in.read();
+    }
+  }
+
+  private static void copyBytes(InputStream in, OutputStream out, int buffSize)
+    throws IOException {
+
+    byte buf[] = new byte[buffSize];
+    int bytesRead = in.read(buf);
+    while (bytesRead >= 0) {
+      out.write(buf, 0, bytesRead);
+      bytesRead = in.read(buf);
+    }
+  }
+
+  private File createFile(long sizeInKB) throws IOException {
+    File tmpFile = createFile();
+    writeToFile(tmpFile, sizeInKB);
+    return tmpFile;
+  }
+
+  private File createFile() throws IOException {
+    return File.createTempFile("tmp", "dat");
+  }
+
+  private void writeToFile(File tmpFile, long sizeInKB) throws IOException {
+    OutputStream out = new FileOutputStream(tmpFile);
+    try {
+      byte[] buffer = new byte [1024];
+      for (long index = 0; index < sizeInKB; index++) {
+        out.write(buffer);
+      }
+    } finally {
+      IOUtils.closeStream(out);
+    }
+  }
+}
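Note: the test above drives ThrottledInputStream through its two constructors and the getBytesPerSec()/getTotalBytesRead()/getTotalSleepTime() counters. A minimal usage sketch, assuming a local file and an illustrative 1 MB/s cap (the class name, file path, and limit below are not part of the patch):

import org.apache.hadoop.tools.util.ThrottledInputStream;

import java.io.FileInputStream;
import java.io.InputStream;

public class ThrottledReadSketch {
  public static void main(String[] args) throws Exception {
    // Wrap any InputStream; reads are throttled to roughly 1 MB/s (hypothetical limit).
    InputStream in = new ThrottledInputStream(
        new FileInputStream("/tmp/source.dat"), 1024 * 1024);
    try {
      byte[] buf = new byte[1024];
      long total = 0;
      int n;
      while ((n = in.read(buf)) >= 0) {
        total += n;
      }
      System.out.println("Read " + total + " bytes");
    } finally {
      in.close();
    }
  }
}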

+ 57 - 0
hadoop-tools/hadoop-distcp/src/test/resources/sslConfig.xml

@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.client.truststore.location</name>
+  <value>/path/to/truststore/keys/keystore.jks</value>
+  <description>Truststore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.password</name>
+  <value>changeit</value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.location</name>
+  <value>/path/to/keystore/keys/keystore.jks</value>
+  <description>Keystore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.password</name>
+  <value>changeit</value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.keypassword</name>
+  <value>changeit</value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>
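The ssl.client.* keys above follow the standard Hadoop Configuration pattern. A minimal sketch of how a client could read them, assuming the file is on the classpath (only the resource name and property names come from the file above; the class and printout are illustrative):

import org.apache.hadoop.conf.Configuration;

public class SslConfigSketch {
  public static void main(String[] args) {
    // Load only the SSL resource, not core-default/core-site.
    Configuration sslConf = new Configuration(false);
    sslConf.addResource("sslConfig.xml");   // resolved from the classpath

    String trustStore = sslConf.get("ssl.client.truststore.location");
    String trustStoreType = sslConf.get("ssl.client.truststore.type", "jks");
    System.out.println("Client truststore: " + trustStore + " (" + trustStoreType + ")");
  }
}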

+ 1 - 0
hadoop-tools/pom.xml

@@ -29,6 +29,7 @@
 
 
   <modules>
     <module>hadoop-streaming</module>
+    <module>hadoop-distcp</module>
     <module>hadoop-archives</module>
     <module>hadoop-rumen</module>
     <module>hadoop-tools-dist</module>