
MAPREDUCE-2765. DistCp Rewrite. (Mithun Radhakrishnan via mahadev)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1236045 13f79535-47bb-0310-9956-ffa450edef68
Mahadev Konar 13 years ago
parent
commit
d06948002f
50 changed files with 10389 additions and 0 deletions
  1. 2 0
      hadoop-mapreduce-project/CHANGES.txt
  2. 18 0
      hadoop-project/pom.xml
  3. 7 0
      hadoop-tools/hadoop-distcp/README
  4. 185 0
      hadoop-tools/hadoop-distcp/pom.xml
  5. 218 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java
  6. 405 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java
  7. 104 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java
  8. 218 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java
  9. 525 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java
  10. 100 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/FileBasedCopyListing.java
  11. 105 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java
  12. 246 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java
  13. 275 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java
  14. 297 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java
  15. 330 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java
  16. 124 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java
  17. 56 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java
  18. 245 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java
  19. 169 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java
  20. 246 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java
  21. 292 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java
  22. 203 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicRecordReader.java
  23. 343 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java
  24. 106 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java
  25. 139 0
      hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java
  26. 41 0
      hadoop-tools/hadoop-distcp/src/main/resources/distcp-default.xml
  27. 98 0
      hadoop-tools/hadoop-distcp/src/site/fml/faq.fml
  28. 47 0
      hadoop-tools/hadoop-distcp/src/site/pdf.xml
  29. 125 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/appendix.xml
  30. 200 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/architecture.xml
  31. 123 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/cli.xml
  32. 32 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/index.xml
  33. 147 0
      hadoop-tools/hadoop-distcp/src/site/xdoc/usage.xml
  34. 139 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/StubContext.java
  35. 252 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java
  36. 275 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCp.java
  37. 542 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestFileBasedCopyListing.java
  38. 135 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestGlobbedCopyListing.java
  39. 466 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java
  40. 497 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java
  41. 419 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java
  42. 826 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java
  43. 135 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyOutputFormat.java
  44. 254 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestUniformSizeInputFormat.java
  45. 162 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java
  46. 220 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java
  47. 81 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestRetriableCommand.java
  48. 157 0
      hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestThrottledInputStream.java
  49. 57 0
      hadoop-tools/hadoop-distcp/src/test/resources/sslConfig.xml
  50. 1 0
      hadoop-tools/pom.xml

+ 2 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -195,6 +195,8 @@ Release 0.23.1 - Unreleased
     MAPREDUCE-3710. Improved FileInputFormat to return better locality for the
     last split. (Siddarth Seth via vinodkv)
 
+    MAPREDUCE-2765. DistCp Rewrite. (Mithun Radhakrishnan via mahadev)
+
   OPTIMIZATIONS
 
     MAPREDUCE-3567. Extraneous JobConf objects in AM heap. (Vinod Kumar

+ 18 - 0
hadoop-project/pom.xml

@@ -709,11 +709,21 @@
           <artifactId>maven-project-info-reports-plugin</artifactId>
           <version>2.4</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-resources-plugin</artifactId>
+          <version>2.2</version>
+        </plugin>
         <plugin>
           <groupId>org.codehaus.mojo</groupId>
           <artifactId>exec-maven-plugin</artifactId>
           <version>1.2</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-pdf-plugin</artifactId>
+          <version>1.1</version>
+        </plugin>
       </plugins>
     </pluginManagement>
 
@@ -773,6 +783,14 @@
           </excludes>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-pdf-plugin</artifactId>
+        <configuration>
+          <outputDirectory>${project.reporting.outputDirectory}</outputDirectory>
+          <includeReports>false</includeReports>
+        </configuration>
+      </plugin>
     </plugins>
   </build>
 

+ 7 - 0
hadoop-tools/hadoop-distcp/README

@@ -0,0 +1,7 @@
+DistCp (distributed copy) is a tool used for large inter/intra-cluster copying. 
+It uses Map/Reduce to effect its distribution, error handling and recovery, 
+and reporting. It expands a list of files and directories into input to map tasks, 
+each of which will copy a partition of the files specified in the source list.
+
+Version 0.1 (2010/08/02 sriksun)
+ - Initial Version

+ 185 - 0
hadoop-tools/hadoop-distcp/pom.xml

@@ -0,0 +1,185 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project>
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.hadoop</groupId>
+    <artifactId>hadoop-project</artifactId>
+    <version>0.23.1-SNAPSHOT</version>
+    <relativePath>../../hadoop-project</relativePath>
+  </parent>
+  <groupId>org.apache.hadoop.tools</groupId>
+  <artifactId>hadoop-distcp</artifactId>
+  <version>0.23.1-SNAPSHOT</version>
+  <description>Apache Hadoop Distributed Copy</description>
+  <name>Apache Hadoop Distributed Copy</name>
+  <packaging>jar</packaging>
+
+  <properties>
+    <file.encoding>UTF-8</file.encoding>
+    <downloadSources>true</downloadSources>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-app</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-hs</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-core</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <scope>test</scope>
+      <type>test-jar</type>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+        <filtering>true</filtering>
+      </resource>
+    </resources>
+    <testResources>
+      <testResource>
+        <directory>src/test/resources</directory>
+        <filtering>true</filtering>
+      </testResource>
+    </testResources>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <forkMode>always</forkMode>
+          <forkedProcessTimeoutInSeconds>600</forkedProcessTimeoutInSeconds>
+          <argLine>-Xmx1024m</argLine>
+          <includes>
+            <include>**/Test*.java</include>
+          </includes>
+          <redirectTestOutputToFile>true</redirectTestOutputToFile>
+          <systemProperties>
+            <property>
+              <name>test.build.data</name>
+              <value>${basedir}/target/test/data</value>
+            </property>
+            <property>
+              <name>hadoop.log.dir</name>
+              <value>target/test/logs</value>
+            </property>
+            <property>
+              <name>org.apache.commons.logging.Log</name>
+              <value>org.apache.commons.logging.impl.SimpleLog</value>
+            </property>
+            <property>
+              <name>org.apache.commons.logging.simplelog.defaultlog</name>
+              <value>warn</value>
+            </property>
+          </systemProperties>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${project.build.directory}/lib</outputDirectory>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <configuration>
+          <enableRulesSummary>true</enableRulesSummary>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <configuration>
+          <archive>
+            <manifest>
+              <mainClass>org.apache.hadoop.tools.DistCp</mainClass>
+            </manifest>
+          </archive>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-source-plugin</artifactId>
+        <configuration>
+          <attach>true</attach>
+        </configuration>
+        <executions>
+          <execution>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-pdf-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>pdf</id>
+            <phase>package</phase>
+            <goals>
+              <goal>pdf</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>

+ 218 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java

@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.IOException;
+
+/**
+ * The CopyListing abstraction is responsible for how the list of
+ * sources and targets is constructed, for DistCp's copy function.
+ * The copy-listing should be a SequenceFile<Text, FileStatus>,
+ * located at the path specified to buildListing(),
+ * each entry being a pair of (Source relative path, source file status),
+ * all the paths being fully qualified.
+ */
+public abstract class CopyListing extends Configured {
+
+  private Credentials credentials;
+
+  /**
+   * Build listing function creates the input listing that distcp uses to
+   * perform the copy.
+   *
+   * The build listing is a sequence file that has relative path of a file in the key
+   * and the file status information of the source file in the value
+   *
+   * For instance if the source path is /tmp/data and the traversed path is
+   * /tmp/data/dir1/dir2/file1, then the sequence file would contain
+   *
+   * key: /dir1/dir2/file1 and value: FileStatus(/tmp/data/dir1/dir2/file1)
+   *
+   * File would also contain directory entries. Meaning, if /tmp/data/dir1/dir2/file1
+   * is the only file under /tmp/data, the resulting sequence file would contain the
+   * following entries
+   *
+   * key: /dir1 and value: FileStatus(/tmp/data/dir1)
+   * key: /dir1/dir2 and value: FileStatus(/tmp/data/dir1/dir2)
+   * key: /dir1/dir2/file1 and value: FileStatus(/tmp/data/dir1/dir2/file1)
+   *
+   * Cases requiring special handling:
+   * If source path is a file (/tmp/file1), contents of the file will be as follows
+   *
+   * TARGET DOES NOT EXIST: Key-"", Value-FileStatus(/tmp/file1)
+   * TARGET IS FILE       : Key-"", Value-FileStatus(/tmp/file1)
+   * TARGET IS DIR        : Key-"/file1", Value-FileStatus(/tmp/file1)  
+   *
+   * @param pathToListFile - Output file where the listing would be stored
+   * @param options - Input options to distcp
+   * @throws IOException - Exception if any
+   */
+  public final void buildListing(Path pathToListFile,
+                                 DistCpOptions options) throws IOException {
+    validatePaths(options);
+    doBuildListing(pathToListFile, options);
+    Configuration config = getConf();
+
+    config.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, pathToListFile.toString());
+    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, getBytesToCopy());
+    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, getNumberOfPaths());
+
+    checkForDuplicates(pathToListFile);
+  }
+
+  /**
+   * Validate input and output paths
+   *
+   * @param options - Input options
+   * @throws InvalidInputException: If inputs are invalid
+   * @throws IOException: any Exception with FS 
+   */
+  protected abstract void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException;
+
+  /**
+   * The interface to be implemented by sub-classes, to create the source/target file listing.
+   * @param pathToListFile Path on HDFS where the listing file is written.
+   * @param options Input Options for DistCp (indicating source/target paths.)
+   * @throws IOException: Thrown on failure to create the listing file.
+   */
+  protected abstract void doBuildListing(Path pathToListFile,
+                                         DistCpOptions options) throws IOException;
+
+  /**
+   * Return the total bytes that DistCp should copy for the source paths
+   * This doesn't consider whether a file is identical (and hence should be skipped) during copy
+   *
+   * @return total bytes to copy
+   */
+  protected abstract long getBytesToCopy();
+
+  /**
+   * Return the total number of paths to distcp, includes directories as well
+   * This doesn't consider whether file/dir is already present and should be skipped during copy
+   *
+   * @return Total number of paths to distcp
+   */
+  protected abstract long getNumberOfPaths();
+
+  /**
+   * Validate the final resulting path listing to see if there are any duplicate entries
+   *
+   * @param pathToListFile - path listing built by doBuildListing
+   * @throws IOException - if any issues arise while checking for duplicates
+   * @throws DuplicateFileException - if there are duplicates
+   */
+  private void checkForDuplicates(Path pathToListFile)
+      throws DuplicateFileException, IOException {
+
+    Configuration config = getConf();
+    FileSystem fs = pathToListFile.getFileSystem(config);
+
+    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);
+
+    SequenceFile.Reader reader = new SequenceFile.Reader(
+                          config, SequenceFile.Reader.file(sortedList));
+    try {
+      Text lastKey = new Text("*"); //source relative path can never hold *
+      FileStatus lastFileStatus = new FileStatus();
+
+      Text currentKey = new Text();
+      while (reader.next(currentKey)) {
+        if (currentKey.equals(lastKey)) {
+          FileStatus currentFileStatus = new FileStatus();
+          reader.getCurrentValue(currentFileStatus);
+          throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and " +
+              currentFileStatus.getPath() + " would cause duplicates. Aborting");
+        }
+        reader.getCurrentValue(lastFileStatus);
+        lastKey.set(currentKey);
+      }
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+  }
+
+  /**
+   * Protected constructor, to initialize configuration.
+   * @param configuration The input configuration,
+   *                        with which the source/target FileSystems may be accessed.
+   * @param credentials - Credentials object on which the FS delegation tokens are cached. If null,
+   * delegation token caching is skipped
+   */
+  protected CopyListing(Configuration configuration, Credentials credentials) {
+    setConf(configuration);
+    setCredentials(credentials);
+  }
+
+  /**
+   * Set the Credentials store, on which FS delegation tokens will be cached
+   * @param credentials - Credentials object
+   */
+  protected void setCredentials(Credentials credentials) {
+    this.credentials = credentials;
+  }
+
+  /**
+   * get credentials to update the delegation tokens for accessed FS objects
+   * @return Credentials object
+   */
+  protected Credentials getCredentials() {
+    return credentials;
+  }
+
+  /**
+   * Public Factory method with which the appropriate CopyListing implementation may be retrieved.
+   * @param configuration The input configuration.
+   * @param credentials Credentials object on which the FS delegation tokens are cached
+   * @param options The input Options, to help choose the appropriate CopyListing Implementation.
+   * @return An instance of the appropriate CopyListing implementation.
+   */
+  public static CopyListing getCopyListing(Configuration configuration,
+                                           Credentials credentials,
+                                           DistCpOptions options) {
+    if (options.getSourceFileListing() == null) {
+      return new GlobbedCopyListing(configuration, credentials);
+    } else {
+      return new FileBasedCopyListing(configuration, credentials);
+    }
+  }
+
+  static class DuplicateFileException extends RuntimeException {
+    public DuplicateFileException(String message) {
+      super(message);
+    }
+  }
+
+  static class InvalidInputException extends RuntimeException {
+    public InvalidInputException(String message) {
+      super(message);
+    }
+  }
+}
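
For reference, here is a minimal sketch (not part of this patch) of how the copy-listing SequenceFile described in the CopyListing javadoc could be inspected. It assumes a listing has already been written, e.g. by buildListing(); the class name and argument handling are illustrative only.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class ListingDump {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path listingPath = new Path(args[0]);  // the path that was passed to buildListing()

        // The listing is a SequenceFile<Text, FileStatus>: key = source-relative path,
        // value = FileStatus of the fully-qualified source path.
        SequenceFile.Reader reader = new SequenceFile.Reader(
            conf, SequenceFile.Reader.file(listingPath));
        try {
          Text relativePath = new Text();
          FileStatus sourceStatus = new FileStatus();
          while (reader.next(relativePath, sourceStatus)) {
            System.out.println(relativePath + " -> " + sourceStatus.getPath());
          }
        } finally {
          reader.close();
        }
      }
    }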

+ 405 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCp.java

@@ -0,0 +1,405 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobSubmissionFiles;
+import org.apache.hadoop.mapreduce.Cluster;
+import org.apache.hadoop.tools.CopyListing.*;
+import org.apache.hadoop.tools.mapred.CopyMapper;
+import org.apache.hadoop.tools.mapred.CopyOutputFormat;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import java.io.IOException;
+import java.util.Random;
+
+/**
+ * DistCp is the main driver-class for DistCpV2.
+ * For command-line use, DistCp::main() orchestrates the parsing of command-line
+ * parameters and the launch of the DistCp job.
+ * For programmatic use, a DistCp object can be constructed by specifying
+ * options (in a DistCpOptions object), and DistCp::execute() may be used to
+ * launch the copy-job. DistCp may alternatively be sub-classed to fine-tune
+ * behaviour.
+ */
+public class DistCp extends Configured implements Tool {
+  private static final Log LOG = LogFactory.getLog(DistCp.class);
+
+  private DistCpOptions inputOptions;
+  private Path metaFolder;
+
+  private static final String PREFIX = "_distcp";
+  private static final String WIP_PREFIX = "._WIP_";
+  private static final String DISTCP_DEFAULT_XML = "distcp-default.xml";
+  public static final Random rand = new Random();
+
+  private boolean submitted;
+  private FileSystem jobFS;
+
+  /**
+   * Public Constructor. Creates DistCp object with specified input-parameters.
+   * (E.g. source-paths, target-location, etc.)
+   * @param inputOptions Options (indicating source-paths, target-location.)
+   * @param configuration The Hadoop configuration against which the Copy-mapper must run.
+   * @throws Exception, on failure.
+   */
+  public DistCp(Configuration configuration, DistCpOptions inputOptions) throws Exception {
+    Configuration config = new Configuration(configuration);
+    config.addResource(DISTCP_DEFAULT_XML);
+    setConf(config);
+    this.inputOptions = inputOptions;
+    this.metaFolder   = createMetaFolderPath();
+  }
+
+  /**
+   * To be used with the ToolRunner. Not for public consumption.
+   */
+  private DistCp() {}
+
+  /**
+   * Implementation of Tool::run(). Orchestrates the copy of source file(s)
+   * to target location, by:
+   *  1. Creating a list of files to be copied to target.
+   *  2. Launching a Map-only job to copy the files. (Delegates to execute().)
+   * @param argv List of arguments passed to DistCp, from the ToolRunner.
+   * @return On success, it returns 0. Else, -1.
+   */
+  public int run(String[] argv) {
+    try {
+      inputOptions = (OptionsParser.parse(argv));
+
+      LOG.info("Input Options: " + inputOptions);
+    } catch (Throwable e) {
+      LOG.error("Invalid arguments: ", e);
+      System.err.println("Invalid arguments: " + e.getMessage());
+      OptionsParser.usage();      
+      return DistCpConstants.INVALID_ARGUMENT;
+    }
+    
+    try {
+      execute();
+    } catch (InvalidInputException e) {
+      LOG.error("Invalid input: ", e);
+      return DistCpConstants.INVALID_ARGUMENT;
+    } catch (DuplicateFileException e) {
+      LOG.error("Duplicate files in input path: ", e);
+      return DistCpConstants.DUPLICATE_INPUT;
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      return DistCpConstants.UNKNOWN_ERROR;
+    }
+    return DistCpConstants.SUCCESS;
+  }
+
+  /**
+   * Implements the core-execution. Creates the file-list for copy,
+   * and launches the Hadoop-job, to do the copy.
+   * @return Job handle
+   * @throws Exception, on failure.
+   */
+  public Job execute() throws Exception {
+    assert inputOptions != null;
+    assert getConf() != null;
+
+    Job job = null;
+    try {
+      metaFolder = createMetaFolderPath();
+      jobFS = metaFolder.getFileSystem(getConf());
+
+      job = createJob();
+      createInputFileListing(job);
+
+      job.submit();
+      submitted = true;
+    } finally {
+      if (!submitted) {
+        cleanup();
+      }
+    }
+
+    String jobID = job.getJobID().toString();
+    job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);
+    
+    LOG.info("DistCp job-id: " + jobID);
+    if (inputOptions.shouldBlock()) {
+      job.waitForCompletion(true);
+    }
+    return job;
+  }
+
+  /**
+   * Create Job object for submitting it, with all the configuration
+   *
+   * @return Reference to job object.
+   * @throws IOException - Exception if any
+   */
+  private Job createJob() throws IOException {
+    String jobName = "distcp";
+    String userChosenName = getConf().get(JobContext.JOB_NAME);
+    if (userChosenName != null)
+      jobName += ": " + userChosenName;
+    Job job = Job.getInstance(getConf());
+    job.setJobName(jobName);
+    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
+    job.setJarByClass(CopyMapper.class);
+    configureOutputFormat(job);
+
+    job.setMapperClass(CopyMapper.class);
+    job.setNumReduceTasks(0);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(Text.class);
+    job.setOutputFormatClass(CopyOutputFormat.class);
+    job.getConfiguration().set(JobContext.MAP_SPECULATIVE, "false");
+    job.getConfiguration().set(JobContext.NUM_MAPS,
+                  String.valueOf(inputOptions.getMaxMaps()));
+
+    if (inputOptions.getSslConfigurationFile() != null) {
+      setupSSLConfig(job);
+    }
+
+    inputOptions.appendToConf(job.getConfiguration());
+    return job;
+  }
+
+  /**
+   * Setup ssl configuration on the job configuration to enable hsftp access
+   * from map job. Also copy the ssl configuration file to Distributed cache
+   *
+   * @param job - Reference to job's handle
+   * @throws java.io.IOException - Exception if unable to locate ssl config file
+   */
+  private void setupSSLConfig(Job job) throws IOException  {
+    Configuration configuration = job.getConfiguration();
+    Path sslConfigPath = new Path(configuration.
+        getResource(inputOptions.getSslConfigurationFile()).toString());
+
+    addSSLFilesToDistCache(job, sslConfigPath);
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
+  }
+
+  /**
+   * Add SSL files to distributed cache. Trust store, key store and ssl config xml
+   *
+   * @param job - Job handle
+   * @param sslConfigPath - ssl Configuration file specified through options
+   * @throws IOException - If any
+   */
+  private void addSSLFilesToDistCache(Job job,
+                                      Path sslConfigPath) throws IOException {
+    Configuration configuration = job.getConfiguration();
+    FileSystem localFS = FileSystem.getLocal(configuration);
+
+    Configuration sslConf = new Configuration(false);
+    sslConf.addResource(sslConfigPath);
+
+    Path localStorePath = getLocalStorePath(sslConf,
+                            DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION);
+    job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
+                                      localFS.getWorkingDirectory()).toUri());
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION,
+                      localStorePath.getName());
+
+    localStorePath = getLocalStorePath(sslConf,
+                             DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION);
+    job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
+                                      localFS.getWorkingDirectory()).toUri());
+    configuration.set(DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION,
+                                      localStorePath.getName());
+
+    job.addCacheFile(sslConfigPath.makeQualified(localFS.getUri(),
+                                      localFS.getWorkingDirectory()).toUri());
+
+  }
+
+  /**
+   * Get Local Trust store/key store path
+   *
+   * @param sslConf - Config from SSL Client xml
+   * @param storeKey - Key for either trust store or key store
+   * @return - Path where the store is present
+   * @throws IOException -If any
+   */
+  private Path getLocalStorePath(Configuration sslConf, String storeKey) throws IOException {
+    if (sslConf.get(storeKey) != null) {
+      return new Path(sslConf.get(storeKey));
+    } else {
+      throw new IOException("Store for " + storeKey + " is not set in " +
+          inputOptions.getSslConfigurationFile());
+    }
+  }
+
+  /**
+   * Setup output format appropriately
+   *
+   * @param job - Job handle
+   * @throws IOException - Exception if any
+   */
+  private void configureOutputFormat(Job job) throws IOException {
+    final Configuration configuration = job.getConfiguration();
+    Path targetPath = inputOptions.getTargetPath();
+    FileSystem targetFS = targetPath.getFileSystem(configuration);
+    targetPath = targetPath.makeQualified(targetFS.getUri(),
+                                          targetFS.getWorkingDirectory());
+
+    if (inputOptions.shouldAtomicCommit()) {
+      Path workDir = inputOptions.getAtomicWorkPath();
+      if (workDir == null) {
+        workDir = targetPath.getParent();
+      }
+      workDir = new Path(workDir, WIP_PREFIX + targetPath.getName()
+                                + rand.nextInt());
+      FileSystem workFS = workDir.getFileSystem(configuration);
+      if (!DistCpUtils.compareFs(targetFS, workFS)) {
+        throw new IllegalArgumentException("Work path " + workDir +
+            " and target path " + targetPath + " are in different file system");
+      }
+      CopyOutputFormat.setWorkingDirectory(job, workDir);
+    } else {
+      CopyOutputFormat.setWorkingDirectory(job, targetPath);
+    }
+    CopyOutputFormat.setCommitDirectory(job, targetPath);
+
+    Path logPath = inputOptions.getLogPath();
+    if (logPath == null) {
+      logPath = new Path(metaFolder, "_logs");
+    } else {
+      LOG.info("DistCp job log path: " + logPath);
+    }
+    CopyOutputFormat.setOutputPath(job, logPath);
+  }
+
+  /**
+   * Create input listing by invoking an appropriate copy listing
+   * implementation. Also add delegation tokens for each path
+   * to job's credential store
+   *
+   * @param job - Handle to job
+   * @return Returns the path where the copy listing is created
+   * @throws IOException - If any
+   */
+  private Path createInputFileListing(Job job) throws IOException {
+    Path fileListingPath = getFileListingPath();
+    CopyListing copyListing = CopyListing.getCopyListing(job.getConfiguration(),
+        job.getCredentials(), inputOptions);
+    copyListing.buildListing(fileListingPath, inputOptions);
+    return fileListingPath;
+  }
+
+  /**
+   * Get default name of the copy listing file. Use the meta folder
+   * to create the copy listing file
+   *
+   * @return - Path where the copy listing file has to be saved
+   * @throws IOException - Exception if any
+   */
+  private Path getFileListingPath() throws IOException {
+    String fileListPathStr = metaFolder + "/fileList.seq";
+    Path path = new Path(fileListPathStr);
+    return new Path(path.toUri().normalize().toString());
+  }
+
+  /**
+   * Create a default working folder for the job, under the
+   * job staging directory
+   *
+   * @return Returns the working folder information
+   * @throws Exception - Exception if any
+   */
+  private Path createMetaFolderPath() throws Exception {
+    Configuration configuration = getConf();
+    Path stagingDir = JobSubmissionFiles.getStagingDir(
+            new Cluster(configuration), configuration);
+    Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
+    if (LOG.isDebugEnabled())
+      LOG.debug("Meta folder location: " + metaFolderPath);
+    configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());    
+    return metaFolderPath;
+  }
+
+  /**
+   * Main function of the DistCp program. Parses the input arguments (via OptionsParser),
+   * and invokes the DistCp::run() method, via the ToolRunner.
+   * @param argv Command-line arguments sent to DistCp.
+   */
+  public static void main(String argv[]) {
+    try {
+      DistCp distCp = new DistCp();
+      Cleanup CLEANUP = new Cleanup(distCp);
+
+      Runtime.getRuntime().addShutdownHook(CLEANUP);
+      System.exit(ToolRunner.run(getDefaultConf(), distCp, argv));
+    }
+    catch (Exception e) {
+      LOG.error("Couldn't complete DistCp operation: ", e);
+      System.exit(DistCpConstants.UNKNOWN_ERROR);
+    }
+  }
+
+  /**
+   * Loads properties from distcp-default.xml into configuration
+   * object
+   * @return Configuration which includes properties from distcp-default.xml
+   */
+  private static Configuration getDefaultConf() {
+    Configuration config = new Configuration();
+    config.addResource(DISTCP_DEFAULT_XML);
+    return config;
+  }
+
+  private synchronized void cleanup() {
+    try {
+      if (metaFolder == null) return;
+
+      jobFS.delete(metaFolder, true);
+      metaFolder = null;
+    } catch (IOException e) {
+      LOG.error("Unable to cleanup meta folder: " + metaFolder, e);
+    }
+  }
+
+  private boolean isSubmitted() {
+    return submitted;
+  }
+
+  private static class Cleanup extends Thread {
+    private final DistCp distCp;
+
+    public Cleanup(DistCp distCp) {
+      this.distCp = distCp;
+    }
+
+    @Override
+    public void run() {
+      if (distCp.isSubmitted()) return;
+
+      distCp.cleanup();
+    }
+  }
+}
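
As the DistCp class javadoc above notes, the tool can also be driven programmatically: build a DistCpOptions object, construct a DistCp with it, and call execute(). A hedged sketch of that usage, based only on the constructors and setters introduced in this patch (the source/target paths are placeholders):

    import java.util.Arrays;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.tools.DistCp;
    import org.apache.hadoop.tools.DistCpOptions;

    public class ProgrammaticDistCp {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();  // assumed to carry the cluster settings

        // Placeholder paths: one source directory, one target directory.
        DistCpOptions options = new DistCpOptions(
            Arrays.asList(new Path("hdfs://nn1:8020/src/data")),
            new Path("hdfs://nn2:8020/dst/data"));
        options.setSyncFolder(true);  // equivalent of -update
        options.setMaxMaps(10);       // equivalent of -m 10

        // execute() builds the copy listing, submits the map-only job and,
        // because blocking is true by default, waits for completion.
        DistCp distCp = new DistCp(conf, options);
        Job job = distCp.execute();
        System.out.println("DistCp successful: " + job.isSuccessful());
      }
    }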

+ 104 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpConstants.java

@@ -0,0 +1,104 @@
+package org.apache.hadoop.tools;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utility class to hold commonly used constants.
+ */
+public class DistCpConstants {
+
+  /* Default number of maps to use for DistCp */
+  public static final int DEFAULT_MAPS = 20;
+
+  /* Default bandwidth if none specified */
+  public static final int DEFAULT_BANDWIDTH_MB = 100;
+
+  /* Default strategy for copying. Implementation looked up
+     from distcp-default.xml
+   */
+  public static final String UNIFORMSIZE = "uniformsize";
+
+  /**
+   *  Constants mapping to command line switches/input options
+   */
+  public static final String CONF_LABEL_ATOMIC_COPY = "distcp.atomic.copy";
+  public static final String CONF_LABEL_WORK_PATH = "distcp.work.path";
+  public static final String CONF_LABEL_LOG_PATH = "distcp.log.path";
+  public static final String CONF_LABEL_IGNORE_FAILURES = "distcp.ignore.failures";
+  public static final String CONF_LABEL_PRESERVE_STATUS = "distcp.preserve.status";
+  public static final String CONF_LABEL_SYNC_FOLDERS = "distcp.sync.folders";
+  public static final String CONF_LABEL_DELETE_MISSING = "distcp.delete.missing.source";
+  public static final String CONF_LABEL_SSL_CONF = "distcp.keystore.resource";
+  public static final String CONF_LABEL_MAX_MAPS = "distcp.max.maps";
+  public static final String CONF_LABEL_SOURCE_LISTING = "distcp.source.listing";
+  public static final String CONF_LABEL_COPY_STRATEGY = "distcp.copy.strategy";
+  public static final String CONF_LABEL_SKIP_CRC = "distcp.skip.crc";
+  public static final String CONF_LABEL_OVERWRITE = "distcp.copy.overwrite";
+  public static final String CONF_LABEL_BANDWIDTH_MB = "distcp.map.bandwidth.mb";
+
+  /* Total bytes to be copied. Updated by copylisting. Unfiltered count */
+  public static final String CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED = "mapred.total.bytes.expected";
+
+  /* Total number of paths to copy, includes directories. Unfiltered count */
+  public static final String CONF_LABEL_TOTAL_NUMBER_OF_RECORDS = "mapred.number.of.records";
+
+  /* SSL keystore resource */
+  public static final String CONF_LABEL_SSL_KEYSTORE = "dfs.https.client.keystore.resource";
+
+  /* If input is specified via -f <source listing>, the file containing the src paths */
+  public static final String CONF_LABEL_LISTING_FILE_PATH = "distcp.listing.file.path";
+
+  /* Directory where the mapreduce job will write to. If not atomic commit, then same
+    as CONF_LABEL_TARGET_FINAL_PATH
+   */
+  public static final String CONF_LABEL_TARGET_WORK_PATH = "distcp.target.work.path";
+
+  /* Directory where the final data will be committed to. If not atomic commit, then same
+    as CONF_LABEL_TARGET_WORK_PATH
+   */
+  public static final String CONF_LABEL_TARGET_FINAL_PATH = "distcp.target.final.path";
+
+  /**
+   * DistCp job id for consumers of DistCp
+   */
+  public static final String CONF_LABEL_DISTCP_JOB_ID = "distcp.job.id";
+
+  /* Meta folder where the job's intermediate data is kept */
+  public static final String CONF_LABEL_META_FOLDER = "distcp.meta.folder";
+
+  /**
+   * Conf label for SSL Trust-store location.
+   */
+  public static final String CONF_LABEL_SSL_TRUST_STORE_LOCATION
+      = "ssl.client.truststore.location";
+
+  /**
+   * Conf label for SSL Key-store location.
+   */
+  public static final String CONF_LABEL_SSL_KEY_STORE_LOCATION
+      = "ssl.client.keystore.location";
+
+  /**
+   * Constants for DistCp return code to shell / consumer of ToolRunner's run
+   */
+  public static final int SUCCESS = 0;
+  public static final int INVALID_ARGUMENT = -1;
+  public static final int DUPLICATE_INPUT = -2;
+  public static final int UNKNOWN_ERROR = -999;
+}
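
The return-code constants above are what DistCp::run() hands back to the shell through ToolRunner. A small illustrative sketch (not part of the patch) of how a caller might interpret such an exit code:

    import org.apache.hadoop.tools.DistCpConstants;

    public class DistCpExitCodes {
      // Illustrative only: maps a DistCp exit code to a short description,
      // mirroring the error handling in DistCp::run().
      static String describe(int exitCode) {
        switch (exitCode) {
          case DistCpConstants.SUCCESS:          return "copy succeeded";
          case DistCpConstants.INVALID_ARGUMENT: return "invalid arguments or invalid input";
          case DistCpConstants.DUPLICATE_INPUT:  return "duplicate files in the input paths";
          case DistCpConstants.UNKNOWN_ERROR:    return "unexpected failure, check the logs";
          default:                               return "unrecognised exit code: " + exitCode;
        }
      }

      public static void main(String[] args) {
        System.out.println(describe(Integer.parseInt(args[0])));
      }
    }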

+ 218 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptionSwitch.java

@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.cli.Option;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * Enumeration mapping configuration keys to distcp command line
+ * options.
+ */
+public enum DistCpOptionSwitch {
+
+  /**
+   * Ignores any failures during copy, and continues with the rest.
+   * Logs failures in a file
+   */
+  IGNORE_FAILURES(DistCpConstants.CONF_LABEL_IGNORE_FAILURES,
+      new Option("i", false, "Ignore failures during copy")),
+
+  /**
+   * Preserves status of file/path in the target.
+   * Default behavior with -p, is to preserve replication,
+   * block size, user, group and permission on the target file
+   *
+   * If any of the optional switches are present among rbugp, then
+   * only the corresponding file attribute is preserved
+   *
+   */
+  PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
+      new Option("p", true, "preserve status (rbugp)" +
+          "(replication, block-size, user, group, permission)")),
+
+  /**
+   * Update target location by copying only files that are missing
+   * in the target. This can be used to periodically sync two folders
+   * across source and target. Typically used with DELETE_MISSING
+   * Incompatible with ATOMIC_COMMIT
+   */
+  SYNC_FOLDERS(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, 
+      new Option("update", false, "Update target, copying only missing" +
+          "files or directories")),
+
+  /**
+   * Deletes files in the target that are missing from the source
+   * This allows the target to be in sync with the source contents
+   * Typically used in conjunction with SYNC_FOLDERS
+   * Incompatible with ATOMIC_COMMIT
+   */
+  DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING,
+      new Option("delete", false, "Delete from target, " +
+          "files missing in source")),
+
+  /**
+   * Configuration file to use with hftps:// for securely copying
+   * files across clusters. Typically the configuration file contains
+   * truststore/keystore information such as location, password and type
+   */
+  SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF,
+      new Option("mapredSslConf", true, "Configuration for ssl config file" +
+          ", to use with hftps://")),
+
+  /**
+   * Max number of maps to use during copy. DistCp will split work
+   * as equally as possible among these maps
+   */
+  MAX_MAPS(DistCpConstants.CONF_LABEL_MAX_MAPS, 
+      new Option("m", true, "Max number of concurrent maps to use for copy")),
+
+  /**
+   * Source file listing can be provided to DistCp in a file.
+   * This allows DistCp to copy an arbitrary list of files from the source
+   * and copy them to target
+   */
+  SOURCE_FILE_LISTING(DistCpConstants.CONF_LABEL_SOURCE_LISTING,
+      new Option("f", true, "List of files that need to be copied")),
+
+  /**
+   * Copy all the source files and commit them atomically to the target
+   * This is typically useful in cases where there is a process
+   * polling for availability of a file/dir. This option is incompatible
+   * with SYNC_FOLDERS & DELETE_MISSING
+   */
+  ATOMIC_COMMIT(DistCpConstants.CONF_LABEL_ATOMIC_COPY,
+      new Option("atomic", false, "Commit all changes or none")),
+
+  /**
+   * Work path, to be used only in conjunction with atomic commit
+   */
+  WORK_PATH(DistCpConstants.CONF_LABEL_WORK_PATH,
+      new Option("tmp", true, "Intermediate work path to be used for atomic commit")),
+
+  /**
+   * Log path where distcp output logs are written to
+   */
+  LOG_PATH(DistCpConstants.CONF_LABEL_LOG_PATH,
+      new Option("log", true, "Folder on DFS where distcp execution logs are saved")),
+
+  /**
+   * Copy strategy to use. This could be dynamic or uniform-size, etc.
+   * DistCp would use an appropriate input format based on this.
+   */
+  COPY_STRATEGY(DistCpConstants.CONF_LABEL_COPY_STRATEGY,
+      new Option("strategy", true, "Copy strategy to use. Default is " +
+          "dividing work based on file sizes")),
+
+  /**
+   * Skip CRC checks between source and target, when determining what
+   * files need to be copied.
+   */
+  SKIP_CRC(DistCpConstants.CONF_LABEL_SKIP_CRC,
+      new Option("skipcrccheck", false, "Whether to skip CRC checks between " +
+          "source and target paths.")),
+
+  /**
+   * Overwrite target-files unconditionally.
+   */
+  OVERWRITE(DistCpConstants.CONF_LABEL_OVERWRITE,
+      new Option("overwrite", false, "Choose to overwrite target files " +
+          "unconditionally, even if they exist.")),
+
+  /**
+   * Should DistCp execution be blocking
+   */
+  BLOCKING("",
+      new Option("async", false, "Should distcp execution be blocking")),
+
+  FILE_LIMIT("",
+      new Option("filelimit", true, "(Deprecated!) Limit number of files " +
+              "copied to <= n")),
+
+  SIZE_LIMIT("",
+      new Option("sizelimit", true, "(Deprecated!) Limit number of files " +
+              "copied to <= n bytes")),
+
+  /**
+   * Specify bandwidth per map in MB
+   */
+  BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
+      new Option("bandwidth", true, "Specify bandwidth per map in MB"));
+
+  private final String confLabel;
+  private final Option option;
+
+  DistCpOptionSwitch(String confLabel, Option option) {
+    this.confLabel = confLabel;
+    this.option = option;
+  }
+
+  /**
+   * Get Configuration label for the option
+   * @return configuration label name
+   */
+  public String getConfigLabel() {
+    return confLabel;
+  }
+
+  /**
+   * Get CLI Option corresponding to the distcp option
+   * @return option
+   */
+  public Option getOption() {
+    return option;
+  }
+
+  /**
+   * Get Switch symbol
+   * @return switch symbol char
+   */
+  public String getSwitch() {
+    return option.getOpt();
+  }
+
+  @Override
+  public String toString() {
+    return  super.name() + " {" +
+        "confLabel='" + confLabel + '\'' +
+        ", option=" + option + '}';
+  }
+
+  /**
+   * Helper function to add an option to hadoop configuration object
+   * @param conf - Configuration object to include the option
+   * @param option - Option to add
+   * @param value - Value
+   */
+  public static void addToConf(Configuration conf,
+                               DistCpOptionSwitch option,
+                               String value) {
+    conf.set(option.getConfigLabel(), value);
+  }
+
+  /**
+   * Helper function to set an option to hadoop configuration object
+   * @param conf - Configuration object to include the option
+   * @param option - Option to add
+   */
+  public static void addToConf(Configuration conf,
+                               DistCpOptionSwitch option) {
+    conf.set(option.getConfigLabel(), "true");
+  }
+}
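
A brief sketch of how the enum above links a command-line switch to its configuration key; it is illustrative only and simply exercises the accessors and addToConf helpers defined in this patch:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.tools.DistCpOptionSwitch;

    public class OptionSwitchDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Flag-style switches are recorded as "true" under their conf label...
        DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SYNC_FOLDERS);
        // ...while valued switches carry the supplied value.
        DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.BANDWIDTH, "10");

        for (DistCpOptionSwitch opt : new DistCpOptionSwitch[] {
            DistCpOptionSwitch.SYNC_FOLDERS, DistCpOptionSwitch.BANDWIDTH }) {
          System.out.println("-" + opt.getSwitch() + " -> "
              + opt.getConfigLabel() + " = " + conf.get(opt.getConfigLabel()));
        }
      }
    }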

+ 525 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpOptions.java

@@ -0,0 +1,525 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.util.DistCpUtils;
+
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * The Options class encapsulates all DistCp options.
+ * These may be set from command-line (via the OptionsParser)
+ * or may be set manually.
+ */
+public class DistCpOptions {
+
+  private boolean atomicCommit = false;
+  private boolean syncFolder = false;
+  private boolean deleteMissing = false;
+  private boolean ignoreFailures = false;
+  private boolean overwrite = false;
+  private boolean skipCRC = false;
+  private boolean blocking = true;
+
+  private int maxMaps = DistCpConstants.DEFAULT_MAPS;
+  private int mapBandwidth = DistCpConstants.DEFAULT_BANDWIDTH_MB;
+
+  private String sslConfigurationFile;
+
+  private String copyStrategy = DistCpConstants.UNIFORMSIZE;
+
+  private EnumSet<FileAttribute> preserveStatus = EnumSet.noneOf(FileAttribute.class);
+
+  private Path atomicWorkPath;
+
+  private Path logPath;
+
+  private Path sourceFileListing;
+  private List<Path> sourcePaths;
+
+  private Path targetPath;
+
+  public static enum FileAttribute{
+    REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION;
+
+    public static FileAttribute getAttribute(char symbol) {
+      for (FileAttribute attribute : values()) {
+        if (attribute.name().charAt(0) == Character.toUpperCase(symbol)) {
+          return attribute;
+        }
+      }
+      throw new NoSuchElementException("No attribute for " + symbol);
+    }
+  }
+
+  /**
+   * Constructor, to initialize source/target paths.
+   * @param sourcePaths List of source-paths (including wildcards)
+   *                     to be copied to target.
+   * @param targetPath Destination path for the dist-copy.
+   */
+  public DistCpOptions(List<Path> sourcePaths, Path targetPath) {
+    assert sourcePaths != null && !sourcePaths.isEmpty() : "Invalid source paths";
+    assert targetPath != null : "Invalid Target path";
+
+    this.sourcePaths = sourcePaths;
+    this.targetPath = targetPath;
+  }
+
+  /**
+   * Constructor, to initialize source/target paths.
+   * @param sourceFileListing File containing list of source paths
+   * @param targetPath Destination path for the dist-copy.
+   */
+  public DistCpOptions(Path sourceFileListing, Path targetPath) {
+    assert sourceFileListing != null : "Invalid source paths";
+    assert targetPath != null : "Invalid Target path";
+
+    this.sourceFileListing = sourceFileListing;
+    this.targetPath = targetPath;
+  }
+
+  /**
+   * Copy constructor.
+   * @param that DistCpOptions being copied from.
+   */
+  public DistCpOptions(DistCpOptions that) {
+    if (this != that && that != null) {
+      this.atomicCommit = that.atomicCommit;
+      this.syncFolder = that.syncFolder;
+      this.deleteMissing = that.deleteMissing;
+      this.ignoreFailures = that.ignoreFailures;
+      this.overwrite = that.overwrite;
+      this.skipCRC = that.skipCRC;
+      this.blocking = that.blocking;
+      this.maxMaps = that.maxMaps;
+      this.mapBandwidth = that.mapBandwidth;
+      this.sslConfigurationFile = that.getSslConfigurationFile();
+      this.copyStrategy = that.copyStrategy;
+      this.preserveStatus = that.preserveStatus;
+      this.atomicWorkPath = that.getAtomicWorkPath();
+      this.logPath = that.getLogPath();
+      this.sourceFileListing = that.getSourceFileListing();
+      this.sourcePaths = that.getSourcePaths();
+      this.targetPath = that.getTargetPath();
+    }
+  }
+
+  /**
+   * Should the data be committed atomically?
+   *
+   * @return true if data should be committed atomically. false otherwise
+   */
+  public boolean shouldAtomicCommit() {
+    return atomicCommit;
+  }
+
+  /**
+   * Set if the data needs to be committed atomically
+   *
+   * @param atomicCommit - boolean switch
+   */
+  public void setAtomicCommit(boolean atomicCommit) {
+    validate(DistCpOptionSwitch.ATOMIC_COMMIT, atomicCommit);
+    this.atomicCommit = atomicCommit;
+  }
+
+  /**
+   * Should the data be sync'ed between source and target paths?
+   *
+   * @return true if data should be sync'ed up. false otherwise
+   */
+  public boolean shouldSyncFolder() {
+    return syncFolder;
+  }
+
+  /**
+   * Set if source and target folder contents be sync'ed up
+   *
+   * @param syncFolder - boolean switch
+   */
+  public void setSyncFolder(boolean syncFolder) {
+    validate(DistCpOptionSwitch.SYNC_FOLDERS, syncFolder);
+    this.syncFolder = syncFolder;
+  }
+
+  /**
+   * Should target files missing in source should be deleted?
+   *
+   * @return true if zombie target files are to be removed. false otherwise
+   */
+  public boolean shouldDeleteMissing() {
+    return deleteMissing;
+  }
+
+  /**
+   * Set if files only present in target should be deleted
+   *
+   * @param deleteMissing - boolean switch
+   */
+  public void setDeleteMissing(boolean deleteMissing) {
+    validate(DistCpOptionSwitch.DELETE_MISSING, deleteMissing);
+    this.deleteMissing = deleteMissing;
+  }
+
+  /**
+   * Should failures be logged and ignored during copy?
+   *
+   * @return true if failures are to be logged and ignored. false otherwise
+   */
+  public boolean shouldIgnoreFailures() {
+    return ignoreFailures;
+  }
+
+  /**
+   * Set if failures during copy be ignored
+   *
+   * @param ignoreFailures - boolean switch
+   */
+  public void setIgnoreFailures(boolean ignoreFailures) {
+    this.ignoreFailures = ignoreFailures;
+  }
+
+  /**
+   * Should DistCp be running in blocking mode
+   *
+   * @return true if should run in blocking, false otherwise
+   */
+  public boolean shouldBlock() {
+    return blocking;
+  }
+
+  /**
+   * Set if DistCp should run blocking or non-blocking
+   *
+   * @param blocking - boolean switch
+   */
+  public void setBlocking(boolean blocking) {
+    this.blocking = blocking;
+  }
+
+  /**
+   * Should files be overwritten always?
+   *
+   * @return true if files in target that may exist before distcp, should always
+   *         be overwritten. false otherwise
+   */
+  public boolean shouldOverwrite() {
+    return overwrite;
+  }
+
+  /**
+   * Set if files should always be overwritten on target
+   *
+   * @param overwrite - boolean switch
+   */
+  public void setOverwrite(boolean overwrite) {
+    validate(DistCpOptionSwitch.OVERWRITE, overwrite);
+    this.overwrite = overwrite;
+  }
+
+  /**
+   * Should CRC/checksum check be skipped while checking files are identical
+   *
+   * @return true if checksum check should be skipped while checking files are
+   *         identical. false otherwise
+   */
+  public boolean shouldSkipCRC() {
+    return skipCRC;
+  }
+
+  /**
+   * Set if checksum comparison should be skipped while determining if
+   * source and destination files are identical
+   *
+   * @param skipCRC - boolean switch
+   */
+  public void setSkipCRC(boolean skipCRC) {
+    validate(DistCpOptionSwitch.SKIP_CRC, skipCRC);
+    this.skipCRC = skipCRC;
+  }
+
+  /** Get the max number of maps to use for this copy
+   *
+   * @return Max number of maps
+   */
+  public int getMaxMaps() {
+    return maxMaps;
+  }
+
+  /**
+   * Set the max number of maps to use for copy
+   *
+   * @param maxMaps - Number of maps
+   */
+  public void setMaxMaps(int maxMaps) {
+    this.maxMaps = maxMaps;
+  }
+
+  /** Get the map bandwidth in MB
+   *
+   * @return Bandwidth in MB
+   */
+  public int getMapBandwidth() {
+    return mapBandwidth;
+  }
+
+  /**
+   * Set per map bandwidth
+   *
+   * @param mapBandwidth - per map bandwidth
+   */
+  public void setMapBandwidth(int mapBandwidth) {
+    assert mapBandwidth > 0 : "Bandwidth " + mapBandwidth + " is invalid (should be > 0)";
+    this.mapBandwidth = mapBandwidth;
+  }
+
+  /**
+   * Get the path of the SSL configuration file to use for hftps://
+   *
+   * @return Path on local file system
+   */
+  public String getSslConfigurationFile() {
+    return sslConfigurationFile;
+  }
+
+  /**
+   * Set the SSL configuration file path to use with hftps:// (local path)
+   *
+   * @param sslConfigurationFile - Local ssl config file path
+   */
+  public void setSslConfigurationFile(String sslConfigurationFile) {
+    this.sslConfigurationFile = sslConfigurationFile;
+  }
+
+  /**
+   * Returns an iterator with the list of file attributes to preserve
+   *
+   * @return iterator of file attributes to preserve
+   */
+  public Iterator<FileAttribute> preserveAttributes() {
+    return preserveStatus.iterator();
+  }
+
+  /**
+   * Checks whether the given file attribute should be preserved
+   *
+   * @param attribute - Attribute to check
+   * @return True if attribute should be preserved, false otherwise
+   */
+  public boolean shouldPreserve(FileAttribute attribute) {
+    return preserveStatus.contains(attribute);
+  }
+
+  /**
+   * Add file attributes that need to be preserved. This method may be
+   * called multiple times to add attributes.
+   *
+   * @param fileAttribute - Attribute to add, one at a time
+   */
+  public void preserve(FileAttribute fileAttribute) {
+    for (FileAttribute attribute : preserveStatus) {
+      if (attribute.equals(fileAttribute)) {
+        return;
+      }
+    }
+    preserveStatus.add(fileAttribute);
+  }
+
+  /** Get work path for atomic commit. If null, the work
+   * path would be parentOf(targetPath) + "/._WIP_" + nameOf(targetPath)
+   *
+   * @return Atomic work path on the target cluster. Null if not set
+   */
+  public Path getAtomicWorkPath() {
+    return atomicWorkPath;
+  }
+
+  /**
+   * Set the work path for atomic commit
+   *
+   * @param atomicWorkPath - Path on the target cluster
+   */
+  public void setAtomicWorkPath(Path atomicWorkPath) {
+    this.atomicWorkPath = atomicWorkPath;
+  }
+
+  /** Get the output directory for writing distcp logs. If not set, logs
+   * are temporarily written to JobStagingDir/_logs and deleted
+   * upon job completion
+   *
+   * @return Log output path on the cluster where distcp job is run
+   */
+  public Path getLogPath() {
+    return logPath;
+  }
+
+  /**
+   * Set the log path where distcp output logs are stored.
+   * Uses JobStagingDir/_logs by default.
+   *
+   * @param logPath - Path where logs will be saved
+   */
+  public void setLogPath(Path logPath) {
+    this.logPath = logPath;
+  }
+
+  /**
+   * Get the copy strategy to use. Uses appropriate input format
+   *
+   * @return copy strategy to use
+   */
+  public String getCopyStrategy() {
+    return copyStrategy;
+  }
+
+  /**
+   * Set the copy strategy to use. Should map to a strategy implementation
+   * in distcp-default.xml
+   *
+   * @param copyStrategy - copy Strategy to use
+   */
+  public void setCopyStrategy(String copyStrategy) {
+    this.copyStrategy = copyStrategy;
+  }
+
+  /**
+   * File path (hdfs:// or file://) that contains the list of actual
+   * files to copy
+   *
+   * @return - Source listing file path
+   */
+  public Path getSourceFileListing() {
+    return sourceFileListing;
+  }
+
+  /**
+   * Getter for sourcePaths.
+   * @return List of source-paths.
+   */
+  public List<Path> getSourcePaths() {
+    return sourcePaths;
+  }
+
+  /**
+   * Setter for sourcePaths.
+   * @param sourcePaths The new list of source-paths.
+   */
+  public void setSourcePaths(List<Path> sourcePaths) {
+    assert sourcePaths != null && sourcePaths.size() != 0;
+    this.sourcePaths = sourcePaths;
+  }
+
+  /**
+   * Getter for the targetPath.
+   * @return The target-path.
+   */
+  public Path getTargetPath() {
+    return targetPath;
+  }
+
+  public void validate(DistCpOptionSwitch option, boolean value) {
+
+    boolean syncFolder = (option == DistCpOptionSwitch.SYNC_FOLDERS ?
+        value : this.syncFolder);
+    boolean overwrite = (option == DistCpOptionSwitch.OVERWRITE ?
+        value : this.overwrite);
+    boolean deleteMissing = (option == DistCpOptionSwitch.DELETE_MISSING ?
+        value : this.deleteMissing);
+    boolean atomicCommit = (option == DistCpOptionSwitch.ATOMIC_COMMIT ?
+        value : this.atomicCommit);
+    boolean skipCRC = (option == DistCpOptionSwitch.SKIP_CRC ?
+        value : this.skipCRC);
+
+    if (syncFolder && atomicCommit) {
+      throw new IllegalArgumentException("Atomic commit can't be used with " +
+          "sync folder or overwrite options");
+    }
+
+    if (deleteMissing && !(overwrite || syncFolder)) {
+      throw new IllegalArgumentException("Delete missing is applicable " +
+          "only with update or overwrite options");
+    }
+
+    if (overwrite && syncFolder) {
+      throw new IllegalArgumentException("Overwrite and update options are " +
+          "mutually exclusive");
+    }
+
+    if (!syncFolder && skipCRC) {
+      throw new IllegalArgumentException("Skip CRC is valid only with update options");
+    }
+
+  }
+
+  /**
+   * Add options to configuration. These will be used in the Mapper/committer
+   *
+   * @param conf - Configuration object to which the options need to be added
+   */
+  public void appendToConf(Configuration conf) {
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.ATOMIC_COMMIT,
+        String.valueOf(atomicCommit));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.IGNORE_FAILURES,
+        String.valueOf(ignoreFailures));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SYNC_FOLDERS,
+        String.valueOf(syncFolder));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.DELETE_MISSING,
+        String.valueOf(deleteMissing));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.OVERWRITE,
+        String.valueOf(overwrite));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SKIP_CRC,
+        String.valueOf(skipCRC));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.BANDWIDTH,
+        String.valueOf(mapBandwidth));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.PRESERVE_STATUS,
+        DistCpUtils.packAttributes(preserveStatus));
+  }
+
+  /**
+   * Utility to easily string-ify Options, for logging.
+   *
+   * @return String representation of the Options.
+   */
+  @Override
+  public String toString() {
+    return "DistCpOptions{" +
+        "atomicCommit=" + atomicCommit +
+        ", syncFolder=" + syncFolder +
+        ", deleteMissing=" + deleteMissing +
+        ", ignoreFailures=" + ignoreFailures +
+        ", maxMaps=" + maxMaps +
+        ", sslConfigurationFile='" + sslConfigurationFile + '\'' +
+        ", copyStrategy='" + copyStrategy + '\'' +
+        ", sourceFileListing=" + sourceFileListing +
+        ", sourcePaths=" + sourcePaths +
+        ", targetPath=" + targetPath +
+        '}';
+  }
+
+  @Override
+  protected DistCpOptions clone() throws CloneNotSupportedException {
+    return (DistCpOptions) super.clone();
+  }
+}
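
A quick illustration of the option constraints enforced by validate() above: -update and -overwrite are mutually exclusive, -delete requires one of them, -skipcrccheck requires -update, and -atomic cannot be combined with -update. The fragment below is an illustrative sketch only, not part of the patch; the paths are hypothetical and the usual org.apache.hadoop.fs.Path / org.apache.hadoop.conf.Configuration imports are assumed.

    Configuration conf = new Configuration();
    List<Path> sources = Arrays.asList(new Path("hdfs://nn1:8020/src"));   // hypothetical source
    DistCpOptions options = new DistCpOptions(sources, new Path("hdfs://nn2:8020/dst"));
    options.setSyncFolder(true);      // -update
    options.setDeleteMissing(true);   // allowed, since -update is already set
    options.setSkipCRC(true);         // allowed only together with -update
    // options.setAtomicCommit(true); // would throw IllegalArgumentException: -atomic conflicts with -update
    options.appendToConf(conf);       // publishes the switches for the Mapper/Committer to read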

+ 100 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/FileBasedCopyListing.java

@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * FileBasedCopyListing implements the CopyListing interface,
+ * to create the copy-listing for DistCp,
+ * by iterating over all source paths mentioned in a specified input-file.
+ */
+public class FileBasedCopyListing extends CopyListing {
+
+  private final CopyListing globbedListing;
+  /**
+   * Constructor, to initialize base-class.
+   * @param configuration The input Configuration object.
+   * @param credentials - Credentials object on which the FS delegation tokens are cached. If null
+   * delegation token caching is skipped
+   */
+  public FileBasedCopyListing(Configuration configuration, Credentials credentials) {
+    super(configuration, credentials);
+    globbedListing = new GlobbedCopyListing(getConf(), credentials);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException {
+  }
+
+  /**
+   * Implementation of CopyListing::buildListing().
+   *   Iterates over all source paths mentioned in the input-file.
+   * @param pathToListFile Path on HDFS where the listing file is written.
+   * @param options Input Options for DistCp (indicating source/target paths.)
+   * @throws IOException
+   */
+  @Override
+  public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
+    DistCpOptions newOption = new DistCpOptions(options);
+    newOption.setSourcePaths(fetchFileList(options.getSourceFileListing()));
+    globbedListing.buildListing(pathToListFile, newOption);
+  }
+
+  private List<Path> fetchFileList(Path sourceListing) throws IOException {
+    List<Path> result = new ArrayList<Path>();
+    FileSystem fs = sourceListing.getFileSystem(getConf());
+    BufferedReader input = null;
+    try {
+      input = new BufferedReader(new InputStreamReader(fs.open(sourceListing)));
+      String line = input.readLine();
+      while (line != null) {
+        result.add(new Path(line));
+        line = input.readLine();
+      }
+    } finally {
+      IOUtils.closeStream(input);
+    }
+    return result;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getBytesToCopy() {
+    return globbedListing.getBytesToCopy();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getNumberOfPaths() {
+    return globbedListing.getNumberOfPaths();
+  }
+}
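
The file handed to FileBasedCopyListing is expected to be plain text with one source URI per line; fetchFileList() reads it line by line and hands the resulting paths to GlobbedCopyListing, so globs in the listed paths are still expanded. A minimal usage sketch, assuming hypothetical paths, an existing Configuration named conf, and a null Credentials object (i.e. no delegation-token caching):

    DistCpOptions options = new DistCpOptions(
        new Path("hdfs://nn1:8020/user/alice/srcfilelist.txt"),  // one source URI per line
        new Path("hdfs://nn2:8020/backup"));
    CopyListing listing = new FileBasedCopyListing(conf, null);  // null skips delegation-token caching
    listing.buildListing(new Path("hdfs://nn2:8020/tmp/fileList.seq"), options);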

+ 105 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/GlobbedCopyListing.java

@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * GlobbedCopyListing implements the CopyListing interface, to create the copy
+ * listing-file by "globbing" all specified source paths (wild-cards and all.)
+ */
+public class GlobbedCopyListing extends CopyListing {
+  private static final Log LOG = LogFactory.getLog(GlobbedCopyListing.class);
+
+  private final CopyListing simpleListing;
+  /**
+   * Constructor, to initialize the configuration.
+   * @param configuration The input Configuration object.
+   * @param credentials Credentials object on which the FS delegation tokens are cached. If null
+   * delegation token caching is skipped
+   */
+  public GlobbedCopyListing(Configuration configuration, Credentials credentials) {
+    super(configuration, credentials);
+    simpleListing = new SimpleCopyListing(getConf(), credentials) ;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException {
+  }
+
+  /**
+   * Implementation of CopyListing::buildListing().
+   * Creates the copy listing by "globbing" all source-paths.
+   * @param pathToListingFile The location at which the copy-listing file
+   *                           is to be created.
+   * @param options Input Options for DistCp (indicating source/target paths.)
+   * @throws IOException
+   */
+  @Override
+  public void doBuildListing(Path pathToListingFile,
+                             DistCpOptions options) throws IOException {
+
+    List<Path> globbedPaths = new ArrayList<Path>();
+    if (options.getSourcePaths().isEmpty()) {
+      throw new InvalidInputException("Nothing to process. Source paths::EMPTY");  
+    }
+
+    for (Path p : options.getSourcePaths()) {
+      FileSystem fs = p.getFileSystem(getConf());
+      FileStatus[] inputs = fs.globStatus(p);
+
+      if(inputs != null && inputs.length > 0) {
+        for (FileStatus onePath: inputs) {
+          globbedPaths.add(onePath.getPath());
+        }
+      } else {
+        throw new InvalidInputException(p + " doesn't exist");        
+      }
+    }
+
+    DistCpOptions optionsGlobbed = new DistCpOptions(options);
+    optionsGlobbed.setSourcePaths(globbedPaths);
+    simpleListing.buildListing(pathToListingFile, optionsGlobbed);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getBytesToCopy() {
+    return simpleListing.getBytesToCopy();
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getNumberOfPaths() {
+    return simpleListing.getNumberOfPaths();
+  }
+
+}
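
The "globbing" above is plain FileSystem.globStatus(): each source path is expanded against its own FileSystem, and every match is handed to SimpleCopyListing. A rough sketch of that expansion, using a hypothetical wildcard path and an existing Configuration named conf:

    Path wildcard = new Path("hdfs://nn1:8020/data/logs/2012-01-*");
    FileSystem fs = wildcard.getFileSystem(conf);
    FileStatus[] matches = fs.globStatus(wildcard);       // null or empty => InvalidInputException
    for (FileStatus match : matches) {
      System.out.println(match.getPath());                // each match becomes a SimpleCopyListing input
    }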

+ 246 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/OptionsParser.java

@@ -0,0 +1,246 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.cli.*;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+
+import java.util.*;
+
+/**
+ * The OptionsParser parses out the command-line options passed to DistCp,
+ * and interprets those specific to DistCp, to create a DistCpOptions object.
+ */
+public class OptionsParser {
+
+  private static final Log LOG = LogFactory.getLog(OptionsParser.class);
+
+  private static final Options cliOptions = new Options();      
+
+  static {
+    for (DistCpOptionSwitch option : DistCpOptionSwitch.values()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Adding option " + option.getOption());
+      }
+      cliOptions.addOption(option.getOption());
+    }
+  }
+
+  private static class CustomParser extends GnuParser {
+    @Override
+    protected String[] flatten(Options options, String[] arguments, boolean stopAtNonOption) {
+      for (int index = 0; index < arguments.length; index++) {
+        if (arguments[index].equals("-" + DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) {
+          arguments[index] = "-prbugp";
+        }
+      }
+      return super.flatten(options, arguments, stopAtNonOption);
+    }
+  }
+
+  /**
+   * The parse method parses the command-line options, and creates
+   * a corresponding Options object.
+   * @param args Command-line arguments (excluding the options consumed
+   *              by the GenericOptionsParser).
+   * @return The Options object, corresponding to the specified command-line.
+   * @throws IllegalArgumentException: Thrown if the parse fails.
+   */
+  public static DistCpOptions parse(String args[]) throws IllegalArgumentException {
+
+    CommandLineParser parser = new CustomParser();
+
+    CommandLine command;
+    try {
+      command = parser.parse(cliOptions, args, true);
+    } catch (ParseException e) {
+      throw new IllegalArgumentException("Unable to parse arguments. " +
+        Arrays.toString(args), e);
+    }
+
+    DistCpOptions option;
+    Path targetPath;
+    List<Path> sourcePaths = new ArrayList<Path>();
+
+    String leftOverArgs[] = command.getArgs();
+    if (leftOverArgs == null || leftOverArgs.length < 1) {
+      throw new IllegalArgumentException("Target path not specified");
+    }
+
+    //Last Argument is the target path
+    targetPath = new Path(leftOverArgs[leftOverArgs.length -1].trim());
+
+    //Copy any source paths in the arguments to the list
+    for (int index = 0; index < leftOverArgs.length - 1; index++) {
+      sourcePaths.add(new Path(leftOverArgs[index].trim()));
+    }
+
+    /* If the command has a source file listing, use it; else, fall back on the source paths in the args.
+       If both are present, throw an exception and bail. */
+    if (command.hasOption(DistCpOptionSwitch.SOURCE_FILE_LISTING.getSwitch())) {
+      if (!sourcePaths.isEmpty()) {
+        throw new IllegalArgumentException("Both source file listing and source paths present");
+      }
+      option = new DistCpOptions(new Path(getVal(command, DistCpOptionSwitch.
+              SOURCE_FILE_LISTING.getSwitch())), targetPath);
+    } else {
+      if (sourcePaths.isEmpty()) {
+        throw new IllegalArgumentException("Neither source file listing nor source paths present");
+      }
+      option = new DistCpOptions(sourcePaths, targetPath);
+    }
+
+    //Process all the other option switches and set options appropriately
+    if (command.hasOption(DistCpOptionSwitch.IGNORE_FAILURES.getSwitch())) {
+      option.setIgnoreFailures(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.ATOMIC_COMMIT.getSwitch())) {
+      option.setAtomicCommit(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.WORK_PATH.getSwitch()) &&
+        option.shouldAtomicCommit()) {
+      String workPath = getVal(command, DistCpOptionSwitch.WORK_PATH.getSwitch());
+      if (workPath != null && !workPath.isEmpty()) {
+        option.setAtomicWorkPath(new Path(workPath));
+      }
+    } else if (command.hasOption(DistCpOptionSwitch.WORK_PATH.getSwitch())) {
+      throw new IllegalArgumentException("-tmp work-path can only be specified along with -atomic");      
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.LOG_PATH.getSwitch())) {
+      option.setLogPath(new Path(getVal(command, DistCpOptionSwitch.LOG_PATH.getSwitch())));
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SYNC_FOLDERS.getSwitch())) {
+      option.setSyncFolder(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.OVERWRITE.getSwitch())) {
+      option.setOverwrite(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.DELETE_MISSING.getSwitch())) {
+      option.setDeleteMissing(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SKIP_CRC.getSwitch())) {
+      option.setSkipCRC(true);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.BLOCKING.getSwitch())) {
+      option.setBlocking(false);
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.BANDWIDTH.getSwitch())) {
+      try {
+        Integer mapBandwidth = Integer.parseInt(
+            getVal(command, DistCpOptionSwitch.BANDWIDTH.getSwitch()).trim());
+        option.setMapBandwidth(mapBandwidth);
+      } catch (NumberFormatException e) {
+        throw new IllegalArgumentException("Bandwidth specified is invalid: " +
+            getVal(command, DistCpOptionSwitch.BANDWIDTH.getSwitch()), e);
+      }
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SSL_CONF.getSwitch())) {
+      option.setSslConfigurationFile(command.
+          getOptionValue(DistCpOptionSwitch.SSL_CONF.getSwitch()));
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.MAX_MAPS.getSwitch())) {
+      try {
+        Integer maps = Integer.parseInt(
+            getVal(command, DistCpOptionSwitch.MAX_MAPS.getSwitch()).trim());
+        option.setMaxMaps(maps);
+      } catch (NumberFormatException e) {
+        throw new IllegalArgumentException("Number of maps is invalid: " +
+            getVal(command, DistCpOptionSwitch.MAX_MAPS.getSwitch()), e);
+      }
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.COPY_STRATEGY.getSwitch())) {
+      option.setCopyStrategy(
+            getVal(command, DistCpOptionSwitch.COPY_STRATEGY.getSwitch()));
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) {
+      String attributes =
+          getVal(command, DistCpOptionSwitch.PRESERVE_STATUS.getSwitch());
+      if (attributes == null || attributes.isEmpty()) {
+        for (FileAttribute attribute : FileAttribute.values()) {
+          option.preserve(attribute);
+        }
+      } else {
+        for (int index = 0; index < attributes.length(); index++) {
+          option.preserve(FileAttribute.
+              getAttribute(attributes.charAt(index)));
+        }
+      }
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.FILE_LIMIT.getSwitch())) {
+      String fileLimitString = getVal(command,
+                              DistCpOptionSwitch.FILE_LIMIT.getSwitch().trim());
+      try {
+        Integer.parseInt(fileLimitString);
+      }
+      catch (NumberFormatException e) {
+        throw new IllegalArgumentException("File-limit is invalid: "
+                                            + fileLimitString, e);
+      }
+      LOG.warn(DistCpOptionSwitch.FILE_LIMIT.getSwitch() + " is a deprecated" +
+              " option. Ignoring.");
+    }
+
+    if (command.hasOption(DistCpOptionSwitch.SIZE_LIMIT.getSwitch())) {
+      String sizeLimitString = getVal(command,
+                              DistCpOptionSwitch.SIZE_LIMIT.getSwitch().trim());
+      try {
+        Long.parseLong(sizeLimitString);
+      }
+      catch (NumberFormatException e) {
+        throw new IllegalArgumentException("Size-limit is invalid: "
+                                            + sizeLimitString, e);
+      }
+      LOG.warn(DistCpOptionSwitch.SIZE_LIMIT.getSwitch() + " is a deprecated" +
+              " option. Ignoring.");
+    }
+
+    return option;
+  }
+
+  private static String getVal(CommandLine command, String swtch) {
+    String optionValue = command.getOptionValue(swtch);
+    if (optionValue == null) {
+      return null;
+    } else {
+      return optionValue.trim();
+    }
+  }
+
+  public static void usage() {
+    HelpFormatter formatter = new HelpFormatter();
+    formatter.printHelp("distcp OPTIONS [source_path...] <target_path>\n\nOPTIONS", cliOptions);
+  }
+}
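
End to end, the parser turns a DistCp command line into the DistCpOptions object used by the rest of the tool: everything before the last argument is treated as a source path unless -f supplies a listing file. A hedged usage sketch, assuming the usual switch names (-update, -delete, -m) and hypothetical cluster paths:

    String[] args = new String[] {
        "-update", "-delete", "-m", "20",
        "hdfs://nn1:8020/src", "hdfs://nn2:8020/dst" };   // last argument is the target
    DistCpOptions options = OptionsParser.parse(args);
    assert options.shouldSyncFolder() && options.shouldDeleteMissing();
    assert options.getMaxMaps() == 20;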

+ 275 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java

@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.security.Credentials;
+
+import java.io.*;
+import java.util.Stack;
+
+/**
+ * The SimpleCopyListing is responsible for making the exhaustive list of
+ * all files/directories under its specified list of input-paths.
+ * These are written into the specified copy-listing file.
+ * Note: The SimpleCopyListing doesn't handle wild-cards in the input-paths.
+ */
+public class SimpleCopyListing extends CopyListing {
+  private static final Log LOG = LogFactory.getLog(SimpleCopyListing.class);
+
+  private long totalPaths = 0;
+  private long totalBytesToCopy = 0;
+
+  /**
+   * Protected constructor, to initialize configuration.
+   *
+   * @param configuration The input configuration, with which the source/target FileSystems may be accessed.
+   * @param credentials - Credentials object on which the FS delegation tokens are cached. If null
+   * delegation token caching is skipped
+   */
+  protected SimpleCopyListing(Configuration configuration, Credentials credentials) {
+    super(configuration, credentials);
+  }
+
+  @Override
+  protected void validatePaths(DistCpOptions options)
+      throws IOException, InvalidInputException {
+
+    Path targetPath = options.getTargetPath();
+    FileSystem targetFS = targetPath.getFileSystem(getConf());
+    boolean targetIsFile = targetFS.isFile(targetPath);
+
+    //If the target is a file, then the source has to be a single file
+    if (targetIsFile) {
+      if (options.getSourcePaths().size() > 1) {
+        throw new InvalidInputException("Multiple source being copied to a file: " +
+            targetPath);
+      }
+
+      Path srcPath = options.getSourcePaths().get(0);
+      FileSystem sourceFS = srcPath.getFileSystem(getConf());
+      if (!sourceFS.isFile(srcPath)) {
+        throw new InvalidInputException("Cannot copy " + srcPath +
+            ", which is not a file to " + targetPath);
+      }
+    }
+
+    if (options.shouldAtomicCommit() && targetFS.exists(targetPath)) {
+      throw new InvalidInputException("Target path for atomic-commit already exists: " +
+        targetPath + ". Cannot atomic-commit to pre-existing target-path.");
+    }
+
+    for (Path path: options.getSourcePaths()) {
+      FileSystem fs = path.getFileSystem(getConf());
+      if (!fs.exists(path)) {
+        throw new InvalidInputException(path + " doesn't exist");
+      }
+    }
+
+    /* This is required to allow map tasks to access each of the source
+       clusters. This would retrieve the delegation token for each unique
+       file system and add them to job's private credential store
+     */
+    Credentials credentials = getCredentials();
+    if (credentials != null) {
+      Path[] inputPaths = options.getSourcePaths().toArray(new Path[1]);
+      TokenCache.obtainTokensForNamenodes(credentials, inputPaths, getConf());
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
+
+    SequenceFile.Writer fileListWriter = null;
+
+    try {
+      fileListWriter = getWriter(pathToListingFile);
+
+      for (Path path: options.getSourcePaths()) {
+        FileSystem sourceFS = path.getFileSystem(getConf());
+        path = makeQualified(path);
+
+        FileStatus rootStatus = sourceFS.getFileStatus(path);
+        Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
+        boolean localFile = (rootStatus.getClass() != FileStatus.class);
+
+        FileStatus[] sourceFiles = sourceFS.listStatus(path);
+        if (sourceFiles != null && sourceFiles.length > 0) {
+          for (FileStatus sourceStatus: sourceFiles) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
+            }
+            writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+
+            if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
+              }
+              traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile);
+            }
+          }
+        } else {
+          writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile);
+        }
+      }
+    } finally {
+      IOUtils.closeStream(fileListWriter);
+    }
+  }
+
+  private Path computeSourceRootPath(FileStatus sourceStatus,
+                                     DistCpOptions options) throws IOException {
+
+    Path target = options.getTargetPath();
+    FileSystem targetFS = target.getFileSystem(getConf());
+
+    boolean solitaryFile = options.getSourcePaths().size() == 1
+                                                && !sourceStatus.isDirectory();
+
+    if (solitaryFile) {
+      if (targetFS.isFile(target) || !targetFS.exists(target)) {
+        return sourceStatus.getPath();
+      } else {
+        return sourceStatus.getPath().getParent();
+      }
+    } else {
+      boolean specialHandling = (options.getSourcePaths().size() == 1 && !targetFS.exists(target)) ||
+          options.shouldSyncFolder() || options.shouldOverwrite();
+
+      return specialHandling && sourceStatus.isDirectory() ? sourceStatus.getPath() :
+          sourceStatus.getPath().getParent();
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getBytesToCopy() {
+    return totalBytesToCopy;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  protected long getNumberOfPaths() {
+    return totalPaths;
+  }
+
+  private Path makeQualified(Path path) throws IOException {
+    final FileSystem fs = path.getFileSystem(getConf());
+    return path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
+  }
+
+  private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
+    FileSystem fs = pathToListFile.getFileSystem(getConf());
+    if (fs.exists(pathToListFile)) {
+      fs.delete(pathToListFile, false);
+    }
+    return SequenceFile.createWriter(getConf(),
+            SequenceFile.Writer.file(pathToListFile),
+            SequenceFile.Writer.keyClass(Text.class),
+            SequenceFile.Writer.valueClass(FileStatus.class),
+            SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
+  }
+
+  private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
+                                    FileStatus fileStatus) throws IOException {
+    return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
+  }
+
+  private static FileStatus[] getChildren(FileSystem fileSystem,
+                                         FileStatus parent) throws IOException {
+    return fileSystem.listStatus(parent.getPath());
+  }
+
+  private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
+                                         FileStatus sourceStatus,
+                                         Path sourcePathRoot, boolean localFile)
+                                         throws IOException {
+    FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
+    Stack<FileStatus> pathStack = new Stack<FileStatus>();
+    pathStack.push(sourceStatus);
+
+    while (!pathStack.isEmpty()) {
+      for (FileStatus child: getChildren(sourceFS, pathStack.pop())) {
+        if (LOG.isDebugEnabled())
+          LOG.debug("Recording source-path: "
+                    + sourceStatus.getPath() + " for copy.");
+        writeToFileListing(fileListWriter, child, sourcePathRoot, localFile);
+        if (isDirectoryAndNotEmpty(sourceFS, child)) {
+          if (LOG.isDebugEnabled())
+            LOG.debug("Traversing non-empty source dir: "
+                       + sourceStatus.getPath());
+          pathStack.push(child);
+        }
+      }
+    }
+  }
+
+  private void writeToFileListing(SequenceFile.Writer fileListWriter,
+                                  FileStatus fileStatus, Path sourcePathRoot,
+                                  boolean localFile) throws IOException {
+    if (fileStatus.getPath().equals(sourcePathRoot) && fileStatus.isDirectory())
+      return; // Skip the root-paths.
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot,
+        fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath());
+    }
+
+    FileStatus status = fileStatus;
+    if (localFile) {
+      status = getFileStatus(fileStatus);
+    }
+
+    fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
+        fileStatus.getPath())), status);
+    fileListWriter.sync();
+
+    if (!fileStatus.isDirectory()) {
+      totalBytesToCopy += fileStatus.getLen();
+    }
+    totalPaths++;
+  }
+
+  private static final ByteArrayOutputStream buffer = new ByteArrayOutputStream(64);
+  private DataInputBuffer in = new DataInputBuffer();
+  
+  private FileStatus getFileStatus(FileStatus fileStatus) throws IOException {
+    FileStatus status = new FileStatus();
+
+    buffer.reset();
+    DataOutputStream out = new DataOutputStream(buffer);
+    fileStatus.write(out);
+
+    in.reset(buffer.toByteArray(), 0, buffer.size());
+    status.readFields(in);
+    return status;
+  }
+}
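
The listing produced above is an uncompressed SequenceFile keyed by the target-relative path (Text), with the source FileStatus as the value; this is the same format that CopyCommitter and the input formats read back. A minimal reader sketch, assuming a hypothetical listing path and an existing Configuration named conf:

    Path listing = new Path("hdfs://nn2:8020/meta/fileList.seq");
    SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(listing));
    Text relPath = new Text();
    FileStatus status = new FileStatus();
    try {
      while (reader.next(relPath, status)) {
        System.out.println(relPath + " -> " + status.getPath() + " (" + status.getLen() + " bytes)");
      }
    } finally {
      IOUtils.closeStream(reader);
    }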

+ 297 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java

@@ -0,0 +1,297 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
+import org.apache.hadoop.tools.*;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.util.DistCpUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
+/**
+ * The CopyCommitter class is DistCp's OutputCommitter implementation. It is
+ * responsible for handling the completion/cleanup of the DistCp run.
+ * Specifically, it does the following:
+ *  1. Cleanup of the meta-folder (where DistCp maintains its file-list, etc.)
+ *  2. Preservation of user/group/replication-factor on any directories that
+ *     have been copied. (Files are taken care of in their map-tasks.)
+ *  3. Atomic-move of data from the temporary work-folder to the final path
+ *     (if atomic-commit was opted for).
+ *  4. Deletion of files from the target that are missing at source (if opted for).
+ *  5. Cleanup of any partially copied files, from previous, failed attempts.
+ */
+public class CopyCommitter extends FileOutputCommitter {
+  private static final Log LOG = LogFactory.getLog(CopyCommitter.class);
+
+  private final TaskAttemptContext taskAttemptContext;
+
+  /**
+   * Create an output committer
+   *
+   * @param outputPath the job's output path
+   * @param context    the task's context
+   * @throws IOException - Exception if any
+   */
+  public CopyCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
+    super(outputPath, context);
+    this.taskAttemptContext = context;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void commitJob(JobContext jobContext) throws IOException {
+    Configuration conf = jobContext.getConfiguration();
+    super.commitJob(jobContext);
+
+    cleanupTempFiles(jobContext);
+
+    String attributes = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
+    if (attributes != null && !attributes.isEmpty()) {
+      preserveFileAttributesForDirectories(conf);
+    }
+
+    try {
+      if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)) {
+        deleteMissing(conf);
+      } else if (conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false)) {
+        commitData(conf);
+      }
+      taskAttemptContext.setStatus("Commit Successful");
+    }
+    finally {
+      cleanup(conf);
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void abortJob(JobContext jobContext,
+                       JobStatus.State state) throws IOException {
+    try {
+      super.abortJob(jobContext, state);
+    } finally {
+      cleanupTempFiles(jobContext);
+      cleanup(jobContext.getConfiguration());
+    }
+  }
+
+  private void cleanupTempFiles(JobContext context) {
+    try {
+      Configuration conf = context.getConfiguration();
+
+      Path targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+      FileSystem targetFS = targetWorkPath.getFileSystem(conf);
+
+      String jobId = context.getJobID().toString();
+      deleteAttemptTempFiles(targetWorkPath, targetFS, jobId);
+      deleteAttemptTempFiles(targetWorkPath.getParent(), targetFS, jobId);
+    } catch (Throwable t) {
+      LOG.warn("Unable to cleanup temp files", t);
+    }
+  }
+
+  private void deleteAttemptTempFiles(Path targetWorkPath,
+                                      FileSystem targetFS,
+                                      String jobId) throws IOException {
+
+    FileStatus[] tempFiles = targetFS.globStatus(
+        new Path(targetWorkPath, ".distcp.tmp." + jobId.replaceAll("job","attempt") + "*"));
+
+    if (tempFiles != null && tempFiles.length > 0) {
+      for (FileStatus file : tempFiles) {
+        LOG.info("Cleaning up " + file.getPath());
+        targetFS.delete(file.getPath(), false);
+      }
+    }
+  }
+
+  /**
+   * Cleanup meta folder and other temporary files
+   *
+   * @param conf - Job Configuration
+   */
+  private void cleanup(Configuration conf) {
+    Path metaFolder = new Path(conf.get(DistCpConstants.CONF_LABEL_META_FOLDER));
+    try {
+      FileSystem fs = metaFolder.getFileSystem(conf);
+      LOG.info("Cleaning up temporary work folder: " + metaFolder);
+      fs.delete(metaFolder, true);
+    } catch (IOException ignore) {
+      LOG.error("Exception encountered ", ignore);
+    }
+  }
+
+  // This method changes the target-directories' file-attributes (owner,
+  // user/group permissions, etc.) based on the corresponding source directories.
+  private void preserveFileAttributesForDirectories(Configuration conf) throws IOException {
+    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
+    LOG.info("About to preserve attributes: " + attrSymbols);
+
+    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);
+
+    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
+    FileSystem clusterFS = sourceListing.getFileSystem(conf);
+    SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
+                                      SequenceFile.Reader.file(sourceListing));
+    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();
+
+    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+
+    long preservedEntries = 0;
+    try {
+      FileStatus srcFileStatus = new FileStatus();
+      Text srcRelPath = new Text();
+
+      // Iterate over every source path that was copied.
+      while (sourceReader.next(srcRelPath, srcFileStatus)) {
+        // File-attributes for files are set at the time of copy,
+        // in the map-task.
+        if (! srcFileStatus.isDirectory()) continue;
+
+        Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
+
+        // Skip the root folder.
+        // Status can't be preserved on root-folder. (E.g. multiple paths may
+        // be copied to a single target folder. Which source-attributes to use
+        // on the target is undefined.)
+        if (targetRoot.equals(targetFile)) continue;
+
+        FileSystem targetFS = targetFile.getFileSystem(conf);
+        DistCpUtils.preserve(targetFS, targetFile, srcFileStatus,  attributes);
+
+        taskAttemptContext.progress();
+        taskAttemptContext.setStatus("Preserving status on directory entries. [" +
+            sourceReader.getPosition() * 100 / totalLen + "%]");
+      }
+    } finally {
+      IOUtils.closeStream(sourceReader);
+    }
+    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
+  }
+
+  // This method deletes "extra" files from the target, if they're not
+  // available at the source.
+  private void deleteMissing(Configuration conf) throws IOException {
+    LOG.info("-delete option is enabled. About to remove entries from " +
+        "target that are missing in source");
+
+    // Sort the source-file listing alphabetically.
+    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
+    FileSystem clusterFS = sourceListing.getFileSystem(conf);
+    Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);
+
+    // Similarly, create the listing of target-files. Sort alphabetically.
+    Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
+    CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);
+
+    List<Path> targets = new ArrayList<Path>(1);
+    Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    targets.add(targetFinalPath);
+    DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
+
+    target.buildListing(targetListing, options);
+    Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
+    long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();
+
+    SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
+                                 SequenceFile.Reader.file(sortedSourceListing));
+    SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
+                                 SequenceFile.Reader.file(sortedTargetListing));
+
+    // Walk both source and target file listings.
+    // Delete all from target that doesn't also exist on source.
+    long deletedEntries = 0;
+    try {
+      FileStatus srcFileStatus = new FileStatus();
+      Text srcRelPath = new Text();
+      FileStatus trgtFileStatus = new FileStatus();
+      Text trgtRelPath = new Text();
+
+      FileSystem targetFS = targetFinalPath.getFileSystem(conf);
+      boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
+      while (targetReader.next(trgtRelPath, trgtFileStatus)) {
+        // Skip sources that don't exist on target.
+        while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
+          srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
+        }
+
+        if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;
+
+        // Target doesn't exist at source. Delete.
+        boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
+            targetFS.delete(trgtFileStatus.getPath(), true));
+        if (result) {
+          LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
+          deletedEntries++;
+        } else {
+          throw new IOException("Unable to delete " + trgtFileStatus.getPath());
+        }
+        taskAttemptContext.progress();
+        taskAttemptContext.setStatus("Deleting missing files from target. [" +
+            targetReader.getPosition() * 100 / totalLen + "%]");
+      }
+    } finally {
+      IOUtils.closeStream(sourceReader);
+      IOUtils.closeStream(targetReader);
+    }
+    LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
+  }
+
+  private void commitData(Configuration conf) throws IOException {
+
+    Path workDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+    Path finalDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    FileSystem targetFS = workDir.getFileSystem(conf);
+
+    LOG.info("Atomic commit enabled. Moving " + workDir + " to " + finalDir);
+    if (targetFS.exists(finalDir) && targetFS.exists(workDir)) {
+      LOG.error("Pre-existing final-path found at: " + finalDir);
+      throw new IOException("Target-path can't be committed to because it " +
+          "exists at " + finalDir + ". Copied data is in temp-dir: " + workDir + ". ");
+    }
+
+    boolean result = targetFS.rename(workDir, finalDir);
+    if (!result) {
+      LOG.warn("Rename failed. Perhaps data already moved. Verifying...");
+      result = targetFS.exists(finalDir) && !targetFS.exists(workDir);
+    }
+    if (result) {
+      LOG.info("Data committed successfully to " + finalDir);
+      taskAttemptContext.setStatus("Data committed successfully to " + finalDir);
+    } else {
+      LOG.error("Unable to commit data to " + finalDir);
+      throw new IOException("Atomic commit failed. Temporary data in " + workDir +
+        ", Unable to move to " + finalDir);
+    }
+  }
+}
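
The delete-missing pass above is a merge-walk over two alphabetically sorted listings: advance the source cursor while it sorts before the current target entry, keep the target entry on an exact match, delete it otherwise. A simplified, in-memory sketch of the same walk (hypothetical relative paths, already sorted):

    List<String> source = Arrays.asList("/a", "/b", "/d");
    List<String> target = Arrays.asList("/a", "/c", "/d", "/e");
    int s = 0;
    for (String t : target) {
      while (s < source.size() && source.get(s).compareTo(t) < 0) {
        s++;                                 // skip source entries that sort before this target entry
      }
      if (s < source.size() && source.get(s).equals(t)) {
        continue;                            // present on both sides: keep it
      }
      System.out.println("delete " + t);     // missing at source: would be deleted from the target
    }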

+ 330 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyMapper.java

@@ -0,0 +1,330 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptionSwitch;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.util.StringUtils;
+
+import java.io.*;
+import java.util.EnumSet;
+import java.util.Arrays;
+
+/**
+ * Mapper class that executes the DistCp copy operation.
+ * Implements the o.a.h.mapreduce.Mapper<> interface.
+ */
+public class CopyMapper extends Mapper<Text, FileStatus, Text, Text> {
+
+  /**
+   * Hadoop counters for the DistCp CopyMapper.
+   * (These have been kept identical to the old DistCp,
+   * for backward compatibility.)
+   */
+  public static enum Counter {
+    COPY,         // Number of files received by the mapper for copy.
+    SKIP,         // Number of files skipped.
+    FAIL,         // Number of files that failed to be copied.
+    BYTESCOPIED,  // Number of bytes actually copied by the copy-mapper, total.
+    BYTESEXPECTED,// Number of bytes expected to be copied.
+    BYTESFAILED,  // Number of bytes that failed to be copied.
+    BYTESSKIPPED, // Number of bytes that were skipped from copy.
+  }
+
+  private static Log LOG = LogFactory.getLog(CopyMapper.class);
+
+  private Configuration conf;
+
+  private boolean syncFolders = false;
+  private boolean ignoreFailures = false;
+  private boolean skipCrc = false;
+  private boolean overWrite = false;
+  private EnumSet<FileAttribute> preserve = EnumSet.noneOf(FileAttribute.class);
+
+  private FileSystem targetFS = null;
+  private Path    targetWorkPath = null;
+
+  /**
+   * Implementation of the Mapper::setup() method. This extracts the DistCp-
+   * options specified in the Job's configuration, to set up the Job.
+   * @param context Mapper's context.
+   * @throws IOException On IO failure.
+   * @throws InterruptedException If the job is interrupted.
+   */
+  @Override
+  public void setup(Context context) throws IOException, InterruptedException {
+    conf = context.getConfiguration();
+
+    syncFolders = conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false);
+    ignoreFailures = conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false);
+    skipCrc = conf.getBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), false);
+    overWrite = conf.getBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), false);
+    preserve = DistCpUtils.unpackAttributes(conf.get(DistCpOptionSwitch.
+        PRESERVE_STATUS.getConfigLabel()));
+
+    targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+    Path targetFinalPath = new Path(conf.get(
+            DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    targetFS = targetFinalPath.getFileSystem(conf);
+
+    if (targetFS.exists(targetFinalPath) && targetFS.isFile(targetFinalPath)) {
+      overWrite = true; // When target is an existing file, overwrite it.
+    }
+
+    if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) {
+      initializeSSLConf(context);
+    }
+  }
+
+  /**
+   * Initialize the SSL configuration, if one is set in the job configuration
+   *
+   * @throws IOException - If any
+   */
+  private void initializeSSLConf(Context context) throws IOException {
+    LOG.info("Initializing SSL configuration");
+    
+    String workDir = conf.get(JobContext.JOB_LOCAL_DIR) + "/work";
+    Path[] cacheFiles = context.getLocalCacheFiles();
+
+    Configuration sslConfig = new Configuration(false);
+    String sslConfFileName = conf.get(DistCpConstants.CONF_LABEL_SSL_CONF);
+    Path sslClient = findCacheFile(cacheFiles, sslConfFileName);
+    if (sslClient == null) {
+      LOG.warn("SSL Client config file not found. Was looking for " + sslConfFileName +
+          " in " + Arrays.toString(cacheFiles));
+      return;
+    }
+    sslConfig.addResource(sslClient);
+
+    String trustStoreFile = conf.get("ssl.client.truststore.location");
+    Path trustStorePath = findCacheFile(cacheFiles, trustStoreFile);
+    sslConfig.set("ssl.client.truststore.location", trustStorePath.toString());
+
+    String keyStoreFile = conf.get("ssl.client.keystore.location");
+    Path keyStorePath = findCacheFile(cacheFiles, keyStoreFile);
+    sslConfig.set("ssl.client.keystore.location", keyStorePath.toString());
+
+    try {
+      OutputStream out = new FileOutputStream(workDir + "/" + sslConfFileName);
+      try {
+        sslConfig.writeXml(out);
+      } finally {
+        out.close();
+      }
+      conf.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfFileName);
+    } catch (IOException e) {
+      LOG.warn("Unable to write out the ssl configuration. " +
+          "Will fall back to default ssl-client.xml in class path, if there is one", e);
+    }
+  }
+
+  /**
+   * Find entry from distributed cache
+   *
+   * @param cacheFiles - All localized cache files
+   * @param fileName - fileName to search
+   * @return Path of the filename if found, else null
+   */
+  private Path findCacheFile(Path[] cacheFiles, String fileName) {
+    if (cacheFiles != null && cacheFiles.length > 0) {
+      for (Path file : cacheFiles) {
+        if (file.getName().equals(fileName)) {
+          return file;
+        }
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Implementation of the Mapper<>::map(). Does the copy.
+   * @param relPath The target-relative path of the file being copied.
+   * @param sourceFileStatus FileStatus of the source file.
+   * @throws IOException
+   */
+  @Override
+  public void map(Text relPath, FileStatus sourceFileStatus, Context context)
+          throws IOException, InterruptedException {
+    Path sourcePath = sourceFileStatus.getPath();
+
+    if (LOG.isDebugEnabled())
+      LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);
+
+    Path target = new Path(targetWorkPath.makeQualified(targetFS.getUri(),
+                          targetFS.getWorkingDirectory()) + relPath.toString());
+
+    EnumSet<DistCpOptions.FileAttribute> fileAttributes
+            = getFileAttributeSettings(context);
+
+    final String description = "Copying " + sourcePath + " to " + target;
+    context.setStatus(description);
+
+    LOG.info(description);
+
+    try {
+      FileStatus sourceCurrStatus;
+      FileSystem sourceFS;
+      try {
+        sourceFS = sourcePath.getFileSystem(conf);
+        sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
+      } catch (FileNotFoundException e) {
+        throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
+      }
+
+      FileStatus targetStatus = null;
+
+      try {
+        targetStatus = targetFS.getFileStatus(target);
+      } catch (FileNotFoundException ignore) {
+        if (LOG.isDebugEnabled())
+          LOG.debug("Path could not be found: " + target, ignore);
+      }
+
+      if (targetStatus != null && (targetStatus.isDirectory() != sourceCurrStatus.isDirectory())) {
+        throw new IOException("Can't replace " + target + ". Target is " +
+            getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus));
+      }
+
+      if (sourceCurrStatus.isDirectory()) {
+        createTargetDirsWithRetry(description, target, context);
+        return;
+      }
+
+      if (skipFile(sourceFS, sourceCurrStatus, target)) {
+        LOG.info("Skipping copy of " + sourceCurrStatus.getPath()
+                 + " to " + target);
+        updateSkipCounters(context, sourceCurrStatus);
+        context.write(null, new Text("SKIP: " + sourceCurrStatus.getPath()));
+      }
+      else {
+        copyFileWithRetry(description, sourceCurrStatus, target, context,
+                          fileAttributes);
+      }
+
+      DistCpUtils.preserve(target.getFileSystem(conf), target,
+                           sourceCurrStatus, fileAttributes);
+
+    } catch (IOException exception) {
+      handleFailures(exception, sourceFileStatus, target, context);
+    }
+  }
+
+  private String getFileType(FileStatus fileStatus) {
+    return fileStatus == null ? "N/A" : (fileStatus.isDirectory() ? "dir" : "file");
+  }
+
+  private static EnumSet<DistCpOptions.FileAttribute>
+          getFileAttributeSettings(Mapper.Context context) {
+    String attributeString = context.getConfiguration().get(
+            DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel());
+    return DistCpUtils.unpackAttributes(attributeString);
+  }
+
+  private void copyFileWithRetry(String description, FileStatus sourceFileStatus,
+               Path target, Context context,
+               EnumSet<DistCpOptions.FileAttribute> fileAttributes) throws IOException {
+
+    long bytesCopied;
+    try {
+      bytesCopied = (Long)new RetriableFileCopyCommand(description)
+                       .execute(sourceFileStatus, target, context, fileAttributes);
+    } catch (Exception e) {
+      context.setStatus("Copy Failure: " + sourceFileStatus.getPath());
+      throw new IOException("File copy failed: " + sourceFileStatus.getPath() +
+          " --> " + target, e);
+    }
+    incrementCounter(context, Counter.BYTESEXPECTED, sourceFileStatus.getLen());
+    incrementCounter(context, Counter.BYTESCOPIED, bytesCopied);
+    incrementCounter(context, Counter.COPY, 1);
+  }
+
+  private void createTargetDirsWithRetry(String description,
+                   Path target, Context context) throws IOException {
+    try {
+      new RetriableDirectoryCreateCommand(description).execute(target, context);
+    } catch (Exception e) {
+      throw new IOException("mkdir failed for " + target, e);
+    }
+    incrementCounter(context, Counter.COPY, 1);
+  }
+
+  private static void updateSkipCounters(Context context,
+                                         FileStatus sourceFile) {
+    incrementCounter(context, Counter.SKIP, 1);
+    incrementCounter(context, Counter.BYTESSKIPPED, sourceFile.getLen());
+
+  }
+
+  private void handleFailures(IOException exception,
+                                     FileStatus sourceFileStatus, Path target,
+                                     Context context) throws IOException, InterruptedException {
+    LOG.error("Failure in copying " + sourceFileStatus.getPath() + " to " +
+                target, exception);
+
+    if (ignoreFailures && exception.getCause() instanceof
+            RetriableFileCopyCommand.CopyReadException) {
+      incrementCounter(context, Counter.FAIL, 1);
+      incrementCounter(context, Counter.BYTESFAILED, sourceFileStatus.getLen());
+      context.write(null, new Text("FAIL: " + sourceFileStatus.getPath() + " - " + 
+          StringUtils.stringifyException(exception)));
+    }
+    else
+      throw exception;
+  }
+
+  private static void incrementCounter(Context context, Counter counter,
+                                       long value) {
+    context.getCounter(counter).increment(value);
+  }
+
+  private boolean skipFile(FileSystem sourceFS, FileStatus source, Path target)
+                                          throws IOException {
+    return     targetFS.exists(target)
+            && !overWrite
+            && !mustUpdate(sourceFS, source, target);
+  }
+
+  private boolean mustUpdate(FileSystem sourceFS, FileStatus source, Path target)
+                                    throws IOException {
+    final FileStatus targetFileStatus = targetFS.getFileStatus(target);
+
+    return     syncFolders
+            && (
+                   targetFileStatus.getLen() != source.getLen()
+                || (!skipCrc &&
+                       !DistCpUtils.checksumsAreEqual(sourceFS,
+                                          source.getPath(), targetFS, target))
+                || (source.getBlockSize() != targetFileStatus.getBlockSize() &&
+                      preserve.contains(FileAttribute.BLOCKSIZE))
+               );
+  }
+}
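The mapper above tracks its work through Hadoop counters (COPY, SKIP, FAIL and the BYTES* variants). Below is a minimal driver-side sketch, not part of this patch, of reporting those counters after a DistCp job finishes; it assumes Counter is the enum nested in CopyMapper that the increments above refer to.

// Hedged sketch: reading CopyMapper's counters from a completed DistCp job.
// Assumes CopyMapper.Counter is the accessible enum referenced by the mapper above.
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.mapred.CopyMapper;

public class CopyCounterReport {
  public static void report(Job completedJob) throws Exception {
    long copied  = completedJob.getCounters()
        .findCounter(CopyMapper.Counter.COPY).getValue();
    long skipped = completedJob.getCounters()
        .findCounter(CopyMapper.Counter.SKIP).getValue();
    long bytes   = completedJob.getCounters()
        .findCounter(CopyMapper.Counter.BYTESCOPIED).getValue();
    System.out.println("Copied " + copied + " paths (" + bytes
        + " bytes), skipped " + skipped + ".");
  }
}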

+ 124 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java

@@ -0,0 +1,124 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
+import org.apache.hadoop.mapreduce.security.TokenCache;
+import org.apache.hadoop.tools.DistCpConstants;
+
+import java.io.IOException;
+
+/**
+ * The CopyOutputFormat is the Hadoop OutputFormat used in DistCp.
+ * It sets up the Job's Configuration (in the Job-Context) with the settings
+ * for the work-directory, final commit-directory, etc. It also sets the right
+ * output-committer.
+ * @param <K>
+ * @param <V>
+ */
+public class CopyOutputFormat<K, V> extends TextOutputFormat<K, V> {
+
+  /**
+   * Setter for the working directory for DistCp (where files will be copied
+   * before they are moved to the final commit-directory.)
+   * @param job The Job on whose configuration the working-directory is to be set.
+   * @param workingDirectory The path to use as the working directory.
+   */
+  public static void setWorkingDirectory(Job job, Path workingDirectory) {
+    job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
+        workingDirectory.toString());
+  }
+
+  /**
+   * Setter for the final directory for DistCp (where files copied will be
+   * moved, atomically.)
+   * @param job The Job on whose configuration the commit-directory is to be set.
+   * @param commitDirectory The path to use for final commit.
+   */
+  public static void setCommitDirectory(Job job, Path commitDirectory) {
+    job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
+        commitDirectory.toString());
+  }
+
+  /**
+   * Getter for the working directory.
+   * @param job The Job from whose configuration the working-directory is to
+   * be retrieved.
+   * @return The working-directory Path.
+   */
+  public static Path getWorkingDirectory(Job job) {
+    return getWorkingDirectory(job.getConfiguration());
+  }
+
+  private static Path getWorkingDirectory(Configuration conf) {
+    String workingDirectory = conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH);
+    if (workingDirectory == null || workingDirectory.isEmpty()) {
+      return null;
+    } else {
+      return new Path(workingDirectory);
+    }
+  }
+
+  /**
+   * Getter for the final commit-directory.
+   * @param job The Job from whose configuration the commit-directory is to be
+   * retrieved.
+   * @return The commit-directory Path.
+   */
+  public static Path getCommitDirectory(Job job) {
+    return getCommitDirectory(job.getConfiguration());
+  }
+
+  private static Path getCommitDirectory(Configuration conf) {
+    String commitDirectory = conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH);
+    if (commitDirectory == null || commitDirectory.isEmpty()) {
+      return null;
+    } else {
+      return new Path(commitDirectory);
+    }
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
+    return new CopyCommitter(getOutputPath(context), context);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public void checkOutputSpecs(JobContext context) throws IOException {
+    Configuration conf = context.getConfiguration();
+
+    if (getCommitDirectory(conf) == null) {
+      throw new IllegalStateException("Commit directory not configured");
+    }
+
+    Path workingPath = getWorkingDirectory(conf);
+    if (workingPath == null) {
+      throw new IllegalStateException("Working directory not configured");
+    }
+
+    // get delegation token for outDir's file system
+    TokenCache.obtainTokensForNamenodes(context.getCredentials(),
+                                        new Path[] {workingPath}, conf);
+  }
+}
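Since checkOutputSpecs() above insists on both directories being configured, any job using this output-format must call both setters. A minimal sketch of that wiring follows; the paths are made up, and DistCp itself performs the equivalent setup when building its own job.

// Hedged sketch: configuring a Job to use CopyOutputFormat. Paths are illustrative.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.mapred.CopyOutputFormat;

public class CopyOutputFormatSetup {
  public static Job configure(Configuration conf) throws Exception {
    Job job = Job.getInstance(conf);
    job.setOutputFormatClass(CopyOutputFormat.class);
    // Files are first written under the work path, then promoted to the
    // commit path by the CopyCommitter returned above.
    CopyOutputFormat.setWorkingDirectory(job, new Path("/tmp/distcp/.work"));
    CopyOutputFormat.setCommitDirectory(job, new Path("/data/target"));
    return job;
  }
}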

+ 56 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableDirectoryCreateCommand.java

@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.tools.util.RetriableCommand;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * This class extends Retriable command to implement the creation of directories
+ * with retries on failure.
+ */
+public class RetriableDirectoryCreateCommand extends RetriableCommand {
+
+  /**
+   * Constructor, taking a description of the action.
+   * @param description Verbose description of the copy operation.
+   */
+  public RetriableDirectoryCreateCommand(String description) {
+    super(description);
+  }
+
+  /**
+   * Implementation of RetriableCommand::doExecute().
+   * This implements the actual mkdirs() functionality.
+   * @param arguments Argument-list to the command.
+   * @return Boolean. True, if the directory could be created successfully.
+   * @throws Exception IOException, on failure to create the directory.
+   */
+  @Override
+  protected Object doExecute(Object... arguments) throws Exception {
+    assert arguments.length == 2 : "Unexpected argument list.";
+    Path target = (Path)arguments[0];
+    Mapper.Context context = (Mapper.Context)arguments[1];
+
+    FileSystem targetFS = target.getFileSystem(context.getConfiguration());
+    return targetFS.mkdirs(target);
+  }
+}
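The command is driven through RetriableCommand::execute() with exactly the two arguments doExecute() unpacks: the target path and the mapper's context. A sketch mirroring CopyMapper.createTargetDirsWithRetry() above:

// Hedged sketch: issuing a retried mkdir from inside a mapper. Any failure
// remaining after the retries surfaces as an IOException to the caller.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.tools.mapred.RetriableDirectoryCreateCommand;

class RetriedMkdir {
  static void mkdirsWithRetry(Path target, Mapper.Context context) throws IOException {
    try {
      new RetriableDirectoryCreateCommand("Creating " + target).execute(target, context);
    } catch (Exception e) {
      throw new IOException("mkdir failed for " + target, e);
    }
  }
}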

+ 245 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java

@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.tools.util.RetriableCommand;
+import org.apache.hadoop.tools.util.ThrottledInputStream;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.tools.DistCpOptions.*;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import java.io.*;
+import java.util.EnumSet;
+
+/**
+ * This class extends RetriableCommand to implement the copy of files,
+ * with retries on failure.
+ */
+public class RetriableFileCopyCommand extends RetriableCommand {
+
+  private static Log LOG = LogFactory.getLog(RetriableFileCopyCommand.class);
+  private static int BUFFER_SIZE = 8 * 1024;
+
+  /**
+   * Constructor, taking a description of the action.
+   * @param description Verbose description of the copy operation.
+   */
+  public RetriableFileCopyCommand(String description) {
+    super(description);
+  }
+
+  /**
+   * Implementation of RetriableCommand::doExecute().
+   * This is the actual copy-implementation.
+   * @param arguments Argument-list to the command.
+   * @return Number of bytes copied.
+   * @throws Exception: CopyReadException, if there are read-failures. All other
+   *         failures are IOExceptions.
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  protected Object doExecute(Object... arguments) throws Exception {
+    assert arguments.length == 4 : "Unexpected argument list.";
+    FileStatus source = (FileStatus)arguments[0];
+    assert !source.isDirectory() : "Unexpected file-status. Expected file.";
+    Path target = (Path)arguments[1];
+    Mapper.Context context = (Mapper.Context)arguments[2];
+    EnumSet<FileAttribute> fileAttributes
+            = (EnumSet<FileAttribute>)arguments[3];
+    return doCopy(source, target, context, fileAttributes);
+  }
+
+  private long doCopy(FileStatus sourceFileStatus, Path target,
+                      Mapper.Context context,
+                      EnumSet<FileAttribute> fileAttributes)
+          throws IOException {
+
+    Path tmpTargetPath = getTmpFile(target, context);
+    final Configuration configuration = context.getConfiguration();
+    FileSystem targetFS = target.getFileSystem(configuration);
+
+    try {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Copying " + sourceFileStatus.getPath() + " to " + target);
+        LOG.debug("Tmp-file path: " + tmpTargetPath);
+      }
+      FileSystem sourceFS = sourceFileStatus.getPath().getFileSystem(
+              configuration);
+      long bytesRead = copyToTmpFile(tmpTargetPath, targetFS, sourceFileStatus,
+                                     context, fileAttributes);
+
+      compareFileLengths(sourceFileStatus, tmpTargetPath, configuration, bytesRead);
+      compareCheckSums(sourceFS, sourceFileStatus.getPath(), targetFS, tmpTargetPath);
+      promoteTmpToTarget(tmpTargetPath, target, targetFS);
+      return bytesRead;
+
+    } finally {
+      if (targetFS.exists(tmpTargetPath))
+        targetFS.delete(tmpTargetPath, false);
+    }
+  }
+
+  private long copyToTmpFile(Path tmpTargetPath, FileSystem targetFS,
+                             FileStatus sourceFileStatus, Mapper.Context context,
+                             EnumSet<FileAttribute> fileAttributes)
+                             throws IOException {
+    OutputStream outStream = new BufferedOutputStream(targetFS.create(
+            tmpTargetPath, true, BUFFER_SIZE,
+            getReplicationFactor(fileAttributes, sourceFileStatus, targetFS),
+            getBlockSize(fileAttributes, sourceFileStatus, targetFS), context));
+    return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, true, context);
+  }
+
+  private void compareFileLengths(FileStatus sourceFileStatus, Path target,
+                                  Configuration configuration, long bytesRead)
+                                  throws IOException {
+    final Path sourcePath = sourceFileStatus.getPath();
+    FileSystem fs = sourcePath.getFileSystem(configuration);
+    if (fs.getFileStatus(sourcePath).getLen() != bytesRead)
+      throw new IOException("Mismatch in length of source:" + sourcePath
+                + " and target:" + target);
+  }
+
+  private void compareCheckSums(FileSystem sourceFS, Path source,
+                                FileSystem targetFS, Path target)
+                                throws IOException {
+    if (!DistCpUtils.checksumsAreEqual(sourceFS, source, targetFS, target))
+      throw new IOException("Check-sum mismatch between "
+                              + source + " and " + target);
+
+  }
+
+  //If target file exists and unable to delete target - fail
+  //If target doesn't exist and unable to create parent folder - fail
+  //If target is successfully deleted and parent exists, if rename fails - fail
+  private void promoteTmpToTarget(Path tmpTarget, Path target, FileSystem fs)
+                                  throws IOException {
+    if ((fs.exists(target) && !fs.delete(target, false))
+        || (!fs.exists(target.getParent()) && !fs.mkdirs(target.getParent()))
+        || !fs.rename(tmpTarget, target)) {
+      throw new IOException("Failed to promote tmp-file:" + tmpTarget
+                              + " to: " + target);
+    }
+  }
+
+  private Path getTmpFile(Path target, Mapper.Context context) {
+    Path targetWorkPath = new Path(context.getConfiguration().
+        get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+
+    Path root = target.equals(targetWorkPath)? targetWorkPath.getParent() : targetWorkPath;
+    LOG.info("Creating temp file: " +
+        new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString()));
+    return new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString());
+  }
+
+  private long copyBytes(FileStatus sourceFileStatus, OutputStream outStream,
+                         int bufferSize, boolean mustCloseStream,
+                         Mapper.Context context) throws IOException {
+    Path source = sourceFileStatus.getPath();
+    byte buf[] = new byte[bufferSize];
+    ThrottledInputStream inStream = null;
+    long totalBytesRead = 0;
+
+    try {
+      inStream = getInputStream(source, context.getConfiguration());
+      int bytesRead = readBytes(inStream, buf);
+      while (bytesRead >= 0) {
+        totalBytesRead += bytesRead;
+        outStream.write(buf, 0, bytesRead);
+        updateContextStatus(totalBytesRead, context, sourceFileStatus);
+        bytesRead = readBytes(inStream, buf);
+      }
+    } finally {
+      if (mustCloseStream)
+        IOUtils.cleanup(LOG, outStream, inStream);
+    }
+
+    return totalBytesRead;
+  }
+
+  private void updateContextStatus(long totalBytesRead, Mapper.Context context,
+                                   FileStatus sourceFileStatus) {
+    StringBuilder message = new StringBuilder(DistCpUtils.getFormatter()
+                .format(totalBytesRead * 100.0f / sourceFileStatus.getLen()));
+    message.append("% ")
+            .append(description).append(" [")
+            .append(DistCpUtils.getStringDescriptionFor(totalBytesRead))
+            .append('/')
+        .append(DistCpUtils.getStringDescriptionFor(sourceFileStatus.getLen()))
+            .append(']');
+    context.setStatus(message.toString());
+  }
+
+  private static int readBytes(InputStream inStream, byte buf[])
+          throws IOException {
+    try {
+      return inStream.read(buf);
+    }
+    catch (IOException e) {
+      throw new CopyReadException(e);
+    }
+  }
+
+  private static ThrottledInputStream getInputStream(Path path, Configuration conf)
+          throws IOException {
+    try {
+      FileSystem fs = path.getFileSystem(conf);
+      long bandwidthMB = conf.getInt(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
+              DistCpConstants.DEFAULT_BANDWIDTH_MB);
+      return new ThrottledInputStream(new BufferedInputStream(fs.open(path)),
+              bandwidthMB * 1024 * 1024);
+    }
+    catch (IOException e) {
+      throw new CopyReadException(e);
+    }
+  }
+
+  private static short getReplicationFactor(
+          EnumSet<FileAttribute> fileAttributes,
+          FileStatus sourceFile, FileSystem targetFS) {
+    return fileAttributes.contains(FileAttribute.REPLICATION)?
+            sourceFile.getReplication() : targetFS.getDefaultReplication();
+  }
+
+  private static long getBlockSize(
+          EnumSet<FileAttribute> fileAttributes,
+          FileStatus sourceFile, FileSystem targetFS) {
+    return fileAttributes.contains(FileAttribute.BLOCKSIZE)?
+            sourceFile.getBlockSize() : targetFS.getDefaultBlockSize();
+  }
+
+  /**
+   * Special subclass of IOException. This is used to distinguish read-operation
+   * failures from other kinds of IOExceptions.
+   * The failure to read from source is dealt with specially, in the CopyMapper.
+   * Such failures may be skipped if the DistCpOptions indicate so.
+   * Write failures are intolerable, and amount to CopyMapper failure.  
+   */
+  public static class CopyReadException extends IOException {
+    public CopyReadException(Throwable rootCause) {
+      super(rootCause);
+    }
+  }
+}
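getInputStream() above throttles each copy stream to the per-map bandwidth read from DistCpConstants.CONF_LABEL_BANDWIDTH_MB. DistCp normally derives that value from its command-line options; the sketch below sets it directly on a Configuration purely for illustration.

// Hedged sketch: capping the bandwidth consumed by each RetriableFileCopyCommand
// stream. The value is in megabytes per second, per copy stream.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpConstants;

class BandwidthSetup {
  static Configuration withBandwidthLimit(Configuration conf, int mbPerSecond) {
    // Each source stream is wrapped in a ThrottledInputStream capped at this rate.
    conf.setInt(DistCpConstants.CONF_LABEL_BANDWIDTH_MB, mbPerSecond);
    return conf;
  }
}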

+ 169 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java

@@ -0,0 +1,169 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * UniformSizeInputFormat extends the InputFormat<> class, to produce
+ * input-splits for DistCp.
+ * It looks at the copy-listing and groups its contents into input-splits such
+ * that the total number of bytes to be copied is roughly the same for each
+ * input-split.
+ */
+public class UniformSizeInputFormat extends InputFormat<Text, FileStatus> {
+  private static final Log LOG
+                = LogFactory.getLog(UniformSizeInputFormat.class);
+
+  /**
+   * Implementation of InputFormat::getSplits(). Returns a list of InputSplits,
+   * such that the number of bytes to be copied is approximately equal
+   * across all the splits.
+   * @param context JobContext for the job.
+   * @return The list of uniformly-distributed input-splits.
+   * @throws IOException: On failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public List<InputSplit> getSplits(JobContext context)
+                      throws IOException, InterruptedException {
+    Configuration configuration = context.getConfiguration();
+    int numSplits = DistCpUtils.getInt(configuration,
+                                       JobContext.NUM_MAPS);
+
+    if (numSplits == 0) return new ArrayList<InputSplit>();
+
+    return getSplits(configuration, numSplits,
+                     DistCpUtils.getLong(configuration,
+                          DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED));
+  }
+
+  private List<InputSplit> getSplits(Configuration configuration, int numSplits,
+                                     long totalSizeBytes) throws IOException {
+    List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
+    long nBytesPerSplit = (long) Math.ceil(totalSizeBytes * 1.0 / numSplits);
+
+    FileStatus srcFileStatus = new FileStatus();
+    Text srcRelPath = new Text();
+    long currentSplitSize = 0;
+    long lastSplitStart = 0;
+    long lastPosition = 0;
+
+    final Path listingFilePath = getListingFilePath(configuration);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Average bytes per map: " + nBytesPerSplit +
+          ", Number of maps: " + numSplits + ", total size: " + totalSizeBytes);
+    }
+    SequenceFile.Reader reader = null;
+    try {
+      reader = getListingFileReader(configuration);
+      while (reader.next(srcRelPath, srcFileStatus)) {
+        // If adding the current file would push this split past the per-map
+        // byte limit, close out the current split; the current file starts the next one.
+        if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
+          FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
+              lastPosition - lastSplitStart, null);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug ("Creating split : " + split + ", bytes in split: " + currentSplitSize);
+          }
+          splits.add(split);
+          lastSplitStart = lastPosition;
+          currentSplitSize = 0;
+        }
+        currentSplitSize += srcFileStatus.getLen();
+        lastPosition = reader.getPosition();
+      }
+      if (lastPosition > lastSplitStart) {
+        FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
+            lastPosition - lastSplitStart, null);
+        if (LOG.isDebugEnabled()) {
+          LOG.info ("Creating split : " + split + ", bytes in split: " + currentSplitSize);
+        }
+        splits.add(split);
+      }
+
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+
+    return splits;
+  }
+
+  private static Path getListingFilePath(Configuration configuration) {
+    final String listingFilePathString =
+            configuration.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
+
+    assert !listingFilePathString.equals("")
+              : "Couldn't find listing file. Invalid input.";
+    return new Path(listingFilePathString);
+  }
+
+  private SequenceFile.Reader getListingFileReader(Configuration configuration) {
+
+    final Path listingFilePath = getListingFilePath(configuration);
+    try {
+      final FileSystem fileSystem = listingFilePath.getFileSystem(configuration);
+      if (!fileSystem.exists(listingFilePath))
+        throw new IllegalArgumentException("Listing file doesn't exist at: "
+                                           + listingFilePath);
+
+      return new SequenceFile.Reader(configuration,
+                                     SequenceFile.Reader.file(listingFilePath));
+    }
+    catch (IOException exception) {
+      LOG.error("Couldn't find listing file at: " + listingFilePath, exception);
+      throw new IllegalArgumentException("Couldn't find listing-file at: "
+                                         + listingFilePath, exception);
+    }
+  }
+
+  /**
+   * Implementation of InputFormat::createRecordReader().
+   * @param split The split for which the RecordReader is sought.
+   * @param context The context of the current task-attempt.
+   * @return A SequenceFileRecordReader instance, (since the copy-listing is a
+   * simple sequence-file.)
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  @Override
+  public RecordReader<Text, FileStatus> createRecordReader(InputSplit split,
+                                                     TaskAttemptContext context)
+                                      throws IOException, InterruptedException {
+    return new SequenceFileRecordReader<Text, FileStatus>();
+  }
+}
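The copy-listing this input-format reads is a SequenceFile of (Text relative-path, FileStatus) records, written by the CopyListing classes. The sketch below only illustrates that record shape with a single made-up entry; it is not how DistCp builds the real listing.

// Hedged sketch: the (Text, FileStatus) record shape of the copy-listing.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

class ListingRecordSketch {
  static void writeOneEntry(Configuration conf, Path listing, Path sourceFile)
      throws IOException {
    FileSystem fs = sourceFile.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, listing,
        Text.class, FileStatus.class);
    try {
      // Key: path relative to the source root (made up here). Value: the file's status.
      writer.append(new Text("/" + sourceFile.getName()), fs.getFileStatus(sourceFile));
    } finally {
      writer.close();
    }
  }
}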

+ 246 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputChunk.java

@@ -0,0 +1,246 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.tools.mapred.lib;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskID;
+
+import java.io.IOException;
+
+/**
+ * The DynamicInputChunk represents a single chunk of work, when used in
+ * conjunction with the DynamicInputFormat and the DynamicRecordReader.
+ * The records in the DynamicInputFormat's input-file are split across various
+ * DynamicInputChunks. Each chunk is claimed and processed in one iteration of
+ * a dynamic-mapper. When a DynamicInputChunk has been exhausted, a faster
+ * mapper may claim another and process it, until there are no chunks left to
+ * consume.
+ */
+class DynamicInputChunk<K, V> {
+  private static Log LOG = LogFactory.getLog(DynamicInputChunk.class);
+
+  private static Configuration configuration;
+  private static Path chunkRootPath;
+  private static String chunkFilePrefix;
+  private static int numChunksLeft = -1; // Un-initialized before 1st dir-scan.
+  private static FileSystem fs;
+
+  private Path chunkFilePath;
+  private SequenceFileRecordReader<K, V> reader;
+  private SequenceFile.Writer writer;
+
+  private static void initializeChunkInvariants(Configuration config)
+                                                  throws IOException {
+    configuration = config;
+    Path listingFilePath = new Path(getListingFilePath(configuration));
+    chunkRootPath = new Path(listingFilePath.getParent(), "chunkDir");
+    fs = chunkRootPath.getFileSystem(configuration);
+    chunkFilePrefix = listingFilePath.getName() + ".chunk.";
+  }
+
+  private static String getListingFilePath(Configuration configuration) {
+    final String listingFileString = configuration.get(
+            DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
+    assert !listingFileString.equals("") : "Listing file not found.";
+    return listingFileString;
+  }
+
+  private static boolean areInvariantsInitialized() {
+    return chunkRootPath != null;
+  }
+
+  private DynamicInputChunk(String chunkId, Configuration configuration)
+                                                      throws IOException {
+    if (!areInvariantsInitialized())
+      initializeChunkInvariants(configuration);
+
+    chunkFilePath = new Path(chunkRootPath, chunkFilePrefix + chunkId);
+    openForWrite();
+  }
+
+
+  private void openForWrite() throws IOException {
+    writer = SequenceFile.createWriter(
+            chunkFilePath.getFileSystem(configuration), configuration,
+            chunkFilePath, Text.class, FileStatus.class,
+            SequenceFile.CompressionType.NONE);
+
+  }
+
+  /**
+   * Factory method to create chunk-files for writing to.
+   * (For instance, when the DynamicInputFormat splits the input-file into
+   * chunks.)
+   * @param chunkId String to identify the chunk.
+   * @param configuration Configuration, describing the location of the listing-
+   * file, file-system for the map-job, etc.
+   * @return A DynamicInputChunk, corresponding to a chunk-file, with the name
+   * incorporating the chunk-id.
+   * @throws IOException Exception on failure to create the chunk.
+   */
+  public static DynamicInputChunk createChunkForWrite(String chunkId,
+                          Configuration configuration) throws IOException {
+    return new DynamicInputChunk(chunkId, configuration);
+  }
+
+  /**
+   * Method to write records into a chunk.
+   * @param key Key from the listing file.
+   * @param value Corresponding value from the listing file.
+   * @throws IOException Exception on failure to write to the file.
+   */
+  public void write(Text key, FileStatus value) throws IOException {
+    writer.append(key, value);
+  }
+
+  /**
+   * Closes streams opened to the chunk-file.
+   */
+  public void close() {
+    IOUtils.cleanup(LOG, reader, writer);
+  }
+
+  /**
+   * Reassigns the chunk to a specified Map-Task, for consumption.
+   * @param taskId The Map-Task to which the chunk is to be reassigned.
+   * @throws IOException Exception on failure to reassign.
+   */
+  public void assignTo(TaskID taskId) throws IOException {
+    Path newPath = new Path(chunkRootPath, taskId.toString());
+    if (!fs.rename(chunkFilePath, newPath)) {
+      LOG.warn(chunkFilePath + " could not be assigned to " + taskId);
+    }
+  }
+
+  private DynamicInputChunk(Path chunkFilePath,
+                            TaskAttemptContext taskAttemptContext)
+                                   throws IOException, InterruptedException {
+    if (!areInvariantsInitialized())
+      initializeChunkInvariants(taskAttemptContext.getConfiguration());
+
+    this.chunkFilePath = chunkFilePath;
+    openForRead(taskAttemptContext);
+  }
+
+  private void openForRead(TaskAttemptContext taskAttemptContext)
+          throws IOException, InterruptedException {
+    reader = new SequenceFileRecordReader<K, V>();
+    reader.initialize(new FileSplit(chunkFilePath, 0,
+            DistCpUtils.getFileSize(chunkFilePath, configuration), null),
+            taskAttemptContext);
+  }
+
+  /**
+   * Factory method that
+   * 1. acquires a chunk for the specified map-task attempt
+   * 2. returns a DynamicInputChunk associated with the acquired chunk-file.
+   * @param taskAttemptContext The attempt-context for the map task that's
+   * trying to acquire a chunk.
+   * @return The acquired dynamic-chunk. The chunk-file is renamed to the
+   * attempt-id (from the attempt-context.)
+   * @throws IOException Exception on failure.
+   * @throws InterruptedException Exception on failure.
+   */
+  public static DynamicInputChunk acquire(TaskAttemptContext taskAttemptContext)
+                                      throws IOException, InterruptedException {
+    if (!areInvariantsInitialized())
+        initializeChunkInvariants(taskAttemptContext.getConfiguration());
+
+    String taskId
+            = taskAttemptContext.getTaskAttemptID().getTaskID().toString();
+    Path acquiredFilePath = new Path(chunkRootPath, taskId);
+
+    if (fs.exists(acquiredFilePath)) {
+      LOG.info("Acquiring pre-assigned chunk: " + acquiredFilePath);
+      return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
+    }
+
+    for (FileStatus chunkFile : getListOfChunkFiles()) {
+      if (fs.rename(chunkFile.getPath(), acquiredFilePath)) {
+        LOG.info(taskId + " acquired " + chunkFile.getPath());
+        return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
+      }
+      else
+        LOG.warn(taskId + " could not acquire " + chunkFile.getPath());
+    }
+
+    return null;
+  }
+
+  /**
+   * Method to be called to relinquish an acquired chunk. All streams open to
+   * the chunk are closed, and the chunk-file is deleted.
+   * @throws IOException Exception thrown on failure to release (i.e. delete)
+   * the chunk file.
+   */
+  public void release() throws IOException {
+    close();
+    if (!fs.delete(chunkFilePath, false)) {
+      LOG.error("Unable to release chunk at path: " + chunkFilePath);
+      throw new IOException("Unable to release chunk at path: " + chunkFilePath);
+    }
+  }
+
+  static FileStatus [] getListOfChunkFiles() throws IOException {
+    Path chunkFilePattern = new Path(chunkRootPath, chunkFilePrefix + "*");
+    FileStatus chunkFiles[] = fs.globStatus(chunkFilePattern);
+    numChunksLeft = chunkFiles.length;
+    return chunkFiles;
+  }
+
+  /**
+   * Getter for the chunk-file's path, on HDFS.
+   * @return The qualified path to the chunk-file.
+   */
+  public Path getPath() {
+    return chunkFilePath;
+  }
+
+  /**
+   * Getter for the record-reader, opened to the chunk-file.
+   * @return Opened Sequence-file reader.
+   */
+  public SequenceFileRecordReader<K,V> getReader() {
+    assert reader != null : "Reader un-initialized!";
+    return reader;
+  }
+
+  /**
+   * Getter for the number of chunk-files left in the chunk-file directory.
+   * Useful to determine how many chunks (and hence, records) are left to be
+   * processed.
+   * @return Before the first scan of the directory, the number returned is -1.
+   * Otherwise, the number of chunk-files seen from the last scan is returned.
+   */
+  public static int getNumChunksLeft() {
+    return numChunksLeft;
+  }
+}
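On the write side, DynamicInputFormat carves the listing into chunk files through the factory above. A minimal same-package sketch of that lifecycle (the class is package-private, and the Configuration is assumed to already carry the listing-file path, as it does when DistCp runs):

// Hedged sketch: writing one record into a fresh chunk and closing it, so that
// a map task can later acquire() it. Chunk-id "00000" is arbitrary.
package org.apache.hadoop.tools.mapred.lib;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.io.Text;

class ChunkWriteSketch {
  static void writeChunk(Configuration conf, Text relPath, FileStatus status)
      throws IOException {
    DynamicInputChunk chunk = DynamicInputChunk.createChunkForWrite("00000", conf);
    try {
      chunk.write(relPath, status);   // one record per listing entry
    } finally {
      chunk.close();                  // chunk file becomes visible for acquisition
    }
  }
}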

+ 292 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicInputFormat.java

@@ -0,0 +1,292 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred.lib;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.io.IOException;
+
+/**
+ * DynamicInputFormat implements the "Worker pattern" for DistCp.
+ * Rather than splitting up the copy-list into a set of static splits,
+ * the DynamicInputFormat does the following:
+ * 1. Splits the copy-list into small chunks on the DFS.
+ * 2. Creates a set of empty "dynamic" splits, that each consume as many chunks
+ *    as it can.
+ * This arrangement ensures that a single slow mapper won't slow down the entire
+ * job (since the slack will be picked up by other mappers, which consume more
+ * chunks).
+ * By varying the split-ratio, one can vary chunk sizes to achieve different
+ * performance characteristics. 
+ */
+public class DynamicInputFormat<K, V> extends InputFormat<K, V> {
+  private static final Log LOG = LogFactory.getLog(DynamicInputFormat.class);
+
+  private static final String CONF_LABEL_LISTING_SPLIT_RATIO
+          = "mapred.listing.split.ratio";
+  private static final String CONF_LABEL_NUM_SPLITS
+          = "mapred.num.splits";
+  private static final String CONF_LABEL_NUM_ENTRIES_PER_CHUNK
+          = "mapred.num.entries.per.chunk";
+
+  /**
+   * Implementation of InputFormat::getSplits(). This method splits up the
+   * copy-listing file into chunks, and assigns the first batch to different
+   * tasks.
+   * @param jobContext JobContext for the map job.
+   * @return The list of (empty) dynamic input-splits.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public List<InputSplit> getSplits(JobContext jobContext)
+      throws IOException, InterruptedException {
+    LOG.info("DynamicInputFormat: Getting splits for job:"
+             + jobContext.getJobID());
+    return createSplits(jobContext,
+                        splitCopyListingIntoChunksWithShuffle(jobContext));
+  }
+
+  private List<InputSplit> createSplits(JobContext jobContext,
+                                        List<DynamicInputChunk> chunks)
+          throws IOException {
+    int numMaps = getNumMapTasks(jobContext.getConfiguration());
+
+    final int nSplits = Math.min(numMaps, chunks.size());
+    List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);
+    
+    for (int i=0; i< nSplits; ++i) {
+      TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
+      chunks.get(i).assignTo(taskId);
+      splits.add(new FileSplit(chunks.get(i).getPath(), 0,
+          // Setting non-zero length for FileSplit size, to avoid a possible
+          // future when 0-sized file-splits are considered "empty" and skipped
+          // over.
+          MIN_RECORDS_PER_CHUNK,
+          null));
+    }
+    DistCpUtils.publish(jobContext.getConfiguration(),
+                        CONF_LABEL_NUM_SPLITS, splits.size());
+    return splits;
+  }
+
+  private static int N_CHUNKS_OPEN_AT_ONCE_DEFAULT = 16;
+
+  private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle
+                                    (JobContext context) throws IOException {
+
+    final Configuration configuration = context.getConfiguration();
+    int numRecords = getNumberOfRecords(configuration);
+    int numMaps = getNumMapTasks(configuration);
+    // Number of chunks each map will process, on average.
+    int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
+    validateNumChunksUsing(splitRatio, numMaps);
+
+    int numEntriesPerChunk = (int)Math.ceil((float)numRecords
+                                          /(splitRatio * numMaps));
+    DistCpUtils.publish(context.getConfiguration(),
+                        CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
+                        numEntriesPerChunk);
+
+    final int nChunksTotal = (int)Math.ceil((float)numRecords/numEntriesPerChunk);
+    int nChunksOpenAtOnce
+            = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);
+
+    Path listingPath = getListingFilePath(configuration);
+    SequenceFile.Reader reader
+            = new SequenceFile.Reader(configuration,
+                                      SequenceFile.Reader.file(listingPath));
+
+    List<DynamicInputChunk> openChunks
+                  = new ArrayList<DynamicInputChunk>();
+    
+    List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();
+
+    FileStatus fileStatus = new FileStatus();
+    Text relPath = new Text();
+    int recordCounter = 0;
+    int chunkCount = 0;
+
+    try {
+
+      while (reader.next(relPath, fileStatus)) {
+        if (recordCounter % (nChunksOpenAtOnce*numEntriesPerChunk) == 0) {
+          // All chunks full. Create new chunk-set.
+          closeAll(openChunks);
+          chunksFinal.addAll(openChunks);
+
+          openChunks = createChunks(
+                  configuration, chunkCount, nChunksTotal, nChunksOpenAtOnce);
+
+          chunkCount += openChunks.size();
+
+          nChunksOpenAtOnce = openChunks.size();
+          recordCounter = 0;
+        }
+
+        // Shuffle into open chunks.
+        openChunks.get(recordCounter%nChunksOpenAtOnce).write(relPath, fileStatus);
+        ++recordCounter;
+      }
+
+    } finally {
+      closeAll(openChunks);
+      chunksFinal.addAll(openChunks);
+      IOUtils.closeStream(reader);
+    }
+
+    LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size()); 
+    return chunksFinal;
+  }
+
+  private static void validateNumChunksUsing(int splitRatio, int numMaps)
+                                              throws IOException {
+    if (splitRatio * numMaps > MAX_CHUNKS_TOLERABLE)
+      throw new IOException("Too many chunks created with splitRatio:"
+                 + splitRatio + ", numMaps:" + numMaps
+                 + ". Reduce numMaps or decrease split-ratio to proceed.");
+  }
+
+  private static void closeAll(List<DynamicInputChunk> chunks) {
+    for (DynamicInputChunk chunk: chunks)
+      chunk.close();
+  }
+
+  private static List<DynamicInputChunk> createChunks(Configuration config,
+                      int chunkCount, int nChunksTotal, int nChunksOpenAtOnce)
+                                          throws IOException {
+    List<DynamicInputChunk> chunks = new ArrayList<DynamicInputChunk>();
+    int chunkIdUpperBound
+            = Math.min(nChunksTotal, chunkCount + nChunksOpenAtOnce);
+
+    // If there will be fewer than nChunksOpenAtOnce chunks left after
+    // the current batch of chunks, fold the remaining chunks into
+    // the current batch.
+    if (nChunksTotal - chunkIdUpperBound < nChunksOpenAtOnce)
+      chunkIdUpperBound = nChunksTotal;
+
+    for (int i=chunkCount; i < chunkIdUpperBound; ++i)
+      chunks.add(createChunk(i, config));
+    return chunks;
+  }
+
+  private static DynamicInputChunk createChunk(int chunkId, Configuration config)
+                                              throws IOException {
+    return DynamicInputChunk.createChunkForWrite(String.format("%05d", chunkId),
+                                              config);
+  }
+
+
+  private static Path getListingFilePath(Configuration configuration) {
+    String listingFilePathString = configuration.get(
+            DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
+
+    assert !listingFilePathString.equals("") : "Listing file not found.";
+
+    Path listingFilePath = new Path(listingFilePathString);
+    try {
+      assert listingFilePath.getFileSystem(configuration)
+              .exists(listingFilePath) : "Listing file: " + listingFilePath +
+                                          " not found.";
+    } catch (IOException e) {
+      assert false :   "Listing file: " + listingFilePath
+                    + " couldn't be accessed. " + e.getMessage();
+    }
+    return listingFilePath;
+  }
+
+  private static int getNumberOfRecords(Configuration configuration) {
+    return DistCpUtils.getInt(configuration,
+                              DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS);
+  }
+
+  private static int getNumMapTasks(Configuration configuration) {
+    return DistCpUtils.getInt(configuration,
+                              JobContext.NUM_MAPS);
+  }
+
+  private static int getListingSplitRatio(Configuration configuration,
+                                            int numMaps, int numPaths) {
+    return configuration.getInt(
+            CONF_LABEL_LISTING_SPLIT_RATIO,
+            getSplitRatio(numMaps, numPaths));
+  }
+
+  private static final int MAX_CHUNKS_TOLERABLE = 400;
+  private static final int MAX_CHUNKS_IDEAL     = 100;
+  private static final int MIN_RECORDS_PER_CHUNK = 5;
+  private static final int SPLIT_RATIO_DEFAULT  = 2;
+
+  /**
+   * Package private, for testability.
+   * @param nMaps The number of maps requested for.
+   * @param nRecords The number of records to be copied.
+   * @return The number of splits each map should handle, ideally.
+   */
+  static int getSplitRatio(int nMaps, int nRecords) {
+    if (nMaps == 1) {
+      LOG.warn("nMaps == 1. Why use DynamicInputFormat?");
+      return 1;
+    }
+
+    if (nMaps > MAX_CHUNKS_IDEAL)
+      return SPLIT_RATIO_DEFAULT;
+
+    int nPickups = (int)Math.ceil((float)MAX_CHUNKS_IDEAL/nMaps);
+    int nRecordsPerChunk = (int)Math.ceil((float)nRecords/(nMaps*nPickups));
+
+    return nRecordsPerChunk < MIN_RECORDS_PER_CHUNK ?
+              SPLIT_RATIO_DEFAULT : nPickups;
+  }
+
+  static int getNumEntriesPerChunk(Configuration configuration) {
+    return DistCpUtils.getInt(configuration,
+                              CONF_LABEL_NUM_ENTRIES_PER_CHUNK);
+  }
+
+
+  /**
+   * Implementation of Inputformat::createRecordReader().
+   * @param inputSplit The split for which the RecordReader is required.
+   * @param taskAttemptContext TaskAttemptContext for the current attempt.
+   * @return DynamicRecordReader instance.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public RecordReader<K, V> createRecordReader(
+          InputSplit inputSplit,
+          TaskAttemptContext taskAttemptContext)
+          throws IOException, InterruptedException {
+    return new DynamicRecordReader<K, V>();
+  }
+}
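To make the chunking arithmetic concrete, here is a hedged walk-through of getSplitRatio() and the per-chunk entry count with made-up numbers (20 maps, 10,000 listing records); the literals mirror MAX_CHUNKS_IDEAL, MIN_RECORDS_PER_CHUNK and SPLIT_RATIO_DEFAULT above.

// Hedged arithmetic sketch, not DistCp code: 20 maps, 10000 listing records.
public class ChunkMathSketch {
  public static void main(String[] args) {
    int nMaps = 20, nRecords = 10000;
    int nPickups = (int) Math.ceil((float) 100 / nMaps);                          // 100/20 = 5
    int nRecordsPerChunk = (int) Math.ceil((float) nRecords / (nMaps * nPickups)); // 100
    int splitRatio = nRecordsPerChunk < 5 ? 2 : nPickups;                          // 100 >= 5, so 5
    int numEntriesPerChunk = (int) Math.ceil((float) nRecords / (splitRatio * nMaps)); // 100
    int nChunksTotal = (int) Math.ceil((float) nRecords / numEntriesPerChunk);         // 100
    System.out.println(splitRatio + " chunks per map on average, "
        + nChunksTotal + " chunk files in total");
  }
}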

+ 203 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/lib/DynamicRecordReader.java

@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred.lib;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.conf.Configuration;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * The DynamicRecordReader is used in conjunction with the DynamicInputFormat
+ * to implement the "Worker pattern" for DistCp.
+ * The DynamicRecordReader is responsible for:
+ * 1. Presenting the contents of each chunk to DistCp's mapper.
+ * 2. Acquiring a new chunk when the current chunk has been completely consumed,
+ *    transparently.
+ */
+public class DynamicRecordReader<K, V> extends RecordReader<K, V> {
+  private static final Log LOG = LogFactory.getLog(DynamicRecordReader.class);
+  private TaskAttemptContext taskAttemptContext;
+  private Configuration configuration;
+  private DynamicInputChunk<K, V> chunk;
+  private TaskID taskId;
+
+  // Data required for progress indication.
+  private int numRecordsPerChunk; // Constant per job.
+  private int totalNumRecords;    // Constant per job.
+  private int numRecordsProcessedByThisMap = 0;
+  private long timeOfLastChunkDirScan = 0;
+  private boolean isChunkDirAlreadyScanned = false;
+
+  private static long TIME_THRESHOLD_FOR_DIR_SCANS = TimeUnit.MINUTES.toMillis(5);
+
+  /**
+   * Implementation for RecordReader::initialize(). Initializes the internal
+   * RecordReader to read from chunks.
+   * @param inputSplit The InputSplit for the map. Ignored entirely.
+   * @param taskAttemptContext The AttemptContext.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public void initialize(InputSplit inputSplit,
+                         TaskAttemptContext taskAttemptContext)
+                         throws IOException, InterruptedException {
+    numRecordsPerChunk = DynamicInputFormat.getNumEntriesPerChunk(
+            taskAttemptContext.getConfiguration());
+    this.taskAttemptContext = taskAttemptContext;
+    configuration = taskAttemptContext.getConfiguration();
+    taskId = taskAttemptContext.getTaskAttemptID().getTaskID();
+    chunk = DynamicInputChunk.acquire(this.taskAttemptContext);
+    timeOfLastChunkDirScan = System.currentTimeMillis();
+    isChunkDirAlreadyScanned = false;
+
+    totalNumRecords = getTotalNumRecords();
+
+  }
+
+  private int getTotalNumRecords() {
+    return DistCpUtils.getInt(configuration,
+                              DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS);
+  }
+
+  /**
+   * Implementation of RecordReader::nextKeyValue().
+   * Reads the contents of the current chunk and returns them. When a chunk has
+   * been completely exhausted, a new chunk is acquired and read,
+   * transparently.
+   * @return True, if another record could be read. False, otherwise.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public boolean nextKeyValue()
+      throws IOException, InterruptedException {
+
+    if (chunk == null) {
+      if (LOG.isDebugEnabled())
+        LOG.debug(taskId + ": RecordReader is null. No records to be read.");
+      return false;
+    }
+
+    if (chunk.getReader().nextKeyValue()) {
+      ++numRecordsProcessedByThisMap;
+      return true;
+    }
+
+    if (LOG.isDebugEnabled())
+      LOG.debug(taskId + ": Current chunk exhausted. " +
+                         " Attempting to pick up new one.");
+
+    chunk.release();
+    timeOfLastChunkDirScan = System.currentTimeMillis();
+    isChunkDirAlreadyScanned = false;
+    
+    chunk = DynamicInputChunk.acquire(taskAttemptContext);
+
+    if (chunk == null) return false;
+
+    if (chunk.getReader().nextKeyValue()) {
+      ++numRecordsProcessedByThisMap;
+      return true;
+    }
+    else {
+      return false;
+    }
+  }
+
+  /**
+   * Implementation of RecordReader::getCurrentKey().
+   * @return The key of the current record. (i.e. the source-path.)
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public K getCurrentKey()
+      throws IOException, InterruptedException {
+    return chunk.getReader().getCurrentKey();
+  }
+
+  /**
+   * Implementation of RecordReader::getCurrentValue().
+   * @return The value of the current record. (i.e. the target-path.)
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public V getCurrentValue()
+      throws IOException, InterruptedException {
+    return chunk.getReader().getCurrentValue();
+  }
+
+  /**
+   * Implementation of RecordReader::getProgress().
+   * @return A fraction [0.0,1.0] indicating the progress of a DistCp mapper.
+   * @throws IOException, on failure.
+   * @throws InterruptedException
+   */
+  @Override
+  public float getProgress()
+      throws IOException, InterruptedException {
+    final int numChunksLeft = getNumChunksLeft();
+    if (numChunksLeft < 0) {// Un-initialized. i.e. Before 1st dir-scan.
+      assert numRecordsProcessedByThisMap <= numRecordsPerChunk
+              : "numRecordsProcessedByThisMap:" + numRecordsProcessedByThisMap +
+                " exceeds numRecordsPerChunk:" + numRecordsPerChunk;
+      return ((float) numRecordsProcessedByThisMap) / totalNumRecords;
+      // Conservative estimate, till the first directory scan.
+    }
+
+    return ((float) numRecordsProcessedByThisMap)
+            /(numRecordsProcessedByThisMap + numRecordsPerChunk*numChunksLeft);
+  }
+
+  private int getNumChunksLeft() throws IOException {
+    long now = System.currentTimeMillis();
+    boolean tooLongSinceLastDirScan
+                  = now - timeOfLastChunkDirScan > TIME_THRESHOLD_FOR_DIR_SCANS;
+
+    if (tooLongSinceLastDirScan
+            || (!isChunkDirAlreadyScanned &&
+                    numRecordsProcessedByThisMap%numRecordsPerChunk
+                              > numRecordsPerChunk/2)) {
+      DynamicInputChunk.getListOfChunkFiles();
+      isChunkDirAlreadyScanned = true;
+      timeOfLastChunkDirScan = now;
+    }
+
+    return DynamicInputChunk.getNumChunksLeft();
+  }
+  /**
+   * Implementation of RecordReader::close().
+   * Closes the RecordReader.
+   * @throws IOException, on failure.
+   */
+  @Override
+  public void close()
+      throws IOException {
+    if (chunk != null)
+        chunk.close();
+  }
+}
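
For orientation: the reader above is consumed through the standard org.apache.hadoop.mapreduce.RecordReader contract. The following is a minimal, hypothetical sketch (not part of this patch) of that consumption loop; the map-task framework does the equivalent internally.

    import java.io.IOException;
    import org.apache.hadoop.mapreduce.InputSplit;
    import org.apache.hadoop.mapreduce.RecordReader;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;

    public class RecordReaderDriverSketch {
      // Drains any RecordReader (e.g. the DynamicRecordReader above) the way the
      // framework would: initialize, iterate, report progress, close.
      public static <K, V> long drain(RecordReader<K, V> reader,
                                      InputSplit split,
                                      TaskAttemptContext context)
          throws IOException, InterruptedException {
        long count = 0;
        reader.initialize(split, context);
        try {
          while (reader.nextKeyValue()) {    // Rolls over to new chunks transparently.
            K key = reader.getCurrentKey();  // e.g. the source-path.
            V value = reader.getCurrentValue();
            ++count;
          }
          System.out.println("Read " + count + " records; progress = "
              + reader.getProgress());
        } finally {
          reader.close();                    // Closes the current chunk, if any.
        }
        return count;
      }
    }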

+ 343 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java

@@ -0,0 +1,343 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileChecksum;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.mapreduce.InputFormat;
+
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Locale;
+import java.text.DecimalFormat;
+import java.net.URI;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
+
+/**
+ * Utility functions used in DistCp.
+ */
+public class DistCpUtils {
+
+  private static final Log LOG = LogFactory.getLog(DistCpUtils.class);
+
+  /**
+   * Retrieves size of the file at the specified path.
+   * @param path The path of the file whose size is sought.
+   * @param configuration Configuration, to retrieve the appropriate FileSystem.
+   * @return The file-size, in number of bytes.
+   * @throws IOException, on failure.
+   */
+  public static long getFileSize(Path path, Configuration configuration)
+                                            throws IOException {
+    if (LOG.isDebugEnabled())
+      LOG.debug("Retrieving file size for: " + path);
+    return path.getFileSystem(configuration).getFileStatus(path).getLen();
+  }
+
+  /**
+   * Utility to publish a value to a configuration.
+   * @param configuration The Configuration to which the value must be written.
+   * @param label The label for the value being published.
+   * @param value The value being published.
+   * @param <T> The type of the value.
+   */
+  public static <T> void publish(Configuration configuration,
+                                 String label, T value) {
+    configuration.set(label, String.valueOf(value));
+  }
+
+  /**
+   * Utility to retrieve a specified key's integer value from a Configuration.
+   * Asserts (rather than throws) if the key is not found.
+   * @param configuration The Configuration in which the key is sought.
+   * @param label The key being sought.
+   * @return Integer value of the key.
+   */
+  public static int getInt(Configuration configuration, String label) {
+    int value = configuration.getInt(label, -1);
+    assert value >= 0 : "Couldn't find " + label;
+    return value;
+  }
+
+  /**
+   * Utility to retrieve a specified key's long value from a Configuration.
+   * Asserts (rather than throws) if the key is not found.
+   * @param configuration The Configuration in which the key is sought.
+   * @param label The key being sought.
+   * @return Long value of the key.
+   */
+  public static long getLong(Configuration configuration, String label) {
+    long value = configuration.getLong(label, -1);
+    assert value >= 0 : "Couldn't find " + label;
+    return value;
+  }
+
+  /**
+   * Returns the class that implements a copy strategy. Looks up the implementation for
+   * a particular strategy from distcp-default.xml.
+   *
+   * @param conf - Configuration object
+   * @param options - Handle to input options
+   * @return Class implementing the strategy specified in options.
+   */
+  public static Class<? extends InputFormat> getStrategy(Configuration conf,
+                                                                 DistCpOptions options) {
+    String confLabel = "distcp." +
+        options.getCopyStrategy().toLowerCase(Locale.getDefault()) + ".strategy.impl";
+    return conf.getClass(confLabel, UniformSizeInputFormat.class, InputFormat.class);
+  }
+
+  /**
+   * Gets relative path of child path with respect to a root path
+   * For ex. If childPath = /tmp/abc/xyz/file and
+   *            sourceRootPath = /tmp/abc
+   * Relative path would be /xyz/file
+   *         If childPath = /file and
+   *            sourceRootPath = /
+   * Relative path would be /file
+   * @param sourceRootPath - Source root path
+   * @param childPath - Path for which relative path is required
+   * @return - Relative portion of the child path (always prefixed with /,
+   *           unless it is empty).
+   */
+  public static String getRelativePath(Path sourceRootPath, Path childPath) {
+    String childPathString = childPath.toUri().getPath();
+    String sourceRootPathString = sourceRootPath.toUri().getPath();
+    return sourceRootPathString.equals("/") ? childPathString :
+        childPathString.substring(sourceRootPathString.length());
+  }
+
+  /**
+   * Pack file preservation attributes into a string, containing
+   * just the first character of each preservation attribute
+   * @param attributes - Attribute set to preserve
+   * @return - String containing first letters of each attribute to preserve
+   */
+  public static String packAttributes(EnumSet<FileAttribute> attributes) {
+    StringBuffer buffer = new StringBuffer(5);
+    int len = 0;
+    for (FileAttribute attribute : attributes) {
+      buffer.append(attribute.name().charAt(0));
+      len++;
+    }
+    return buffer.substring(0, len);
+  }
+
+  /**
+   * Unpacks a preservation-attribute string (containing the first character of
+   * each preservation attribute) back into a set of attributes to preserve.
+   * @param attributes - Attribute string
+   * @return - Attribute set
+   */
+  public static EnumSet<FileAttribute> unpackAttributes(String attributes) {
+    EnumSet<FileAttribute> retValue = EnumSet.noneOf(FileAttribute.class);
+
+    if (attributes != null) {
+      for (int index = 0; index < attributes.length(); index++) {
+        retValue.add(FileAttribute.getAttribute(attributes.charAt(index)));
+      }
+    }
+
+    return retValue;
+  }
+
+  /**
+   * Preserve attributes on the target path, to match those of the source
+   * file-status passed as argument. Barring the block-size, all other
+   * specified attributes are preserved by this function.
+   *
+   * @param targetFS - File system
+   * @param path - Path that needs to preserve original file status
+   * @param srcFileStatus - Original file status
+   * @param attributes - Attribute set that need to be preserved
+   * @throws IOException - Exception if any (particularly relating to group/owner
+   *                       change or any transient error)
+   */
+  public static void preserve(FileSystem targetFS, Path path,
+                              FileStatus srcFileStatus,
+                              EnumSet<FileAttribute> attributes) throws IOException {
+
+    FileStatus targetFileStatus = targetFS.getFileStatus(path);
+    String group = targetFileStatus.getGroup();
+    String user = targetFileStatus.getOwner();
+    boolean chown = false;
+
+    if (attributes.contains(FileAttribute.PERMISSION) &&
+      !srcFileStatus.getPermission().equals(targetFileStatus.getPermission())) {
+      targetFS.setPermission(path, srcFileStatus.getPermission());
+    }
+
+    if (attributes.contains(FileAttribute.REPLICATION) && ! targetFileStatus.isDirectory() &&
+        srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
+      targetFS.setReplication(path, srcFileStatus.getReplication());
+    }
+
+    if (attributes.contains(FileAttribute.GROUP) &&
+            !group.equals(srcFileStatus.getGroup())) {
+      group = srcFileStatus.getGroup();
+      chown = true;
+    }
+
+    if (attributes.contains(FileAttribute.USER) &&
+            !user.equals(srcFileStatus.getOwner())) {
+      user = srcFileStatus.getOwner();
+      chown = true;
+    }
+
+    if (chown) {
+      targetFS.setOwner(path, user, group);
+    }
+  }
+
+  /**
+   * Sort a sequence file containing Text keys and FileStatus values.
+   *
+   * @param fs - File System
+   * @param conf - Configuration
+   * @param sourceListing - Source listing file
+   * @return Path of the sorted file, i.e. the source file with "_sorted" appended to the name.
+   * @throws IOException - Any exception during sort.
+   */
+  public static Path sortListing(FileSystem fs, Configuration conf, Path sourceListing)
+      throws IOException {
+    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, Text.class, FileStatus.class, conf);
+    Path output = new Path(sourceListing.toString() +  "_sorted");
+
+    if (fs.exists(output)) {
+      fs.delete(output, false);
+    }
+
+    sorter.sort(sourceListing, output);
+    return output;
+  }
+
+  /**
+   * String utility to convert a number-of-bytes to human readable format.
+   */
+  private static ThreadLocal<DecimalFormat> FORMATTER
+                        = new ThreadLocal<DecimalFormat>() {
+    @Override
+    protected DecimalFormat initialValue() {
+      return new DecimalFormat("0.0");
+    }
+  };
+
+  public static DecimalFormat getFormatter() {
+    return FORMATTER.get();
+  }
+
+  public static String getStringDescriptionFor(long nBytes) {
+
+    char units [] = {'B', 'K', 'M', 'G', 'T', 'P'};
+
+    double current = nBytes;
+    double prev    = current;
+    int index = 0;
+
+    while ((current = current/1024) >= 1) {
+      prev = current;
+      ++index;
+    }
+
+    assert index < units.length : "Too large a number.";
+
+    return getFormatter().format(prev) + units[index];
+  }
+
+  /**
+   * Utility to compare checksums for the paths specified.
+   *
+   * If the checksums can't be retrieved, the comparison is not failed.
+   * The only time the comparison fails is when both checksums are
+   * available and they don't match.
+   *
+   * @param sourceFS FileSystem for the source path.
+   * @param source The source path.
+   * @param targetFS FileSystem for the target path.
+   * @param target The target path.
+   * @return If either checksum couldn't be retrieved, the comparison is
+   * skipped and the function returns true. If both checksums are retrieved,
+   * the function returns true if they match, and false otherwise.
+   * @throws IOException if there's an exception while retrieving checksums.
+   */
+  public static boolean checksumsAreEqual(FileSystem sourceFS, Path source,
+                                   FileSystem targetFS, Path target)
+                                   throws IOException {
+    FileChecksum sourceChecksum = null;
+    FileChecksum targetChecksum = null;
+    try {
+      sourceChecksum = sourceFS.getFileChecksum(source);
+      targetChecksum = targetFS.getFileChecksum(target);
+    } catch (IOException e) {
+      LOG.error("Unable to retrieve checksum for " + source + " or " + target, e);
+    }
+    return (sourceChecksum == null || targetChecksum == null ||
+            sourceChecksum.equals(targetChecksum));
+  }
+
+  /**
+   * Checks whether two file systems are the same, by comparing URI scheme,
+   * (canonicalized) host and port.
+   */
+  public static boolean compareFs(FileSystem srcFs, FileSystem destFs) {
+    URI srcUri = srcFs.getUri();
+    URI dstUri = destFs.getUri();
+    if (srcUri.getScheme() == null) {
+      return false;
+    }
+    if (!srcUri.getScheme().equals(dstUri.getScheme())) {
+      return false;
+    }
+    String srcHost = srcUri.getHost();
+    String dstHost = dstUri.getHost();
+    if ((srcHost != null) && (dstHost != null)) {
+      try {
+        srcHost = InetAddress.getByName(srcHost).getCanonicalHostName();
+        dstHost = InetAddress.getByName(dstHost).getCanonicalHostName();
+      } catch(UnknownHostException ue) {
+        if (LOG.isDebugEnabled())
+          LOG.debug("Could not compare file-systems. Unknown host: ", ue);
+        return false;
+      }
+      if (!srcHost.equals(dstHost)) {
+        return false;
+      }
+    }
+    else if (srcHost == null && dstHost != null) {
+      return false;
+    }
+    else if (srcHost != null) {
+      return false;
+    }
+
+    //check for ports
+
+    return srcUri.getPort() == dstUri.getPort();
+  }
+}
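
The helpers above are small and self-contained. Below is a short, hypothetical usage sketch (not part of this patch) that exercises only methods defined in this file and illustrates the results described in their Javadoc; the attribute constants are the ones referenced in preserve().

    import java.util.EnumSet;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
    import org.apache.hadoop.tools.util.DistCpUtils;

    public class DistCpUtilsSketch {
      public static void main(String[] args) {
        // Relative path: "/tmp/abc" + "/tmp/abc/xyz/file" -> "/xyz/file".
        System.out.println(DistCpUtils.getRelativePath(
            new Path("/tmp/abc"), new Path("/tmp/abc/xyz/file")));

        // Pack a preserve-set into a compact string (first letter of each
        // attribute), then unpack it back. Handy for pushing the set through
        // a Configuration.
        EnumSet<FileAttribute> toPreserve = EnumSet.of(
            FileAttribute.USER, FileAttribute.GROUP, FileAttribute.PERMISSION);
        String packed = DistCpUtils.packAttributes(toPreserve);
        System.out.println(packed + " -> " + DistCpUtils.unpackAttributes(packed));

        // Human-readable byte counts: 1536 bytes -> "1.5K".
        System.out.println(DistCpUtils.getStringDescriptionFor(1536));
      }
    }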

+ 106 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java

@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.retry.RetryPolicy;
+import org.apache.hadoop.io.retry.RetryPolicies;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * This class represents commands that may be retried on failure, in a
+ * configurable manner.
+ */
+public abstract class RetriableCommand {
+
+  private static Log LOG = LogFactory.getLog(RetriableCommand.class);
+
+  private static final long DELAY_MILLISECONDS = 500;
+  private static final int  MAX_RETRIES        = 3;
+
+  private RetryPolicy retryPolicy = RetryPolicies.
+      exponentialBackoffRetry(MAX_RETRIES, DELAY_MILLISECONDS, TimeUnit.MILLISECONDS);
+  protected String description;
+
+  /**
+   * Constructor.
+   * @param description The human-readable description of the command.
+   */
+  public RetriableCommand(String description) {
+    this.description = description;
+  }
+
+  /**
+   * Constructor.
+   * @param description The human-readable description of the command.
+   * @param retryPolicy The RetryPolicy to be used to compute retries.
+   */
+  public RetriableCommand(String description, RetryPolicy retryPolicy) {
+    this(description);
+    setRetryPolicy(retryPolicy);
+  }
+
+  /**
+   * Implement this method to define the command-logic that will be
+   * retried on failure (i.e. on Exception).
+   * @param arguments Argument-list to the command.
+   * @return Generic "Object".
+   * @throws Exception Throws Exception on complete failure.
+   */
+  protected abstract Object doExecute(Object... arguments) throws Exception;
+
+  /**
+   * The execute() method invokes doExecute() until either:
+   *  1. doExecute() succeeds, or
+   *  2. the command may no longer be retried (e.g. runs out of retry-attempts).
+   * @param arguments The list of arguments for the command.
+   * @return Generic "Object" from doExecute(), on success.
+   * @throws Exception On complete failure, an IOException (wrapping the last failure) is thrown.
+   */
+  public Object execute(Object... arguments) throws Exception {
+    Exception latestException;
+    int counter = 0;
+    do {
+      try {
+        return doExecute(arguments);
+      } catch(Exception exception) {
+        LOG.error("Failure in Retriable command: " + description, exception);
+        latestException = exception;
+      }
+      counter++;
+    } while (retryPolicy.shouldRetry(latestException, counter, 0, true).equals(RetryPolicy.RetryAction.RETRY));
+
+    throw new IOException("Couldn't run retriable-command: " + description,
+                          latestException);
+  }
+
+  /**
+   * Fluent-interface to change the RetryPolicy.
+   * @param retryHandler The new RetryPolicy instance to be used.
+   * @return Self.
+   */
+  public RetriableCommand setRetryPolicy(RetryPolicy retryHandler) {
+    this.retryPolicy = retryHandler;
+    return this;
+  }
+}
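
To illustrate the contract above, a hypothetical subclass (not part of this patch) only implements doExecute(); execute() supplies the retry loop, using the default policy of 3 retries with a 500ms exponential backoff.

    import org.apache.hadoop.tools.util.RetriableCommand;

    // Minimal sketch: a command that fails twice before succeeding, to exercise
    // the retry loop in RetriableCommand.execute().
    public class FlakyCommandSketch extends RetriableCommand {
      private int attempts = 0;

      public FlakyCommandSketch() {
        super("FlakyCommandSketch");  // Human-readable description, used in logs/errors.
      }

      @Override
      protected Object doExecute(Object... arguments) throws Exception {
        if (++attempts < 3) {
          throw new Exception("Transient failure, attempt " + attempts);
        }
        return "Succeeded on attempt " + attempts;
      }

      public static void main(String[] args) throws Exception {
        // execute() re-invokes doExecute() until it succeeds or the policy gives up,
        // in which case the last failure is wrapped in an IOException.
        System.out.println(new FlakyCommandSketch().execute());
      }
    }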

+ 139 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java

@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * The ThrottledInputStream provides bandwidth throttling on a specified
+ * InputStream. It is implemented as a wrapper on top of another InputStream
+ * instance.
+ * The throttling works by examining the number of bytes read from the underlying
+ * InputStream from the beginning, and sleep()ing for a time interval if
+ * the byte-transfer is found to exceed the specified tolerable maximum.
+ * (Thus, while the read-rate might exceed the maximum for a given short interval,
+ * the average tends towards the specified maximum, overall.)
+ */
+public class ThrottledInputStream extends InputStream {
+
+  private final InputStream rawStream;
+  private final long maxBytesPerSec;
+  private final long startTime = System.currentTimeMillis();
+
+  private long bytesRead = 0;
+  private long totalSleepTime = 0;
+
+  private static final long SLEEP_DURATION_MS = 50;
+
+  public ThrottledInputStream(InputStream rawStream) {
+    this(rawStream, Long.MAX_VALUE);
+  }
+
+  public ThrottledInputStream(InputStream rawStream, long maxBytesPerSec) {
+    assert maxBytesPerSec > 0 : "Bandwidth " + maxBytesPerSec + " is invalid"; 
+    this.rawStream = rawStream;
+    this.maxBytesPerSec = maxBytesPerSec;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public int read() throws IOException {
+    throttle();
+    int data = rawStream.read();
+    if (data != -1) {
+      bytesRead++;
+    }
+    return data;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public int read(byte[] b) throws IOException {
+    throttle();
+    int readLen = rawStream.read(b);
+    if (readLen != -1) {
+      bytesRead += readLen;
+    }
+    return readLen;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public int read(byte[] b, int off, int len) throws IOException {
+    throttle();
+    int readLen = rawStream.read(b, off, len);
+    if (readLen != -1) {
+      bytesRead += readLen;
+    }
+    return readLen;
+  }
+
+  private void throttle() throws IOException {
+    if (getBytesPerSec() > maxBytesPerSec) {
+      try {
+        Thread.sleep(SLEEP_DURATION_MS);
+        totalSleepTime += SLEEP_DURATION_MS;
+      } catch (InterruptedException e) {
+        throw new IOException("Thread aborted", e);
+      }
+    }
+  }
+
+  /**
+   * Getter for the number of bytes read from this stream, since creation.
+   * @return The number of bytes.
+   */
+  public long getTotalBytesRead() {
+    return bytesRead;
+  }
+
+  /**
+   * Getter for the read-rate from this stream, since creation.
+   * Calculated as bytesRead/elapsedTimeSinceStart.
+   * @return Read rate, in bytes/sec.
+   */
+  public long getBytesPerSec() {
+    long elapsed = (System.currentTimeMillis() - startTime) / 1000;
+    if (elapsed == 0) {
+      return bytesRead;
+    } else {
+      return bytesRead / elapsed;
+    }
+  }
+
+  /**
+   * Getter for the total time spent in sleep.
+   * @return Number of milliseconds spent in sleep.
+   */
+  public long getTotalSleepTime() {
+    return totalSleepTime;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public String toString() {
+    return "ThrottledInputStream{" +
+        "bytesRead=" + bytesRead +
+        ", maxBytesPerSec=" + maxBytesPerSec +
+        ", bytesPerSec=" + getBytesPerSec() +
+        ", totalSleepTime=" + totalSleepTime +
+        '}';
+  }
+}
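
A short, hypothetical sketch (not part of this patch) of wrapping an arbitrary InputStream with the throttle; the file-copy path would wrap its source stream in much the same way when a per-map bandwidth limit is in effect.

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import org.apache.hadoop.tools.util.ThrottledInputStream;

    public class ThrottleSketch {
      public static void main(String[] args) throws IOException {
        byte[] data = new byte[4 * 1024 * 1024];           // 4MB of dummy data.
        InputStream raw = new ByteArrayInputStream(data);

        // Cap the read-rate at ~1MB/sec. The wrapper sleeps in 50ms increments
        // whenever the running average exceeds this value, so this loop takes
        // roughly 4 seconds.
        ThrottledInputStream throttled = new ThrottledInputStream(raw, 1024 * 1024);

        byte[] buffer = new byte[8192];
        while (throttled.read(buffer) != -1) {
          // Copy/process the buffer here.
        }

        System.out.println("Read " + throttled.getTotalBytesRead() + " bytes at ~"
            + throttled.getBytesPerSec() + " B/s; slept "
            + throttled.getTotalSleepTime() + " ms");
        raw.close();
      }
    }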

+ 41 - 0
hadoop-tools/hadoop-distcp/src/main/resources/distcp-default.xml

@@ -0,0 +1,41 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Do not modify this file directly. Anything that needs to be overridden
+     should be done through -D switches or a customized conf -->
+
+<configuration>
+
+    <property>
+        <name>distcp.dynamic.strategy.impl</name>
+        <value>org.apache.hadoop.tools.mapred.lib.DynamicInputFormat</value>
+        <description>Implementation of dynamic input format</description>
+    </property>
+
+    <property>
+        <name>distcp.static.strategy.impl</name>
+        <value>org.apache.hadoop.tools.mapred.UniformSizeInputFormat</value>
+        <description>Implementation of static input format</description>
+    </property>
+
+    <property>
+        <name>mapred.job.map.memory.mb</name>
+        <value>1024</value>
+    </property>
+
+    <property>
+        <name>mapred.job.reduce.memory.mb</name>
+        <value>1024</value>
+    </property>
+
+    <property>
+        <name>mapred.reducer.new-api</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>mapreduce.reduce.class</name>
+        <value>org.apache.hadoop.mapreduce.Reducer</value>
+    </property>
+
+</configuration>
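
These *.strategy.impl properties are what the -strategy option resolves against: DistCpUtils.getStrategy() builds the label "distcp.<strategy>.strategy.impl" and falls back to UniformSizeInputFormat. A hypothetical sketch of the same lookup (not part of this patch), assuming distcp-default.xml is visible on the classpath:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.InputFormat;
    import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;

    public class StrategyLookupSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Assumption: the file is on the classpath of this sketch; DistCp adds it
        // to its own job configuration.
        conf.addResource("distcp-default.xml");

        // "-strategy dynamic" becomes the label "distcp.dynamic.strategy.impl".
        Class<? extends InputFormat> strategy = conf.getClass(
            "distcp.dynamic.strategy.impl",
            UniformSizeInputFormat.class,
            InputFormat.class);
        System.out.println("Strategy class: " + strategy.getName());
      }
    }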

+ 98 - 0
hadoop-tools/hadoop-distcp/src/site/fml/faq.fml

@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="ISO-8859-1" ?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<faqs xmlns="http://maven.apache.org/FML/1.0.1"
+      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+      xsi:schemaLocation="http://maven.apache.org/FML/1.0.1 http://maven.apache.org/xsd/fml-1.0.1.xsd"
+      title="Frequently Asked Questions">
+  <part id="General">
+    <title>General</title>
+
+    <faq id="Update">
+      <question>Why does -update not create the parent source-directory under
+      a pre-existing target directory?</question>
+      <answer>The behaviour of <code>-update</code> and <code>-overwrite</code>
+      is described in detail in the Usage section of this document. In short,
+      if either option is used with a pre-existing destination directory, the
+      <strong>contents</strong> of each source directory are copied over, rather
+      than the source-directory itself.
+      This behaviour is consistent with the legacy DistCp implementation as well.
+      </answer>
+    </faq>
+
+    <faq id="Deviation">
+      <question>How does the new DistCp differ in semantics from the Legacy
+      DistCp?</question>
+      <answer>
+          <ul>
+              <li>Files that are skipped during copy used to also have their
+              file-attributes (permissions, owner/group info, etc.) unchanged,
+              when copied with Legacy DistCp. These are now updated, even if
+              the file-copy is skipped.</li>
+              <li>Empty root directories among the source-path inputs were not
+              created at the target, in Legacy DistCp. These are now created.</li>
+          </ul>
+      </answer>
+    </faq>
+
+    <faq id="nMaps">
+      <question>Why does the new DistCp use more maps than legacy DistCp?</question>
+      <answer>
+          <p>Legacy DistCp works by figuring out what files need to be actually
+      copied to target <strong>before</strong> the copy-job is launched, and then
+      launching as many maps as required for copy. So if a majority of the files
+      need to be skipped (because they already exist, for example), fewer maps
+      will be needed. As a consequence, the time spent in setup (i.e. before the
+      M/R job) is higher.</p>
+          <p>The new DistCp calculates only the contents of the source-paths. It
+      doesn't try to filter out what files can be skipped. That decision is
+      put off till the M/R job runs. This is much faster (vis-a-vis execution-time),
+      but the number of maps launched will be as specified in the <code>-m</code>
+      option, or 20 (default) if unspecified.</p>
+      </answer>
+    </faq>
+
+    <faq id="more_maps">
+      <question>Why does DistCp not run faster when more maps are specified?</question>
+      <answer>
+          <p>At present, the smallest unit of work for DistCp is a file. i.e.,
+          a file is processed by only one map. Increasing the number of maps to
+          a value exceeding the number of files would yield no performance
+          benefit. The number of maps launched would equal the number of files.</p>
+      </answer>
+    </faq>
+
+    <faq id="client_mem">
+      <question>Why does DistCp run out of memory?</question>
+      <answer>
+          <p>If the number of individual files/directories being copied from
+      the source path(s) is extremely large (e.g. 1,000,000 paths), DistCp might
+      run out of memory while determining the list of paths for copy. This is
+      not unique to the new DistCp implementation.</p>
+          <p>To get around this, consider changing the <code>-Xmx</code> JVM
+      heap-size parameters, as follows:</p>
+          <p><code>bash$ export HADOOP_CLIENT_OPTS="-Xms64m -Xmx1024m"</code></p>
+          <p><code>bash$ hadoop distcp /source /target</code></p>
+      </answer>
+    </faq>
+
+  </part>
+</faqs>

+ 47 - 0
hadoop-tools/hadoop-distcp/src/site/pdf.xml

@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<!-- START SNIPPET: docDescriptor -->
+<document xmlns="http://maven.apache.org/DOCUMENT/1.0.1"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/DOCUMENT/1.0.1 http://maven.apache.org/xsd/document-1.0.1.xsd"
+  outputName="distcp">
+
+  <meta>
+    <title>${project.name}</title>
+  </meta>
+
+  <toc name="Table of Contents">
+    <item name="Introduction" ref="index.xml"/>
+    <item name="Usage" ref="usage.xml"/>
+    <item name="Command Line Reference" ref="cli.xml"/>
+    <item name="Architecture" ref="architecture.xml"/>
+    <item name="Appendix" ref="appendix.xml"/>
+    <item name="FAQ" ref="faq.fml"/>
+  </toc>
+  <cover>
+    <coverTitle>${project.name}</coverTitle>
+    <coverSubTitle>v. ${project.version}</coverSubTitle>
+    <coverType>User Guide</coverType>
+    <projectName>${project.name}</projectName>
+    <companyName>Apache Hadoop</companyName>
+  </cover>
+</document>

+ 125 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/appendix.xml

@@ -0,0 +1,125 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <head>
+    <title>Appendix</title>
+  </head>
+  <body>
+    <section name="Map sizing">
+ 
+      <p> By default, DistCp makes an attempt to size each map comparably so
+      that each copies roughly the same number of bytes. Note that files are the
+      finest level of granularity, so increasing the number of simultaneous
+      copiers (i.e. maps) may not always increase the number of
+      simultaneous copies nor the overall throughput.</p>
+
+      <p> The new DistCp also provides a strategy to "dynamically" size maps,
+      allowing faster data-nodes to copy more bytes than slower nodes. Using
+      <code>-strategy dynamic</code> (explained in the Architecture section),
+      rather than assigning a fixed set of source-files to each map-task, files
+      are instead split into several sets. The number of sets exceeds the number of
+      maps, usually by a factor of 2-3. Each map picks up and copies all files
+      listed in a chunk. When a chunk is exhausted, a new chunk is acquired and
+      processed, until no more chunks remain.</p>
+
+      <p> By not assigning a source-path to a fixed map, faster map-tasks (i.e.
+      data-nodes) are able to consume more chunks, and thus copy more data,
+      than slower nodes. While this distribution isn't uniform, it is
+      <strong>fair</strong> with regard to each mapper's capacity.</p>
+
+      <p>The dynamic-strategy is implemented by the DynamicInputFormat. It
+      provides superior performance under most conditions. </p>
+
+      <p>Tuning the number of maps to the size of the source and
+      destination clusters, the size of the copy, and the available
+      bandwidth is recommended for long-running and regularly run jobs.</p>
+
+   </section>
+
+   <section name="Copying between versions of HDFS">
+
+        <p>For copying between two different versions of Hadoop, one will
+        usually use HftpFileSystem. This is a read-only FileSystem, so DistCp
+        must be run on the destination cluster (more specifically, on
+        TaskTrackers that can write to the destination cluster). Each source is
+        specified as <code>hftp://&lt;dfs.http.address&gt;/&lt;path&gt;</code>
+        (the default <code>dfs.http.address</code> is
+        &lt;namenode&gt;:50070).</p>
+
+   </section>
+
+   <section name="Map/Reduce and other side-effects">
+
+        <p>As has been mentioned in the preceding, should a map fail to copy
+        one of its inputs, there will be several side-effects.</p>
+
+        <ul>
+
+          <li>Unless <code>-overwrite</code> is specified, files successfully
+          copied by a previous map on a re-execution will be marked as
+          &quot;skipped&quot;.</li>
+
+          <li>If a map fails <code>mapred.map.max.attempts</code> times, the
+          remaining map tasks will be killed (unless <code>-i</code> is
+          set).</li>
+
+          <li>If <code>mapred.speculative.execution</code> is set
+          <code>final</code> and <code>true</code>, the result of the copy is
+          undefined.</li>
+
+        </ul>
+
+   </section>
+
+   <section name="SSL Configurations for HSFTP sources:">
+
+       <p>To use an HSFTP source (i.e. using the hsftp protocol), a Map-Red SSL
+       configuration file needs to be specified (via the <code>-mapredSslConf</code>
+       option). This must specify 3 parameters:</p>
+
+       <ul>
+           <li><code>ssl.client.truststore.location</code>: The local-filesystem
+            location of the trust-store file, containing the certificate for
+            the namenode.</li>
+
+           <li><code>ssl.client.truststore.type</code>: (Optional) The format of
+           the trust-store file.</li>
+
+           <li><code>ssl.client.truststore.password</code>: (Optional) Password
+           for the trust-store file.</li>
+
+       </ul>
+
+       <p>The following is an example of the contents of a Map-Red SSL
+       Configuration file:</p>
+
+           <p> <br/> <code> &lt;configuration&gt; </code> </p>
+
+           <p> <br/> <code>&lt;property&gt; </code> </p>
+           <p> <code>&lt;name&gt;ssl.client.truststore.location&lt;/name&gt; </code> </p>
+           <p> <code>&lt;value&gt;/work/keystore.jks&lt;/value&gt; </code> </p>
+           <p> <code>&lt;description&gt;Truststore to be used by clients like distcp. Must be specified. &lt;/description&gt;</code> </p>
+           <p> <br/> <code>&lt;/property&gt; </code> </p>
+
+           <p><code> &lt;property&gt; </code> </p>
+           <p> <code>&lt;name&gt;ssl.client.truststore.password&lt;/name&gt; </code> </p>
+           <p> <code>&lt;value&gt;changeme&lt;/value&gt; </code> </p>
+           <p> <code>&lt;description&gt;Optional. Default value is "". &lt;/description&gt;  </code> </p>
+           <p> <code>&lt;/property&gt; </code>  </p>
+
+           <p> <br/> <code> &lt;property&gt; </code> </p>
+           <p> <code> &lt;name&gt;ssl.client.truststore.type&lt;/name&gt;</code>  </p>
+           <p> <code> &lt;value&gt;jks&lt;/value&gt;</code>  </p>
+           <p> <code> &lt;description&gt;Optional. Default value is "jks". &lt;/description&gt;</code>  </p>
+           <p> <code> &lt;/property&gt; </code> </p>
+
+           <p> <code> <br/> &lt;/configuration&gt; </code> </p>
+
+       <p><br/>The SSL configuration file must be in the class-path of the 
+       DistCp program.</p>
+
+   </section>
+
+  </body>
+</document>

+ 200 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/architecture.xml

@@ -0,0 +1,200 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+    <head>
+        <title>Architecture of DistCp</title>
+    </head>
+    <body>
+      <section name="Architecture">
+
+        <p>The components of the new DistCp may be classified into the following
+           categories: </p>
+
+        <ul>
+
+          <li>DistCp Driver</li>
+          <li>Copy-listing generator</li>
+          <li>Input-formats and Map-Reduce components</li>
+
+        </ul>
+
+        <subsection name="DistCp Driver">
+          <p>The DistCp Driver components are responsible for:</p>
+
+          <ul>
+            <li>Parsing the arguments passed to the DistCp command on the
+                command-line, via:
+              <ul>
+                <li>OptionsParser, and</li>
+                <li>DistCpOptionSwitch</li>
+              </ul>
+            </li>
+            <li>Assembling the command arguments into an appropriate
+                DistCpOptions object, and initializing DistCp. These arguments
+                include:
+              <ul>
+                <li>Source-paths</li>
+                <li>Target location</li>
+                <li>Copy options (e.g. whether to update-copy, overwrite, which
+                    file-attributes to preserve, etc.)</li>
+              </ul>
+            </li>
+            <li>Orchestrating the copy operation by:
+              <ul>
+                <li>Invoking the copy-listing-generator to create the list of
+                    files to be copied.</li>
+                <li>Setting up and launching the Hadoop Map-Reduce Job to carry
+                    out the copy.</li>
+                <li>Based on the options, either returning a handle to the
+                    Hadoop MR Job immediately, or waiting till completion.</li>
+              </ul>
+            </li>
+          </ul>
+          <br/>
+
+          <p>The parser-elements are exercised only from the command-line (or if
+             DistCp::run() is invoked). The DistCp class may also be used
+             programmatically, by constructing the DistCpOptions object, and
+             initializing a DistCp object appropriately.</p>
+
+        </subsection>
+
+        <subsection name="Copy-listing generator">
+
+          <p>The copy-listing-generator classes are responsible for creating the
+             list of files/directories to be copied from source. They examine
+             the contents of the source-paths (files/directories, including
+             wild-cards), and record all paths that need copy into a sequence-
+             file, for consumption by the DistCp Hadoop Job. The main classes in
+             this module include:</p>
+
+          <ol>
+
+            <li>CopyListing: The interface that should be implemented by any 
+                copy-listing-generator implementation. Also provides the factory
+                method by which the concrete CopyListing implementation is
+                chosen.</li>
+
+            <li>SimpleCopyListing: An implementation of CopyListing that accepts
+                multiple source paths (files/directories), and recursively lists
+                all the individual files and directories under each, for
+                copy.</li>
+
+            <li>GlobbedCopyListing: Another implementation of CopyListing that
+                expands wild-cards in the source paths.</li>
+
+            <li>FileBasedCopyListing: An implementation of CopyListing that
+                reads the source-path list from a specified file.</li>
+
+          </ol>
+          <p/>
+
+          <p>Based on whether a source-file-list is specified in the
+             DistCpOptions, the source-listing is generated in one of the
+             following ways:</p>
+
+          <ol>
+
+            <li>If there's no source-file-list, the GlobbedCopyListing is used.
+                All wild-cards are expanded, and all the expansions are
+                forwarded to the SimpleCopyListing, which in turn constructs the
+                listing (via recursive descent of each path). </li>
+
+            <li>If a source-file-list is specified, the FileBasedCopyListing is
+                used. Source-paths are read from the specified file, and then
+                forwarded to the GlobbedCopyListing. The listing is then
+                constructed as described above.</li>
+
+          </ol>
+
+          <br/>
+
+          <p>One may customize the method by which the copy-listing is
+             constructed by providing a custom implementation of the CopyListing
+             interface. The behaviour of DistCp differs here from the legacy
+             DistCp, in how paths are considered for copy. </p>
+
+          <p>The legacy implementation only lists those paths that must
+             definitely be copied on to target.
+             E.g. if a file already exists at the target (and -overwrite isn't
+             specified), the file isn't even considered in the Map-Reduce Copy
+             Job. Determining this during setup (i.e. before the Map-Reduce Job)
+             involves file-size and checksum-comparisons that are potentially
+             time-consuming.</p>
+
+          <p>The new DistCp postpones such checks until the Map-Reduce Job, thus
+             reducing setup time. Performance is enhanced further since these
+             checks are parallelized across multiple maps.</p>
+
+        </subsection>
+
+        <subsection name="Input-formats and Map-Reduce components">
+
+          <p> The Input-formats and Map-Reduce components are responsible for
+              the actual copy of files and directories from the source to the
+              destination path. The listing-file created during copy-listing
+              generation is consumed at this point, when the copy is carried
+              out. The classes of interest here include:</p>
+
+          <ul>
+            <li><strong>UniformSizeInputFormat:</strong> This implementation of
+                org.apache.hadoop.mapreduce.InputFormat provides equivalence
+                with Legacy DistCp in balancing load across maps.
+                The aim of the UniformSizeInputFormat is to make each map copy
+                roughly the same number of bytes. Accordingly, the listing file is
+                split into groups of paths, such that the sum of file-sizes in
+                each InputSplit is nearly equal to that of every other split. The splitting
+                isn't always perfect, but its trivial implementation keeps the
+                setup-time low.</li>
+
+            <li><strong>DynamicInputFormat and DynamicRecordReader:</strong>
+                <p> The DynamicInputFormat implements org.apache.hadoop.mapreduce.InputFormat,
+                and is new to DistCp. The listing-file is split into several
+                "chunk-files", the exact number of chunk-files being a multiple
+                of the number of maps requested for in the Hadoop Job. Each map
+                task is "assigned" one of the chunk-files (by renaming the chunk
+                to the task's id), before the Job is launched.</p>
+
+                <p>Paths are read from each chunk using the DynamicRecordReader,
+                and processed in the CopyMapper. After all the paths in a chunk
+                are processed, the current chunk is deleted and a new chunk is
+                acquired. The process continues until no more chunks are
+                available.</p>
+                <p>This "dynamic" approach allows faster map-tasks to consume
+                more paths than slower ones, thus speeding up the DistCp job
+                overall. </p>
+            </li>
+
+            <li><strong>CopyMapper:</strong> This class implements the physical
+                file-copy. The input-paths are checked against the input-options
+                (specified in the Job's Configuration), to determine whether a
+                file needs copy. A file will be copied only if at least one of
+                the following is true:
+              <ul>
+                <li>A file with the same name doesn't exist at target.</li>
+                <li>A file with the same name exists at target, but has a
+                    different file size.</li>
+                <li>A file with the same name exists at target, but has a
+                    different checksum, and -skipcrccheck isn't mentioned.</li>
+                <li>A file with the same name exists at target, but -overwrite
+                    is specified.</li>
+                <li>A file with the same name exists at target, but differs in
+                    block-size (and block-size needs to be preserved).</li>
+              </ul>
+            </li>
+
+            <li><strong>CopyCommitter:</strong>
+                This class is responsible for the commit-phase of the DistCp
+                job, including:
+              <ul>
+                <li>Preservation of directory-permissions (if specified in the
+                    options)</li>
+                <li>Clean-up of temporary-files, work-directories, etc.</li>
+              </ul>
+            </li>
+          </ul>
+        </subsection>
+      </section>
+    </body>
+</document>
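
The driver description above notes that DistCp can be used programmatically, by constructing a DistCpOptions object and initializing a DistCp object with it. The sketch below (not part of this patch) shows the general shape; the constructor signatures used here are assumptions, and the authoritative ones are in DistCpOptions.java and DistCp.java of this patch.

    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.tools.DistCp;
    import org.apache.hadoop.tools.DistCpOptions;

    public class ProgrammaticDistCpSketch {
      public static void main(String[] args) throws Exception {
        // Assumed constructor: DistCpOptions(List<Path> sourcePaths, Path targetPath).
        DistCpOptions options = new DistCpOptions(
            Arrays.asList(new Path("hdfs://nn1:8020/source/first"),
                          new Path("hdfs://nn1:8020/source/second")),
            new Path("hdfs://nn2:8020/target"));

        // Assumed constructor: DistCp(Configuration, DistCpOptions). execute() builds
        // the copy-listing, launches the Map-Reduce job and (unless -async) waits for it.
        DistCp distCp = new DistCp(new Configuration(), options);
        distCp.execute();
      }
    }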

+ 123 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/cli.xml

@@ -0,0 +1,123 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <head>
+    <title>Command Line Options</title>
+  </head>
+  <body>
+      <section name="Options Index"> 
+        <table>
+          <tr><th> Flag </th><th> Description </th><th> Notes </th></tr>
+
+          <tr><td><code>-p[rbugp]</code></td>
+              <td>Preserve<br/>
+                  r: replication number<br/>
+                  b: block size<br/>
+                  u: user<br/>
+                  g: group<br/>
+                  p: permission<br/></td>
+              <td>Modification times are not preserved. Also, when
+              <code>-update</code> is specified, status updates will
+              <strong>not</strong> be synchronized unless the file sizes
+              also differ (i.e. unless the file is re-created).
+              </td></tr>
+          <tr><td><code>-i</code></td>
+              <td>Ignore failures</td>
+              <td>As explained in the Appendix, this option
+              will keep more accurate statistics about the copy than the
+              default case. It also preserves logs from failed copies, which
+              can be valuable for debugging. Finally, a failing map will not
+              cause the job to fail before all splits are attempted.
+              </td></tr>
+          <tr><td><code>-log &lt;logdir&gt;</code></td>
+              <td>Write logs to &lt;logdir&gt;</td>
+              <td>DistCp keeps logs of each file it attempts to copy as map
+              output. If a map fails, the log output will not be retained if
+              it is re-executed.
+              </td></tr>
+          <tr><td><code>-m &lt;num_maps&gt;</code></td>
+              <td>Maximum number of simultaneous copies</td>
+              <td>Specify the number of maps to copy data. Note that more maps
+              may not necessarily improve throughput.
+              </td></tr>
+          <tr><td><code>-overwrite</code></td>
+              <td>Overwrite destination</td>
+              <td>If a map fails and <code>-i</code> is not specified, all the
+              files in the split, not only those that failed, will be recopied.
+              As discussed in the Usage documentation, it also changes
+              the semantics for generating destination paths, so users should
+              use this carefully.
+              </td></tr>
+          <tr><td><code>-update</code></td>
+              <td>Overwrite if src size different from dst size</td>
+              <td>As noted in the preceding, this is not a &quot;sync&quot;
+              operation. The only criterion examined is the source and
+              destination file sizes; if they differ, the source file
+              replaces the destination file. As discussed in the
+              Usage documentation, it also changes the semantics for
+              generating destination paths, so users should use this carefully.
+              </td></tr>
+          <tr><td><code>-f &lt;urilist_uri&gt;</code></td>
+              <td>Use list at &lt;urilist_uri&gt; as src list</td>
+              <td>This is equivalent to listing each source on the command
+              line. The <code>urilist_uri</code> list should be a fully
+              qualified URI.
+              </td></tr>
+          <tr><td><code>-filelimit &lt;n&gt;</code></td>
+              <td>Limit the total number of files to be &lt;= n</td>
+              <td><strong>Deprecated!</strong> Ignored in the new DistCp.
+              </td></tr>
+          <tr><td><code>-sizelimit &lt;n&gt;</code></td>
+              <td>Limit the total size to be &lt;= n bytes</td>
+              <td><strong>Deprecated!</strong> Ignored in the new DistCp.
+              </td></tr>
+          <tr><td><code>-delete</code></td>
+              <td>Delete the files existing in the dst but not in src</td>
+              <td>The deletion is done by the FS Shell, so the trash will be used,
+                  if it is enabled.
+              </td></tr>
+          <tr><td><code>-strategy {dynamic|uniformsize}</code></td>
+              <td>Choose the copy-strategy to be used in DistCp.</td>
+              <td>By default, uniformsize is used. (i.e. Maps are balanced on the
+                  total size of files copied by each map. Similar to legacy.)
+                  If "dynamic" is specified, <code>DynamicInputFormat</code> is
+                  used instead. (This is described in the Architecture section,
+                  under InputFormats.)
+              </td></tr>
+          <tr><td><code>-bandwidth</code></td>
+                <td>Specify bandwidth per map, in MB/second.</td>
+                <td>Each map will be restricted to consume only the specified
+                    bandwidth. This is not always exact. The map throttles back
+                    its bandwidth consumption during a copy, such that the
+                    <strong>net</strong> bandwidth used tends towards the
+                    specified value.
+                </td></tr>
+          <tr><td><code>-atomic {-tmp &lt;tmp_dir&gt;}</code></td>
+                <td>Specify atomic commit, with optional tmp directory.</td>
+                <td><code>-atomic</code> instructs DistCp to copy the source
+                    data to a temporary target location, and then move the
+                    temporary target to the final-location atomically. Data will
+                    either be available at final target in a complete and consistent
+                    form, or not at all.
+                    Optionally, <code>-tmp</code> may be used to specify the
+                    location of the tmp-target. If not specified, a default is
+                    chosen. <strong>Note:</strong> tmp_dir must be on the final
+                    target cluster.
+                </td></tr>
+            <tr><td><code>-mapredSslConf &lt;ssl_conf_file&gt;</code></td>
+                  <td>Specify SSL Config file, to be used with HSFTP source</td>
+                  <td>When using the hsftp protocol with a source, the
+                      security-related properties may be specified in a config-file and
+                      passed to DistCp. &lt;ssl_conf_file&gt; needs to be in
+                      the classpath.
+                  </td></tr>
+            <tr><td><code>-async</code></td>
+                  <td>Run DistCp asynchronously. Quits as soon as the Hadoop
+                  Job is launched.</td>
+                  <td>The Hadoop Job-id is logged, for tracking.
+                  </td></tr>
+        </table>
+      </section>
+  </body>
+</document>
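
The flags in the table above are converted into a DistCpOptions instance by OptionsParser (see the Architecture document). A hypothetical sketch (not part of this patch), assuming OptionsParser exposes a static parse(String[]) that returns a DistCpOptions:

    import org.apache.hadoop.tools.DistCpOptions;
    import org.apache.hadoop.tools.OptionsParser;

    public class CliParsingSketch {
      public static void main(String[] args) {
        // The same arguments one would pass on the command line.
        String[] distcpArgs = {
            "-update",
            "-strategy", "dynamic",
            "-bandwidth", "10",
            "hdfs://nn1:8020/source/first",
            "hdfs://nn2:8020/target"
        };
        // Assumed to throw IllegalArgumentException on malformed input.
        DistCpOptions options = OptionsParser.parse(distcpArgs);
        System.out.println(options);
      }
    }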

+ 32 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/index.xml

@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <head>
+    <title>DistCp</title>
+  </head>
+  <body>
+    <section name="Overview">
+      <p>
+        DistCp (distributed copy) is a tool used for large inter/intra-cluster
+      copying. It uses Map/Reduce to effect its distribution, error
+      handling and recovery, and reporting. It expands a list of files and
+      directories into input to map tasks, each of which will copy a partition
+      of the files specified in the source list.
+      </p>
+      <p>
+       The erstwhile implementation of DistCp has its share of quirks and
+       drawbacks, both in its usage, as well as its extensibility and
+       performance. The purpose of the DistCp refactor was to fix these shortcomings,
+       enabling it to be used and extended programmatically. New paradigms have
+       been introduced to improve runtime and setup performance, while simultaneously
+       retaining the legacy behaviour as default.
+      </p>
+      <p>
+       This document aims to describe the design of the new DistCp, its spanking
+       new features, their optimal use, and any deviance from the legacy
+       implementation.
+      </p>
+    </section>
+  </body>
+</document>

+ 147 - 0
hadoop-tools/hadoop-distcp/src/site/xdoc/usage.xml

@@ -0,0 +1,147 @@
+<document xmlns="http://maven.apache.org/XDOC/2.0"
+          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/XDOC/2.0 http://maven.apache.org/xsd/xdoc-2.0.xsd">
+  <head>
+    <title>Usage </title>
+  </head>
+  <body>
+    <section name="Basic Usage">
+        <p>The most common invocation of DistCp is an inter-cluster copy:</p>
+        <p><code>bash$ hadoop jar hadoop-distcp.jar hdfs://nn1:8020/foo/bar \</code><br/>
+           <code>                    hdfs://nn2:8020/bar/foo</code></p>
+
+        <p>This will expand the namespace under <code>/foo/bar</code> on nn1
+        into a temporary file, partition its contents among a set of map
+        tasks, and start a copy on each TaskTracker from nn1 to nn2.</p>
+
+        <p>One can also specify multiple source directories on the command
+        line:</p>
+        <p><code>bash$ hadoop jar hadoop-distcp.jar hdfs://nn1:8020/foo/a \</code><br/>
+           <code> hdfs://nn1:8020/foo/b \</code><br/>
+           <code> hdfs://nn2:8020/bar/foo</code></p>
+
+        <p>Or, equivalently, from a file using the <code>-f</code> option:<br/>
+        <code>bash$ hadoop jar hadoop-distcp.jar -f hdfs://nn1:8020/srclist \</code><br/>
+        <code> hdfs://nn2:8020/bar/foo</code><br/></p>
+
+        <p>Where <code>srclist</code> contains<br/>
+        <code>hdfs://nn1:8020/foo/a</code><br/>
+        <code>hdfs://nn1:8020/foo/b</code></p>
+
+        <p>When copying from multiple sources, DistCp will abort the copy with
+        an error message if two sources collide, but collisions at the
+        destination are resolved per the <a href="#options">options</a>
+        specified. By default, files already existing at the destination are
+        skipped (i.e. not replaced by the source file). A count of skipped
+        files is reported at the end of each job, but it may be inaccurate if a
+        copier failed for some subset of its files, but succeeded on a later
+        attempt.</p>
+
+        <p>It is important that each TaskTracker can reach and communicate with
+        both the source and destination file systems. For HDFS, both the source
+        and destination must be running the same version of the protocol or use
+        a backwards-compatible protocol (see <a href="#cpver">Copying Between
+        Versions</a>).</p>
+
+        <p>After a copy, it is recommended that one generates and cross-checks
+        a listing of the source and destination to verify that the copy was
+        truly successful. Since DistCp employs both Map/Reduce and the
+        FileSystem API, issues in or between any of the three could adversely
+        and silently affect the copy. Some have had success running with
+        <code>-update</code> enabled to perform a second pass, but users should
+        be acquainted with its semantics before attempting this.</p>
+
+        <p>It's also worth noting that if another client is still writing to a
+        source file, the copy will likely fail. Attempting to overwrite a file
+        being written at the destination should also fail on HDFS. If a source
+        file is (re)moved before it is copied, the copy will fail with a
+        FileNotFoundException.</p>
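+        <p>Most aspects of the copy can be tuned via command-line switches.
+        For instance, the number of simultaneous copiers can be capped with
+        the <code>-m</code> option (as with the legacy DistCp), so an
+        invocation might look like:</p>
+        <p><code>bash$ hadoop jar hadoop-distcp.jar -m 20 hdfs://nn1:8020/foo/bar \</code><br/>
+           <code>                    hdfs://nn2:8020/bar/foo</code></p>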
+
+        <p>Please refer to the detailed Command Line Reference for information
+        on all the options available in DistCp.</p>
+        
+    </section>
+    <section name="Update and Overwrite">
+
+        <p><code>-update</code> is used to copy files from the source that
+        either don't exist at the target, or differ from the version at the
+        target. <code>-overwrite</code> overwrites files at the target whenever
+        they exist, even if their contents match the source.</p>
+
+        <p><br/>Update and Overwrite options warrant special attention, since their
+        handling of source-paths varies from the defaults in a very subtle manner.
+        Consider a copy from <code>/source/first/</code> and
+        <code>/source/second/</code> to <code>/target/</code>, where the source
+        paths have the following contents:</p>
+
+        <p><code>hdfs://nn1:8020/source/first/1</code><br/>
+           <code>hdfs://nn1:8020/source/first/2</code><br/>
+           <code>hdfs://nn1:8020/source/second/10</code><br/>
+           <code>hdfs://nn1:8020/source/second/20</code><br/></p>
+
+        <p><br/>When DistCp is invoked without <code>-update</code> or
+        <code>-overwrite</code>, it will, by default, create the directories
+        <code>first/</code> and <code>second/</code> under <code>/target</code>.
+        Thus:<br/></p>
+
+        <p><code>distcp hdfs://nn1:8020/source/first hdfs://nn1:8020/source/second hdfs://nn2:8020/target</code></p>
+        <p><br/>would yield the following contents in <code>/target</code>: </p>
+
+        <p><code>hdfs://nn2:8020/target/first/1</code><br/>
+           <code>hdfs://nn2:8020/target/first/2</code><br/>
+           <code>hdfs://nn2:8020/target/second/10</code><br/>
+           <code>hdfs://nn2:8020/target/second/20</code><br/></p>
+
+        <p><br/>When either <code>-update</code> or <code>-overwrite</code> is
+            specified, the <strong>contents</strong> of the source-directories
+            are copied to the target, not the source directories themselves. Thus: </p>
+
+        <p><code>distcp -update hdfs://nn1:8020/source/first hdfs://nn1:8020/source/second hdfs://nn2:8020/target</code></p>
+
+        <p><br/>would yield the following contents in <code>/target</code>: </p>
+
+        <p><code>hdfs://nn2:8020/target/1</code><br/>
+           <code>hdfs://nn2:8020/target/2</code><br/>
+           <code>hdfs://nn2:8020/target/10</code><br/>
+           <code>hdfs://nn2:8020/target/20</code><br/></p>
+
+        <p><br/>By extension, if both source folders contained a file with the same
+        name (say, <code>0</code>), then both sources would map an entry to
+        <code>/target/0</code> at the destination. Rather than permit this
+        conflict, DistCp will abort.</p>
+
+        <p><br/>Now, consider the following copy operation:</p>
+
+        <p><code>distcp -update hdfs://nn1:8020/source/first hdfs://nn1:8020/source/second hdfs://nn2:8020/target</code></p>
+
+        <p><br/>With sources/sizes:</p>
+
+        <p><code>hdfs://nn1:8020/source/first/1     32</code><br/>
+           <code>hdfs://nn1:8020/source/first/2     32</code><br/>
+           <code>hdfs://nn1:8020/source/second/10   64</code><br/>
+           <code>hdfs://nn1:8020/source/second/20   32</code><br/></p>
+
+        <p><br/>And destination/sizes:</p>
+
+        <p><code>hdfs://nn2:8020/target/1   32</code><br/>
+           <code>hdfs://nn2:8020/target/10  32</code><br/>
+           <code>hdfs://nn2:8020/target/20  64</code><br/></p>
+
+        <p><br/>The copy would then yield the following in <code>/target</code>: </p>
+
+        <p><code>hdfs://nn2:8020/target/1   32</code><br/>
+           <code>hdfs://nn2:8020/target/2   32</code><br/>
+           <code>hdfs://nn2:8020/target/10  64</code><br/>
+           <code>hdfs://nn2:8020/target/20  32</code><br/></p>
+
+        <p><br/><code>1</code> is skipped because the file-length and contents match.
+        <code>2</code> is copied because it doesn't exist at the target.
+        <code>10</code> and <code>20</code> are overwritten since the contents
+        don't match the source. </p>
+
+        <p>If <code>-overwrite</code> is used, <code>1</code> is overwritten as well.</p>
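+        <p>For example, the equivalent <code>-overwrite</code> invocation:</p>
+        <p><code>distcp -overwrite hdfs://nn1:8020/source/first hdfs://nn1:8020/source/second hdfs://nn2:8020/target</code></p>
+        <p>would be expected to (re)copy all four source files to
+        <code>/target</code>, regardless of whether matching files already
+        exist there.</p>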
+
+    </section>
+  </body>
+
+</document>

+ 139 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/StubContext.java

@@ -0,0 +1,139 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.MapContextImpl;
+import org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.conf.Configuration;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.io.IOException;
+
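+/**
+ * Test helper that provides a stubbed Mapper.Context, backed by an in-memory
+ * RecordWriter and a no-op StatusReporter, so mapper code can be exercised in
+ * unit tests without running a real MapReduce job.
+ */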
+public class StubContext {
+
+  private StubStatusReporter reporter = new StubStatusReporter();
+  private RecordReader<Text, FileStatus> reader;
+  private StubInMemoryWriter writer = new StubInMemoryWriter();
+  private Mapper<Text, FileStatus, Text, Text>.Context mapperContext;
+
+  public StubContext(Configuration conf, RecordReader<Text, FileStatus> reader,
+                     int taskId) throws IOException, InterruptedException {
+
+    WrappedMapper<Text, FileStatus, Text, Text> wrappedMapper
+            = new WrappedMapper<Text, FileStatus, Text, Text>();
+
+    MapContextImpl<Text, FileStatus, Text, Text> contextImpl
+            = new MapContextImpl<Text, FileStatus, Text, Text>(conf,
+            getTaskAttemptID(taskId), reader, writer,
+            null, reporter, null);
+
+    this.reader = reader;
+    this.mapperContext = wrappedMapper.getMapContext(contextImpl);
+  }
+
+  public Mapper<Text, FileStatus, Text, Text>.Context getContext() {
+    return mapperContext;
+  }
+
+  public StatusReporter getReporter() {
+    return reporter;
+  }
+
+  public RecordReader<Text, FileStatus> getReader() {
+    return reader;
+  }
+
+  public StubInMemoryWriter getWriter() {
+    return writer;
+  }
+
+  public static class StubStatusReporter extends StatusReporter {
+
+    private Counters counters = new Counters();
+
+    public StubStatusReporter() {
+      /*
+      final CounterGroup counterGroup
+              = new CounterGroup("FileInputFormatCounters",
+                                 "FileInputFormatCounters");
+      counterGroup.addCounter(new Counter("BYTES_READ",
+                                          "BYTES_READ",
+                                          0));
+      counters.addGroup(counterGroup);
+      */
+    }
+
+    @Override
+    public Counter getCounter(Enum<?> name) {
+      return counters.findCounter(name);
+    }
+
+    @Override
+    public Counter getCounter(String group, String name) {
+      return counters.findCounter(group, name);
+    }
+
+    @Override
+    public void progress() {}
+
+    @Override
+    public float getProgress() {
+      return 0F;
+    }
+
+    @Override
+    public void setStatus(String status) {}
+  }
+
+
+  public static class StubInMemoryWriter extends RecordWriter<Text, Text> {
+
+    List<Text> keys = new ArrayList<Text>();
+
+    List<Text> values = new ArrayList<Text>();
+
+    @Override
+    public void write(Text key, Text value) throws IOException, InterruptedException {
+      keys.add(key);
+      values.add(value);
+    }
+
+    @Override
+    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+    }
+
+    public List<Text> keys() {
+      return keys;
+    }
+
+    public List<Text> values() {
+      return values;
+    }
+
+  }
+
+  public static TaskAttemptID getTaskAttemptID(int taskId) {
+    return new TaskAttemptID("", 0, TaskType.MAP, taskId, 0);
+  }
+}

+ 252 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestCopyListing.java

@@ -0,0 +1,252 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.tools.util.TestDistCpUtils;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.security.Credentials;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.List;
+import java.util.ArrayList;
+
+public class TestCopyListing extends SimpleCopyListing {
+  private static final Log LOG = LogFactory.getLog(TestCopyListing.class);
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+  private static final Configuration config = new Configuration();
+  private static MiniDFSCluster cluster;
+
+  @BeforeClass
+  public static void create() throws IOException {
+    cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).format(true)
+                                                .build();
+  }
+
+  @AfterClass
+  public static void destroy() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+  
+  public TestCopyListing() {
+    super(config, CREDENTIALS);
+  }
+
+  protected TestCopyListing(Configuration configuration) {
+    super(configuration, CREDENTIALS);
+  }
+
+  @Override
+  protected long getBytesToCopy() {
+    return 0;
+  }
+
+  @Override
+  protected long getNumberOfPaths() {
+    return 0;
+  }
+
+  @Test
+  public void testMultipleSrcToFile() {
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(getConf());
+      List<Path> srcPaths = new ArrayList<Path>();
+      srcPaths.add(new Path("/tmp/in/1"));
+      srcPaths.add(new Path("/tmp/in/2"));
+      Path target = new Path("/tmp/out/1");
+      TestDistCpUtils.createFile(fs, "/tmp/in/1");
+      TestDistCpUtils.createFile(fs, "/tmp/in/2");
+      fs.mkdirs(target);
+      DistCpOptions options = new DistCpOptions(srcPaths, target);
+      validatePaths(options);
+      TestDistCpUtils.delete(fs, "/tmp");
+      //No errors
+
+      target = new Path("/tmp/out/1");
+      fs.create(target).close();
+      options = new DistCpOptions(srcPaths, target);
+      try {
+        validatePaths(options);
+        Assert.fail("Invalid inputs accepted");
+      } catch (InvalidInputException ignore) { }
+      TestDistCpUtils.delete(fs, "/tmp");
+
+      srcPaths.clear();
+      srcPaths.add(new Path("/tmp/in/1"));
+      fs.mkdirs(new Path("/tmp/in/1"));
+      target = new Path("/tmp/out/1");
+      fs.create(target).close();
+      options = new DistCpOptions(srcPaths, target);
+      try {
+        validatePaths(options);
+        Assert.fail("Invalid inputs accepted");
+      } catch (InvalidInputException ignore) { }
+      TestDistCpUtils.delete(fs, "/tmp");
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test input validation failed");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testDuplicates() {
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(getConf());
+      List<Path> srcPaths = new ArrayList<Path>();
+      srcPaths.add(new Path("/tmp/in/*/*"));
+      TestDistCpUtils.createFile(fs, "/tmp/in/1.txt");
+      TestDistCpUtils.createFile(fs, "/tmp/in/src/1.txt");
+      Path target = new Path("/tmp/out");
+      Path listingFile = new Path("/tmp/list");
+      DistCpOptions options = new DistCpOptions(srcPaths, target);
+      CopyListing listing = CopyListing.getCopyListing(getConf(), CREDENTIALS, options);
+      try {
+        listing.buildListing(listingFile, options);
+        Assert.fail("Duplicates not detected");
+      } catch (DuplicateFileException ignore) {
+      }
+    } catch (IOException e) {
+      LOG.error("Exception encountered in test", e);
+      Assert.fail("Test failed " + e.getMessage());
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testBuildListing() {
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(getConf());
+      List<Path> srcPaths = new ArrayList<Path>();
+      Path p1 = new Path("/tmp/in/1");
+      Path p2 = new Path("/tmp/in/2");
+      Path p3 = new Path("/tmp/in2/2");
+      Path target = new Path("/tmp/out/1");
+      srcPaths.add(p1.getParent());
+      srcPaths.add(p3.getParent());
+      TestDistCpUtils.createFile(fs, "/tmp/in/1");
+      TestDistCpUtils.createFile(fs, "/tmp/in/2");
+      TestDistCpUtils.createFile(fs, "/tmp/in2/2");
+      fs.mkdirs(target);
+      OutputStream out = fs.create(p1);
+      out.write("ABC".getBytes());
+      out.close();
+
+      out = fs.create(p2);
+      out.write("DEF".getBytes());
+      out.close();
+
+      out = fs.create(p3);
+      out.write("GHIJ".getBytes());
+      out.close();
+
+      Path listingFile = new Path("/tmp/file");
+
+      DistCpOptions options = new DistCpOptions(srcPaths, target);
+      options.setSyncFolder(true);
+      CopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS);
+      try {
+        listing.buildListing(listingFile, options);
+        Assert.fail("Duplicates not detected");
+      } catch (DuplicateFileException ignore) {
+      }
+      Assert.assertEquals(listing.getBytesToCopy(), 10);
+      Assert.assertEquals(listing.getNumberOfPaths(), 3);
+      TestDistCpUtils.delete(fs, "/tmp");
+
+      try {
+        listing.buildListing(listingFile, options);
+        Assert.fail("Invalid input not detected");
+      } catch (InvalidInputException ignore) {
+      }
+      TestDistCpUtils.delete(fs, "/tmp");
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test build listing failed");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testBuildListingForSingleFile() {
+    FileSystem fs = null;
+    String testRootString = "/singleFileListing";
+    Path testRoot = new Path(testRootString);
+    SequenceFile.Reader reader = null;
+    try {
+      fs = FileSystem.get(getConf());
+      if (fs.exists(testRoot))
+        TestDistCpUtils.delete(fs, testRootString);
+
+      Path sourceFile = new Path(testRoot, "/source/foo/bar/source.txt");
+      Path decoyFile  = new Path(testRoot, "/target/moo/source.txt");
+      Path targetFile = new Path(testRoot, "/target/moo/target.txt");
+
+      TestDistCpUtils.createFile(fs, sourceFile.toString());
+      TestDistCpUtils.createFile(fs, decoyFile.toString());
+      TestDistCpUtils.createFile(fs, targetFile.toString());
+
+      List<Path> srcPaths = new ArrayList<Path>();
+      srcPaths.add(sourceFile);
+
+      DistCpOptions options = new DistCpOptions(srcPaths, targetFile);
+      CopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS);
+
+      final Path listFile = new Path(testRoot, "/tmp/fileList.seq");
+      listing.buildListing(listFile, options);
+
+      reader = new SequenceFile.Reader(fs, listFile, getConf());
+      FileStatus fileStatus = new FileStatus();
+      Text relativePath = new Text();
+      Assert.assertTrue(reader.next(relativePath, fileStatus));
+      Assert.assertTrue(relativePath.toString().equals(""));
+    }
+    catch (Exception e) {
+      Assert.fail("Unexpected exception encountered.");
+      LOG.error("Unexpected exception: ", e);
+    }
+    finally {
+      TestDistCpUtils.delete(fs, testRootString);
+      IOUtils.closeStream(reader);
+    }
+  }
+}

+ 275 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestDistCp.java

@@ -0,0 +1,275 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobSubmissionFiles;
+import org.apache.hadoop.mapreduce.Cluster;
+import org.apache.hadoop.tools.mapred.CopyOutputFormat;
+import org.junit.*;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.io.*;
+
+@Ignore
+public class TestDistCp {
+  private static final Log LOG = LogFactory.getLog(TestDistCp.class);
+  private static List<Path> pathList = new ArrayList<Path>();
+  private static final int FILE_SIZE = 1024;
+
+  private static Configuration configuration;
+  private static MiniDFSCluster cluster;
+  private static MiniMRCluster mrCluster;
+
+  private static final String SOURCE_PATH = "/tmp/source";
+  private static final String TARGET_PATH = "/tmp/target";
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    configuration = getConfigurationForCluster();
+    cluster = new MiniDFSCluster.Builder(configuration).numDataNodes(1)
+                    .format(true).build();
+    System.setProperty("org.apache.hadoop.mapred.TaskTracker", "target/tmp");
+    configuration.set("org.apache.hadoop.mapred.TaskTracker", "target/tmp");
+    System.setProperty("hadoop.log.dir", "target/tmp");
+    configuration.set("hadoop.log.dir", "target/tmp");
+    mrCluster = new MiniMRCluster(1, cluster.getFileSystem().getUri().toString(), 1);
+    Configuration mrConf = mrCluster.createJobConf();
+    final String mrJobTracker = mrConf.get("mapred.job.tracker");
+    configuration.set("mapred.job.tracker", mrJobTracker);
+    final String mrJobTrackerAddress
+            = mrConf.get("mapred.job.tracker.http.address");
+    configuration.set("mapred.job.tracker.http.address", mrJobTrackerAddress);
+  }
+
+  @AfterClass
+  public static void cleanup() {
+    if (mrCluster != null) mrCluster.shutdown();
+    if (cluster != null) cluster.shutdown();
+  }
+
+  private static Configuration getConfigurationForCluster() throws IOException {
+    Configuration configuration = new Configuration();
+    System.setProperty("test.build.data", "target/build/TEST_DISTCP/data");
+    configuration.set("hadoop.log.dir", "target/tmp");
+
+    LOG.debug("fs.default.name  == " + configuration.get("fs.default.name"));
+    LOG.debug("dfs.http.address == " + configuration.get("dfs.http.address"));
+    return configuration;
+  }
+
+  private static void createSourceData() throws Exception {
+    mkdirs(SOURCE_PATH + "/1");
+    mkdirs(SOURCE_PATH + "/2");
+    mkdirs(SOURCE_PATH + "/2/3/4");
+    mkdirs(SOURCE_PATH + "/2/3");
+    mkdirs(SOURCE_PATH + "/5");
+    touchFile(SOURCE_PATH + "/5/6");
+    mkdirs(SOURCE_PATH + "/7");
+    mkdirs(SOURCE_PATH + "/7/8");
+    touchFile(SOURCE_PATH + "/7/8/9");
+  }
+
+  private static void mkdirs(String path) throws Exception {
+    FileSystem fileSystem = cluster.getFileSystem();
+    final Path qualifiedPath = new Path(path).makeQualified(fileSystem.getUri(),
+                                  fileSystem.getWorkingDirectory());
+    pathList.add(qualifiedPath);
+    fileSystem.mkdirs(qualifiedPath);
+  }
+
+  private static void touchFile(String path) throws Exception {
+    FileSystem fs;
+    DataOutputStream outputStream = null;
+    try {
+      fs = cluster.getFileSystem();
+      final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
+                                            fs.getWorkingDirectory());
+      final long blockSize = fs.getDefaultBlockSize() * 2;
+      outputStream = fs.create(qualifiedPath, true, 0,
+              (short)(fs.getDefaultReplication()*2),
+              blockSize);
+      outputStream.write(new byte[FILE_SIZE]);
+      pathList.add(qualifiedPath);
+    }
+    finally {
+      IOUtils.cleanup(null, outputStream);
+    }
+  }
+
+  private static void clearState() throws Exception {
+    pathList.clear();
+    cluster.getFileSystem().delete(new Path(TARGET_PATH), true);
+    createSourceData();
+  }
+
+//  @Test
+  public void testUniformSizeDistCp() throws Exception {
+    try {
+      clearState();
+      final FileSystem fileSystem = cluster.getFileSystem();
+      Path sourcePath = new Path(SOURCE_PATH)
+              .makeQualified(fileSystem.getUri(),
+                             fileSystem.getWorkingDirectory());
+      List<Path> sources = new ArrayList<Path>();
+      sources.add(sourcePath);
+
+      Path targetPath = new Path(TARGET_PATH)
+              .makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
+      DistCpOptions options = new DistCpOptions(sources, targetPath);
+      options.setAtomicCommit(true);
+      options.setBlocking(false);
+      Job job = new DistCp(configuration, options).execute();
+      Path workDir = CopyOutputFormat.getWorkingDirectory(job);
+      Path finalDir = CopyOutputFormat.getCommitDirectory(job);
+
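+      // Wait until either the job's temporary work directory appears or the
+      // job completes, so that the atomic-commit staging can be observed
+      // before asserting below that it has been cleaned up.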
+      while (!job.isComplete()) {
+        if (cluster.getFileSystem().exists(workDir)) {
+          break;
+        }
+      }
+      job.waitForCompletion(true);
+      Assert.assertFalse(cluster.getFileSystem().exists(workDir));
+      Assert.assertTrue(cluster.getFileSystem().exists(finalDir));
+      Assert.assertFalse(cluster.getFileSystem().exists(
+          new Path(job.getConfiguration().get(DistCpConstants.CONF_LABEL_META_FOLDER))));
+      verifyResults();
+    }
+    catch (Exception e) {
+      LOG.error("Exception encountered", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+
+//  @Test
+  public void testCleanup() {
+    try {
+      clearState();
+      Path sourcePath = new Path("noscheme:///file");
+      List<Path> sources = new ArrayList<Path>();
+      sources.add(sourcePath);
+
+      final FileSystem fs = cluster.getFileSystem();
+      Path targetPath = new Path(TARGET_PATH)
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      DistCpOptions options = new DistCpOptions(sources, targetPath);
+
+      Path stagingDir = JobSubmissionFiles.getStagingDir(
+              new Cluster(configuration), configuration);
+      stagingDir.getFileSystem(configuration).mkdirs(stagingDir);
+
+      try {
+        new DistCp(configuration, options).execute();
+      } catch (Throwable t) {
+        Assert.assertEquals(stagingDir.getFileSystem(configuration).
+            listStatus(stagingDir).length, 0);
+      }
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("testCleanup failed " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testRootPath() throws Exception {
+    try {
+      clearState();
+      List<Path> sources = new ArrayList<Path>();
+      final FileSystem fs = cluster.getFileSystem();
+      sources.add(new Path("/a")
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory()));
+      sources.add(new Path("/b")
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory()));
+      touchFile("/a/a.txt");
+      touchFile("/b/b.txt");
+
+      Path targetPath = new Path("/c")
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      DistCpOptions options = new DistCpOptions(sources, targetPath);
+      new DistCp(configuration, options).execute();
+      Assert.assertTrue(fs.exists(new Path("/c/a/a.txt")));
+      Assert.assertTrue(fs.exists(new Path("/c/b/b.txt")));
+    }
+    catch (Exception e) {
+      LOG.error("Exception encountered", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testDynamicDistCp() throws Exception {
+    try {
+      clearState();
+      final FileSystem fs = cluster.getFileSystem();
+      Path sourcePath = new Path(SOURCE_PATH)
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      List<Path> sources = new ArrayList<Path>();
+      sources.add(sourcePath);
+
+      Path targetPath = new Path(TARGET_PATH)
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      DistCpOptions options = new DistCpOptions(sources, targetPath);
+      options.setCopyStrategy("dynamic");
+
+      options.setAtomicCommit(true);
+      options.setAtomicWorkPath(new Path("/work"));
+      options.setBlocking(false);
+      Job job = new DistCp(configuration, options).execute();
+      Path workDir = CopyOutputFormat.getWorkingDirectory(job);
+      Path finalDir = CopyOutputFormat.getCommitDirectory(job);
+
+      while (!job.isComplete()) {
+        if (fs.exists(workDir)) {
+          break;
+        }
+      }
+      job.waitForCompletion(true);
+      Assert.assertFalse(fs.exists(workDir));
+      Assert.assertTrue(fs.exists(finalDir));
+
+      verifyResults();
+    }
+    catch (Exception e) {
+      LOG.error("Exception encountered", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+
+  private static void verifyResults() throws Exception {
+    for (Path path : pathList) {
+      FileSystem fs = cluster.getFileSystem();
+
+      Path sourcePath = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
+      Path targetPath
+              = new Path(sourcePath.toString().replaceAll(SOURCE_PATH, TARGET_PATH));
+
+      Assert.assertTrue(fs.exists(targetPath));
+      Assert.assertEquals(fs.isFile(sourcePath), fs.isFile(targetPath));
+    }
+  }
+}

+ 542 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestFileBasedCopyListing.java

@@ -0,0 +1,542 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.tools.util.TestDistCpUtils;
+import org.apache.hadoop.security.Credentials;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+public class TestFileBasedCopyListing {
+  private static final Log LOG = LogFactory.getLog(TestFileBasedCopyListing.class);
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+  private static final Configuration config = new Configuration();
+  private static MiniDFSCluster cluster;
+  private static FileSystem fs;
+
+  @BeforeClass
+  public static void create() throws IOException {
+    cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).format(true)
+                                                .build();
+    fs = cluster.getFileSystem();
+    buildExpectedValuesMap();
+  }
+
+  @AfterClass
+  public static void destroy() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  private static Map<String, String> map = new HashMap<String, String>();
+
+  private static void buildExpectedValuesMap() {
+    map.put("/file1", "/tmp/singlefile1/file1");
+    map.put("/file2", "/tmp/singlefile2/file2");
+    map.put("/file3", "/tmp/multifile/file3");
+    map.put("/file4", "/tmp/multifile/file4");
+    map.put("/file5", "/tmp/multifile/file5");
+    map.put("/multifile/file3", "/tmp/multifile/file3");
+    map.put("/multifile/file4", "/tmp/multifile/file4");
+    map.put("/multifile/file5", "/tmp/multifile/file5");
+    map.put("/Ufile3", "/tmp/Umultifile/Ufile3");
+    map.put("/Ufile4", "/tmp/Umultifile/Ufile4");
+    map.put("/Ufile5", "/tmp/Umultifile/Ufile5");
+    map.put("/dir1", "/tmp/singledir/dir1");
+    map.put("/singledir/dir1", "/tmp/singledir/dir1");
+    map.put("/dir2", "/tmp/singledir/dir2");
+    map.put("/singledir/dir2", "/tmp/singledir/dir2");
+    map.put("/Udir1", "/tmp/Usingledir/Udir1");
+    map.put("/Udir2", "/tmp/Usingledir/Udir2");
+    map.put("/dir2/file6", "/tmp/singledir/dir2/file6");
+    map.put("/singledir/dir2/file6", "/tmp/singledir/dir2/file6");
+    map.put("/file7", "/tmp/singledir1/dir3/file7");
+    map.put("/file8", "/tmp/singledir1/dir3/file8");
+    map.put("/file9", "/tmp/singledir1/dir3/file9");
+    map.put("/dir3/file7", "/tmp/singledir1/dir3/file7");
+    map.put("/dir3/file8", "/tmp/singledir1/dir3/file8");
+    map.put("/dir3/file9", "/tmp/singledir1/dir3/file9");
+    map.put("/Ufile7", "/tmp/Usingledir1/Udir3/Ufile7");
+    map.put("/Ufile8", "/tmp/Usingledir1/Udir3/Ufile8");
+    map.put("/Ufile9", "/tmp/Usingledir1/Udir3/Ufile9");
+  }
+
+  @Test
+  public void testSingleFileMissingTarget() {
+    caseSingleFileMissingTarget(false);
+    caseSingleFileMissingTarget(true);
+  }
+
+  private void caseSingleFileMissingTarget(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singlefile1/file1");
+      createFiles("/tmp/singlefile1/file1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 0);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testSingleFileTargetFile() {
+    caseSingleFileTargetFile(false);
+    caseSingleFileTargetFile(true);
+  }
+
+  private void caseSingleFileTargetFile(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singlefile1/file1");
+      createFiles("/tmp/singlefile1/file1", target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 0);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testSingleFileTargetDir() {
+    caseSingleFileTargetDir(false);
+    caseSingleFileTargetDir(true);
+  }
+
+  private void caseSingleFileTargetDir(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singlefile2/file2");
+      createFiles("/tmp/singlefile2/file2");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testSingleDirTargetMissing() {
+    caseSingleDirTargetMissing(false);
+    caseSingleDirTargetMissing(true);
+  }
+
+  private void caseSingleDirTargetMissing(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singledir");
+      mkdirs("/tmp/singledir/dir1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testSingleDirTargetPresent() {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/singledir");
+      mkdirs("/tmp/singledir/dir1");
+      mkdirs(target.toString());
+
+      runTest(listFile, target);
+
+      checkResult(listFile, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testUpdateSingleDirTargetPresent() {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/Usingledir");
+      mkdirs("/tmp/Usingledir/Udir1");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, true);
+
+      checkResult(listFile, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testMultiFileTargetPresent() {
+    caseMultiFileTargetPresent(false);
+    caseMultiFileTargetPresent(true);
+  }
+
+  private void caseMultiFileTargetPresent(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 3);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testMultiFileTargetMissing() {
+    caseMultiFileTargetMissing(false);
+    caseMultiFileTargetMissing(true);
+  }
+
+  private void caseMultiFileTargetMissing(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 3);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testMultiDirTargetPresent() {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/multifile", "/tmp/singledir");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      mkdirs(target.toString(), "/tmp/singledir/dir1");
+
+      runTest(listFile, target);
+
+      checkResult(listFile, 4);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testUpdateMultiDirTargetPresent() {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/Umultifile", "/tmp/Usingledir");
+      createFiles("/tmp/Umultifile/Ufile3", "/tmp/Umultifile/Ufile4", "/tmp/Umultifile/Ufile5");
+      mkdirs(target.toString(), "/tmp/Usingledir/Udir1");
+
+      runTest(listFile, target, true);
+
+      checkResult(listFile, 4);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testMultiDirTargetMissing() {
+    caseMultiDirTargetMissing(false);
+    caseMultiDirTargetMissing(true);
+  }
+
+  private void caseMultiDirTargetMissing(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/multifile", "/tmp/singledir");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      mkdirs("/tmp/singledir/dir1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 4);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+    }
+  }
+
+  @Test
+  public void testGlobTargetMissingSingleLevel() {
+    caseGlobTargetMissingSingleLevel(false);
+    caseGlobTargetMissingSingleLevel(true);
+  }
+
+  private void caseGlobTargetMissingSingleLevel(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp1/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/*");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/singledir/dir2/file6");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 5);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+  }
+
+  @Test
+  public void testGlobTargetMissingMultiLevel() {
+    caseGlobTargetMissingMultiLevel(false);
+    caseGlobTargetMissingMultiLevel(true);
+  }
+
+  private void caseGlobTargetMissingMultiLevel(boolean sync) {
+
+    try {
+      Path listFile = new Path("/tmp1/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/*/*");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/singledir1/dir3/file7", "/tmp/singledir1/dir3/file8",
+          "/tmp/singledir1/dir3/file9");
+
+      runTest(listFile, target, sync);
+
+      checkResult(listFile, 6);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+  }
+
+  @Test
+  public void testGlobTargetDirMultiLevel() {
+
+    try {
+      Path listFile = new Path("/tmp1/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/*/*");
+      createFiles("/tmp/multifile/file3", "/tmp/multifile/file4", "/tmp/multifile/file5");
+      createFiles("/tmp/singledir1/dir3/file7", "/tmp/singledir1/dir3/file8",
+          "/tmp/singledir1/dir3/file9");
+      mkdirs(target.toString());
+
+      runTest(listFile, target);
+
+      checkResult(listFile, 6);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+  }
+
+  @Test
+  public void testUpdateGlobTargetDirMultiLevel() {
+
+    try {
+      Path listFile = new Path("/tmp1/listing");
+      Path target = new Path("/tmp/target");
+
+      addEntries(listFile, "/tmp/*/*");
+      createFiles("/tmp/Umultifile/Ufile3", "/tmp/Umultifile/Ufile4", "/tmp/Umultifile/Ufile5");
+      createFiles("/tmp/Usingledir1/Udir3/Ufile7", "/tmp/Usingledir1/Udir3/Ufile8",
+          "/tmp/Usingledir1/Udir3/Ufile9");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, true);
+
+      checkResult(listFile, 6);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing build listing", e);
+      Assert.fail("build listing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp");
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+  }
+
+  private void addEntries(Path listFile, String... entries) throws IOException {
+    OutputStream out = fs.create(listFile);
+    try {
+      for (String entry : entries){
+        out.write(entry.getBytes());
+        out.write("\n".getBytes());
+      }
+    } finally {
+      out.close();
+    }
+  }
+
+  private void createFiles(String... entries) throws IOException {
+    for (String entry : entries){
+      OutputStream out = fs.create(new Path(entry));
+      try {
+        out.write(entry.getBytes());
+        out.write("\n".getBytes());
+      } finally {
+        out.close();
+      }
+    }
+  }
+
+  private void mkdirs(String... entries) throws IOException {
+    for (String entry : entries){
+      fs.mkdirs(new Path(entry));
+    }
+  }
+
+  private void runTest(Path listFile, Path target) throws IOException {
+    runTest(listFile, target, true);
+  }
+
+  private void runTest(Path listFile, Path target, boolean sync) throws IOException {
+    CopyListing listing = new FileBasedCopyListing(config, CREDENTIALS);
+    DistCpOptions options = new DistCpOptions(listFile, target);
+    options.setSyncFolder(sync);
+    listing.buildListing(listFile, options);
+  }
+
+  private void checkResult(Path listFile, int count) throws IOException {
+    if (count == 0) {
+      return;
+    }
+
+    int recCount = 0;
+    SequenceFile.Reader reader = new SequenceFile.Reader(config,
+                                            SequenceFile.Reader.file(listFile));
+    try {
+      Text relPath = new Text();
+      FileStatus fileStatus = new FileStatus();
+      while (reader.next(relPath, fileStatus)) {
+        Assert.assertEquals(fileStatus.getPath().toUri().getPath(), map.get(relPath.toString()));
+        recCount++;
+      }
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+    Assert.assertEquals(recCount, count);
+  }
+
+}

+ 135 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestGlobbedCopyListing.java

@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.apache.hadoop.security.Credentials;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.DataOutputStream;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+public class TestGlobbedCopyListing {
+
+  private static MiniDFSCluster cluster;
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+  public static Map<String, String> expectedValues = new HashMap<String, String>();
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    cluster = new MiniDFSCluster(new Configuration(), 1, true, null);
+    createSourceData();
+  }
+
+  private static void createSourceData() throws Exception {
+    mkdirs("/tmp/source/1");
+    mkdirs("/tmp/source/2");
+    mkdirs("/tmp/source/2/3");
+    mkdirs("/tmp/source/2/3/4");
+    mkdirs("/tmp/source/5");
+    touchFile("/tmp/source/5/6");
+    mkdirs("/tmp/source/7");
+    mkdirs("/tmp/source/7/8");
+    touchFile("/tmp/source/7/8/9");
+  }
+
+  private static void mkdirs(String path) throws Exception {
+    FileSystem fileSystem = null;
+    try {
+      fileSystem = cluster.getFileSystem();
+      fileSystem.mkdirs(new Path(path));
+      recordInExpectedValues(path);
+    }
+    finally {
+      IOUtils.cleanup(null, fileSystem);
+    }
+  }
+
+  private static void touchFile(String path) throws Exception {
+    FileSystem fileSystem = null;
+    DataOutputStream outputStream = null;
+    try {
+      fileSystem = cluster.getFileSystem();
+      outputStream = fileSystem.create(new Path(path), true, 0);
+      recordInExpectedValues(path);
+    }
+    finally {
+      IOUtils.cleanup(null, fileSystem, outputStream);
+    }
+  }
+
+  private static void recordInExpectedValues(String path) throws Exception {
+    FileSystem fileSystem = cluster.getFileSystem();
+    Path sourcePath = new Path(fileSystem.getUri().toString() + path);
+    expectedValues.put(sourcePath.toString(), DistCpUtils.getRelativePath(
+        new Path("/tmp/source"), sourcePath));
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    cluster.shutdown();
+  }
+
+  @Test
+  public void testRun() throws Exception {
+    final URI uri = cluster.getFileSystem().getUri();
+    final String pathString = uri.toString();
+    Path fileSystemPath = new Path(pathString);
+    Path source = new Path(fileSystemPath.toString() + "/tmp/source");
+    Path target = new Path(fileSystemPath.toString() + "/tmp/target");
+    Path listingPath = new Path(fileSystemPath.toString() + "/tmp/META/fileList.seq");
+    DistCpOptions options = new DistCpOptions(Arrays.asList(source), target);
+
+    new GlobbedCopyListing(new Configuration(), CREDENTIALS).buildListing(listingPath, options);
+
+    verifyContents(listingPath);
+  }
+
+  private void verifyContents(Path listingPath) throws Exception {
+    SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem(),
+                                              listingPath, new Configuration());
+    Text key   = new Text();
+    FileStatus value = new FileStatus();
+    Map<String, String> actualValues = new HashMap<String, String>();
+    while (reader.next(key, value)) {
+      actualValues.put(value.getPath().toString(), key.toString());
+    }
+
+    Assert.assertEquals(expectedValues.size(), actualValues.size());
+    for (Map.Entry<String, String> entry : actualValues.entrySet()) {
+      Assert.assertEquals(entry.getValue(), expectedValues.get(entry.getKey()));
+    }
+  }
+}

+ 466 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestIntegration.java

@@ -0,0 +1,466 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.util.TestDistCpUtils;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+public class TestIntegration {
+  private static final Log LOG = LogFactory.getLog(TestIntegration.class);
+
+  private static FileSystem fs;
+
+  private static Path listFile;
+  private static Path target;
+  private static String root;
+
+  private static Configuration getConf() {
+    Configuration conf = new Configuration();
+    conf.set("fs.default.name", "file:///");
+    conf.set("mapred.job.tracker", "local");
+    return conf;
+  }
+
+  @BeforeClass
+  public static void setup() {
+    try {
+      fs = FileSystem.get(getConf());
+      listFile = new Path("target/tmp/listing").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory());
+      target = new Path("target/tmp/target").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory());
+      root = new Path("target/tmp").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory()).toString();
+      TestDistCpUtils.delete(fs, root);
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+    }
+  }
+
+  @Test
+  public void testSingleFileMissingTarget() {
+    caseSingleFileMissingTarget(false);
+    caseSingleFileMissingTarget(true);
+  }
+
+  private void caseSingleFileMissingTarget(boolean sync) {
+
+    try {
+      addEntries(listFile, "singlefile1/file1");
+      createFiles("singlefile1/file1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testSingleFileTargetFile() {
+    caseSingleFileTargetFile(false);
+    caseSingleFileTargetFile(true);
+  }
+
+  private void caseSingleFileTargetFile(boolean sync) {
+
+    try {
+      addEntries(listFile, "singlefile1/file1");
+      createFiles("singlefile1/file1", target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 1);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testSingleFileTargetDir() {
+    caseSingleFileTargetDir(false);
+    caseSingleFileTargetDir(true);
+  }
+
+  private void caseSingleFileTargetDir(boolean sync) {
+
+    try {
+      addEntries(listFile, "singlefile2/file2");
+      createFiles("singlefile2/file2");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 1, "file2");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testSingleDirTargetMissing() {
+    caseSingleDirTargetMissing(false);
+    caseSingleDirTargetMissing(true);
+  }
+
+  private void caseSingleDirTargetMissing(boolean sync) {
+
+    try {
+      addEntries(listFile, "singledir");
+      mkdirs(root + "/singledir/dir1");
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 1, "dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testSingleDirTargetPresent() {
+
+    try {
+      addEntries(listFile, "singledir");
+      mkdirs(root + "/singledir/dir1");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 1, "singledir/dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testUpdateSingleDirTargetPresent() {
+
+    try {
+      addEntries(listFile, "Usingledir");
+      mkdirs(root + "/Usingledir/Udir1");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 1, "Udir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testMultiFileTargetPresent() {
+    caseMultiFileTargetPresent(false);
+    caseMultiFileTargetPresent(true);
+  }
+
+  private void caseMultiFileTargetPresent(boolean sync) {
+
+    try {
+      addEntries(listFile, "multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      mkdirs(target.toString());
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 3, "file3", "file4", "file5");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testMultiFileTargetMissing() {
+    caseMultiFileTargetMissing(false);
+    caseMultiFileTargetMissing(true);
+  }
+
+  private void caseMultiFileTargetMissing(boolean sync) {
+
+    try {
+      addEntries(listFile, "multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+
+      runTest(listFile, target, sync);
+
+      checkResult(target, 3, "file3", "file4", "file5");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testMultiDirTargetPresent() {
+
+    try {
+      addEntries(listFile, "multifile", "singledir");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      mkdirs(target.toString(), root + "/singledir/dir1");
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 2, "multifile/file3", "multifile/file4", "multifile/file5", "singledir/dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testUpdateMultiDirTargetPresent() {
+
+    try {
+      addEntries(listFile, "Umultifile", "Usingledir");
+      createFiles("Umultifile/Ufile3", "Umultifile/Ufile4", "Umultifile/Ufile5");
+      mkdirs(target.toString(), root + "/Usingledir/Udir1");
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 4, "Ufile3", "Ufile4", "Ufile5", "Udir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testMultiDirTargetMissing() {
+
+    try {
+      addEntries(listFile, "multifile", "singledir");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      mkdirs(root + "/singledir/dir1");
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 2, "multifile/file3", "multifile/file4",
+          "multifile/file5", "singledir/dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testUpdateMultiDirTargetMissing() {
+
+    try {
+      addEntries(listFile, "multifile", "singledir");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      mkdirs(root + "/singledir/dir1");
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 4, "file3", "file4", "file5", "dir1");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+    }
+  }
+
+  @Test
+  public void testGlobTargetMissingSingleLevel() {
+
+    try {
+      Path listFile = new Path("target/tmp1/listing").makeQualified(fs.getUri(),
+                                fs.getWorkingDirectory());
+      addEntries(listFile, "*");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("singledir/dir2/file6");
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 2, "multifile/file3", "multifile/file4", "multifile/file5",
+          "singledir/dir2/file6");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+      TestDistCpUtils.delete(fs, "target/tmp1");
+    }
+  }
+
+  @Test
+  public void testUpdateGlobTargetMissingSingleLevel() {
+
+    try {
+      Path listFile = new Path("target/tmp1/listing").makeQualified(fs.getUri(),
+                                  fs.getWorkingDirectory());
+      addEntries(listFile, "*");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("singledir/dir2/file6");
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 4, "file3", "file4", "file5", "dir2/file6");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+      TestDistCpUtils.delete(fs, "target/tmp1");
+    }
+  }
+
+  @Test
+  public void testGlobTargetMissingMultiLevel() {
+
+    try {
+      Path listFile = new Path("target/tmp1/listing").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory());
+      addEntries(listFile, "*/*");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("singledir1/dir3/file7", "singledir1/dir3/file8",
+          "singledir1/dir3/file9");
+
+      runTest(listFile, target, false);
+
+      checkResult(target, 4, "file3", "file4", "file5",
+          "dir3/file7", "dir3/file8", "dir3/file9");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+      TestDistCpUtils.delete(fs, "target/tmp1");
+    }
+  }
+
+  @Test
+  public void testUpdateGlobTargetMissingMultiLevel() {
+
+    try {
+      Path listFile = new Path("target/tmp1/listing").makeQualified(fs.getUri(),
+              fs.getWorkingDirectory());
+      addEntries(listFile, "*/*");
+      createFiles("multifile/file3", "multifile/file4", "multifile/file5");
+      createFiles("singledir1/dir3/file7", "singledir1/dir3/file8",
+          "singledir1/dir3/file9");
+
+      runTest(listFile, target, true);
+
+      checkResult(target, 6, "file3", "file4", "file5",
+          "file7", "file8", "file9");
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running distcp", e);
+      Assert.fail("distcp failure");
+    } finally {
+      TestDistCpUtils.delete(fs, root);
+      TestDistCpUtils.delete(fs, "target/tmp1");
+    }
+  }
+
+  private void addEntries(Path listFile, String... entries) throws IOException {
+    OutputStream out = fs.create(listFile);
+    try {
+      for (String entry : entries){
+        out.write((root + "/" + entry).getBytes());
+        out.write("\n".getBytes());
+      }
+    } finally {
+      out.close();
+    }
+  }
+
+  private void createFiles(String... entries) throws IOException {
+    for (String entry : entries){
+      OutputStream out = fs.create(new Path(root + "/" + entry));
+      try {
+        out.write((root + "/" + entry).getBytes());
+        out.write("\n".getBytes());
+      } finally {
+        out.close();
+      }
+    }
+  }
+
+  private void mkdirs(String... entries) throws IOException {
+    for (String entry : entries){
+      fs.mkdirs(new Path(entry));
+    }
+  }
+
+  private void runTest(Path listFile, Path target, boolean sync) throws IOException {
+    DistCpOptions options = new DistCpOptions(listFile, target);
+    options.setSyncFolder(sync);
+    try {
+      new DistCp(getConf(), options).execute();
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      throw new IOException(e);
+    }
+  }
+
+  private void checkResult(Path target, int count, String... relPaths) throws IOException {
+    Assert.assertEquals(count, fs.listStatus(target).length);
+    if (relPaths == null || relPaths.length == 0) {
+      Assert.assertTrue(target.toString(), fs.exists(target));
+      return;
+    }
+    for (String relPath : relPaths) {
+      Assert.assertTrue(new Path(target, relPath).toString(), fs.exists(new Path(target, relPath)));
+    }
+  }
+
+}
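
For reference, the driver pattern exercised by the runTest(...) helper above boils down to roughly the following standalone sketch. It is illustrative only and not part of this patch; the host and paths are placeholders, and it uses only the APIs introduced by the patch (DistCpOptions, setSyncFolder, DistCp.execute).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;

public class DistCpDriverSketch {
  public static void main(String[] args) throws Exception {
    // A file listing source paths (one per line), as produced by addEntries() above.
    Path listingFile = new Path("hdfs://localhost:8020/tmp/listing");
    Path target      = new Path("hdfs://localhost:8020/target");

    DistCpOptions options = new DistCpOptions(listingFile, target);
    options.setSyncFolder(true);  // same effect as the -update switch

    // Launches the copy job against the given configuration and waits for completion.
    new DistCp(new Configuration(), options).execute();
  }
}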

+ 497 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/TestOptionsParser.java

@@ -0,0 +1,497 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.DistCpOptions.*;
+import org.apache.hadoop.conf.Configuration;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+public class TestOptionsParser {
+
+  @Test
+  public void testParseIgnoreFailure() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldIgnoreFailures());
+
+    options = OptionsParser.parse(new String[] {
+        "-i",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldIgnoreFailures());
+  }
+
+  @Test
+  public void testParseOverwrite() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldOverwrite());
+
+    options = OptionsParser.parse(new String[] {
+        "-overwrite",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldOverwrite());
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-update",
+          "-overwrite",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Update and overwrite aren't allowed together");
+    } catch (IllegalArgumentException ignore) {
+    }
+  }
+
+  @Test
+  public void testLogPath() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertNull(options.getLogPath());
+
+    options = OptionsParser.parse(new String[] {
+        "-log",
+        "hdfs://localhost:8020/logs",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getLogPath(), new Path("hdfs://localhost:8020/logs"));
+  }
+
+  @Test
+  public void testParseBlocking() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldBlock());
+
+    options = OptionsParser.parse(new String[] {
+        "-async",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldBlock());
+  }
+
+  @Test
+  public void testParseBandwidth() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getMapBandwidth(), DistCpConstants.DEFAULT_BANDWIDTH_MB);
+
+    options = OptionsParser.parse(new String[] {
+        "-bandwidth",
+        "11",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getMapBandwidth(), 11);
+  }
+
+  @Test
+  public void testParseSkipCRC() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldSkipCRC());
+
+    options = OptionsParser.parse(new String[] {
+        "-update",
+        "-skipcrccheck",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldSyncFolder());
+    Assert.assertTrue(options.shouldSkipCRC());
+  }
+
+  @Test
+  public void testParseAtomicCommit() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldAtomicCommit());
+
+    options = OptionsParser.parse(new String[] {
+        "-atomic",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldAtomicCommit());
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-atomic",
+          "-update",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Atomic and sync folders were allowed");
+    } catch (IllegalArgumentException ignore) { }
+  }
+
+  @Test
+  public void testParseWorkPath() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertNull(options.getAtomicWorkPath());
+
+    options = OptionsParser.parse(new String[] {
+        "-atomic",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertNull(options.getAtomicWorkPath());
+
+    options = OptionsParser.parse(new String[] {
+        "-atomic",
+        "-tmp",
+        "hdfs://localhost:8020/work",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getAtomicWorkPath(), new Path("hdfs://localhost:8020/work"));
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-tmp",
+          "hdfs://localhost:8020/work",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("work path was allowed without -atomic switch");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testParseSyncFolders() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldSyncFolder());
+
+    options = OptionsParser.parse(new String[] {
+        "-update",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldSyncFolder());
+  }
+
+  @Test
+  public void testParseDeleteMissing() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldDeleteMissing());
+
+    options = OptionsParser.parse(new String[] {
+        "-update",
+        "-delete",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldSyncFolder());
+    Assert.assertTrue(options.shouldDeleteMissing());
+
+    options = OptionsParser.parse(new String[] {
+        "-overwrite",
+        "-delete",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldOverwrite());
+    Assert.assertTrue(options.shouldDeleteMissing());
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-atomic",
+          "-delete",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Atomic and delete folders were allowed");
+    } catch (IllegalArgumentException ignore) { }
+  }
+
+  @Test
+  public void testParseSSLConf() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertNull(options.getSslConfigurationFile());
+
+    options = OptionsParser.parse(new String[] {
+        "-mapredSslConf",
+        "/tmp/ssl-client.xml",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getSslConfigurationFile(), "/tmp/ssl-client.xml");
+  }
+
+  @Test
+  public void testParseMaps() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getMaxMaps(), DistCpConstants.DEFAULT_MAPS);
+
+    options = OptionsParser.parse(new String[] {
+        "-m",
+        "1",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getMaxMaps(), 1);
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-m",
+          "hello",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Non numberic map parsed");
+    } catch (IllegalArgumentException ignore) { }
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-mapredXslConf",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Non numberic map parsed");
+    } catch (IllegalArgumentException ignore) { }
+  }
+
+  @Test
+  public void testSourceListing() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getSourceFileListing(),
+        new Path("hdfs://localhost:8020/source/first"));
+  }
+
+  @Test
+  public void testSourceListingAndSourcePath() {
+    try {
+      OptionsParser.parse(new String[] {
+          "-f",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Both source listing & source paths allowed");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testMissingSourceInfo() {
+    try {
+      OptionsParser.parse(new String[] {
+          "hdfs://localhost:8020/target/"});
+      Assert.fail("Neither source listing not source paths present");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testMissingTarget() {
+    try {
+      OptionsParser.parse(new String[] {
+          "-f", "hdfs://localhost:8020/source"});
+      Assert.fail("Missing target allowed");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testInvalidArgs() {
+    try {
+      OptionsParser.parse(new String[] {
+          "-m", "-f", "hdfs://localhost:8020/source"});
+      Assert.fail("Missing map value");
+    } catch (IllegalArgumentException ignore) {}
+  }
+
+  @Test
+  public void testToString() {
+    DistCpOptions option = new DistCpOptions(new Path("abc"), new Path("xyz"));
+    String val = "DistCpOptions{atomicCommit=false, syncFolder=false, deleteMissing=false, " +
+        "ignoreFailures=false, maxMaps=20, sslConfigurationFile='null', copyStrategy='uniformsize', " +
+        "sourceFileListing=abc, sourcePaths=null, targetPath=xyz}";
+    Assert.assertEquals(val, option.toString());
+    Assert.assertNotSame(DistCpOptionSwitch.ATOMIC_COMMIT.toString(),
+        DistCpOptionSwitch.ATOMIC_COMMIT.name());
+  }
+
+  @Test
+  public void testCopyStrategy() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-strategy",
+        "dynamic",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getCopyStrategy(), "dynamic");
+
+    options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getCopyStrategy(), DistCpConstants.UNIFORMSIZE);
+  }
+
+  @Test
+  public void testTargetPath() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertEquals(options.getTargetPath(), new Path("hdfs://localhost:8020/target/"));
+  }
+
+  @Test
+  public void testPreserve() {
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-p",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-p",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-pbr",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-pbrgup",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.BLOCKSIZE));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.REPLICATION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.USER));
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.GROUP));
+
+    options = OptionsParser.parse(new String[] {
+        "-p",
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    int i = 0;
+    Iterator<FileAttribute> attribIterator = options.preserveAttributes();
+    while (attribIterator.hasNext()) {
+      attribIterator.next();
+      i++;
+    }
+    Assert.assertEquals(i, 5);
+
+    try {
+      OptionsParser.parse(new String[] {
+          "-pabc",
+          "-f",
+          "hdfs://localhost:8020/source/first",
+          "hdfs://localhost:8020/target"});
+      Assert.fail("Invalid preserve attribute");
+    }
+    catch (IllegalArgumentException ignore) {}
+    catch (NoSuchElementException ignore) {}
+
+    options = OptionsParser.parse(new String[] {
+        "-f",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    Assert.assertFalse(options.shouldPreserve(FileAttribute.PERMISSION));
+    options.preserve(FileAttribute.PERMISSION);
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+
+    options.preserve(FileAttribute.PERMISSION);
+    Assert.assertTrue(options.shouldPreserve(FileAttribute.PERMISSION));
+  }
+
+  @Test
+  public void testOptionsSwitchAddToConf() {
+    Configuration conf = new Configuration();
+    Assert.assertNull(conf.get(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel()));
+    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.ATOMIC_COMMIT);
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel(), false));
+  }
+
+  @Test
+  public void testOptionsAppendToConf() {
+    Configuration conf = new Configuration();
+    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false));
+    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel(), false));
+    DistCpOptions options = OptionsParser.parse(new String[] {
+        "-atomic",
+        "-i",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    options.appendToConf(conf);
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false));
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.ATOMIC_COMMIT.getConfigLabel(), false));
+    Assert.assertEquals(conf.getInt(DistCpOptionSwitch.BANDWIDTH.getConfigLabel(), -1),
+        DistCpConstants.DEFAULT_BANDWIDTH_MB);
+
+    conf = new Configuration();
+    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false));
+    Assert.assertFalse(conf.getBoolean(DistCpOptionSwitch.DELETE_MISSING.getConfigLabel(), false));
+    Assert.assertEquals(conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel()), null);
+    options = OptionsParser.parse(new String[] {
+        "-update",
+        "-delete",
+        "-pu",
+        "-bandwidth",
+        "11",
+        "hdfs://localhost:8020/source/first",
+        "hdfs://localhost:8020/target/"});
+    options.appendToConf(conf);
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false));
+    Assert.assertTrue(conf.getBoolean(DistCpOptionSwitch.DELETE_MISSING.getConfigLabel(), false));
+    Assert.assertEquals(conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel()), "U");
+    Assert.assertEquals(conf.getInt(DistCpOptionSwitch.BANDWIDTH.getConfigLabel(), -1), 11);
+  }
+}
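
The parse-then-append flow tested above can be summarized in the short sketch below. It is not part of the patch; it only reuses APIs shown in it (OptionsParser.parse, DistCpOptions.appendToConf, DistCpOptionSwitch.getConfigLabel), and the URIs are placeholders. The expected values in the comments follow the assertions in testOptionsAppendToConf.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpOptionSwitch;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.OptionsParser;

public class OptionsParserSketch {
  public static void main(String[] args) {
    // Parse command-line style arguments into a DistCpOptions instance.
    DistCpOptions options = OptionsParser.parse(new String[] {
        "-update", "-bandwidth", "11",
        "hdfs://localhost:8020/source/first",
        "hdfs://localhost:8020/target/"});

    // Push the parsed switches into a Configuration, roughly what the DistCp
    // driver does before submitting the job.
    Configuration conf = new Configuration();
    options.appendToConf(conf);

    System.out.println(conf.getBoolean(
        DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false));  // true
    System.out.println(conf.getInt(
        DistCpOptionSwitch.BANDWIDTH.getConfigLabel(), -1));        // 11
  }
}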

+ 419 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyCommitter.java

@@ -0,0 +1,419 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
+import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
+import org.apache.hadoop.tools.CopyListing;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.GlobbedCopyListing;
+import org.apache.hadoop.tools.util.TestDistCpUtils;
+import org.apache.hadoop.security.Credentials;
+import org.junit.*;
+
+import java.io.IOException;
+import java.util.*;
+
+public class TestCopyCommitter {
+  private static final Log LOG = LogFactory.getLog(TestCopyCommitter.class);
+
+  private static final Random rand = new Random();
+
+  private static final Credentials CREDENTIALS = new Credentials();
+  public static final int PORT = 39737;
+
+
+  private static Configuration config;
+  private static MiniDFSCluster cluster;
+
+  private static Job getJobForClient() throws IOException {
+    Job job = Job.getInstance(new Configuration());
+    job.getConfiguration().set("mapred.job.tracker", "localhost:" + PORT);
+    job.setInputFormatClass(NullInputFormat.class);
+    job.setOutputFormatClass(NullOutputFormat.class);
+    job.setNumReduceTasks(0);
+    return job;
+  }
+
+  @BeforeClass
+  public static void create() throws IOException {
+    config = getJobForClient().getConfiguration();
+    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, 0);
+    cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).format(true)
+                      .build();
+  }
+
+  @AfterClass
+  public static void destroy() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Before
+  public void createMetaFolder() {
+    config.set(DistCpConstants.CONF_LABEL_META_FOLDER, "/meta");
+    Path meta = new Path("/meta");
+    try {
+      cluster.getFileSystem().mkdirs(meta);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while creating meta folder", e);
+      Assert.fail("Unable to create meta folder");
+    }
+  }
+
+  @After
+  public void cleanupMetaFolder() {
+    Path meta = new Path("/meta");
+    try {
+      if (cluster.getFileSystem().exists(meta)) {
+        cluster.getFileSystem().delete(meta, true);
+        Assert.fail("Expected meta folder to be deleted");
+      }
+    } catch (IOException e) {
+      LOG.error("Exception encountered while cleaning up folder", e);
+      Assert.fail("Unable to clean up meta folder");
+    }
+  }
+
+  @Test
+  public void testNoCommitAction() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      committer.commitJob(jobContext);
+      Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      Assert.assertEquals(taskAttemptContext.getStatus(), "Commit Successful");
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Commit failed");
+    }
+  }
+
+  @Test
+  public void testPreserveStatus() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+
+    String sourceBase;
+    String targetBase;
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      FsPermission sourcePerm = new FsPermission((short) 511);
+      FsPermission initialPerm = new FsPermission((short) 448);
+      sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
+      targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);
+
+      DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
+          new Path("/out"));
+      options.preserve(FileAttribute.PERMISSION);
+      options.appendToConf(conf);
+
+      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
+      listing.buildListing(listingFile, options);
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
+
+      committer.commitJob(jobContext);
+      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
+        Assert.fail("Permission don't match");
+      }
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
+        Assert.fail("Permission don't match");
+      }
+
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing for preserve status", e);
+      Assert.fail("Preserve status failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp1");
+    }
+
+  }
+
+  @Test
+  public void testDeleteMissing() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+    String sourceBase;
+    String targetBase;
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
+      targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
+      String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
+      fs.rename(new Path(targetBaseAdd), new Path(targetBase));
+
+      DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)),
+          new Path("/out"));
+      options.setSyncFolder(true);
+      options.setDeleteMissing(true);
+      options.appendToConf(conf);
+
+      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
+      listing.buildListing(listingFile, options);
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
+
+      committer.commitJob(jobContext);
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+    } catch (Throwable e) {
+      LOG.error("Exception encountered while testing for delete missing", e);
+      Assert.fail("Delete missing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp1");
+      conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
+    }
+  }
+
+  @Test
+  public void testDeleteMissingFlatInterleavedFiles() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+
+    String sourceBase;
+    String targetBase;
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      sourceBase = "/tmp1/" + String.valueOf(rand.nextLong());
+      targetBase = "/tmp1/" + String.valueOf(rand.nextLong());
+      TestDistCpUtils.createFile(fs, sourceBase + "/1");
+      TestDistCpUtils.createFile(fs, sourceBase + "/3");
+      TestDistCpUtils.createFile(fs, sourceBase + "/4");
+      TestDistCpUtils.createFile(fs, sourceBase + "/5");
+      TestDistCpUtils.createFile(fs, sourceBase + "/7");
+      TestDistCpUtils.createFile(fs, sourceBase + "/8");
+      TestDistCpUtils.createFile(fs, sourceBase + "/9");
+
+      TestDistCpUtils.createFile(fs, targetBase + "/2");
+      TestDistCpUtils.createFile(fs, targetBase + "/4");
+      TestDistCpUtils.createFile(fs, targetBase + "/5");
+      TestDistCpUtils.createFile(fs, targetBase + "/7");
+      TestDistCpUtils.createFile(fs, targetBase + "/9");
+      TestDistCpUtils.createFile(fs, targetBase + "/A");
+
+      DistCpOptions options = new DistCpOptions(Arrays.asList(new Path(sourceBase)), 
+          new Path("/out"));
+      options.setSyncFolder(true);
+      options.setDeleteMissing(true);
+      options.appendToConf(conf);
+
+      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
+      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
+      listing.buildListing(listingFile, options);
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);
+
+      committer.commitJob(jobContext);
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+      Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
+        Assert.fail("Source and target folders are not in sync");
+      }
+      Assert.assertEquals(fs.listStatus(new Path(targetBase)).length, 4);
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing for delete missing", e);
+      Assert.fail("Delete missing failure");
+    } finally {
+      TestDistCpUtils.delete(fs, "/tmp1");
+      conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
+    }
+
+  }
+
+  @Test
+  public void testAtomicCommitMissingFinal() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
+    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      fs.mkdirs(new Path(workPath));
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
+      conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
+
+      Assert.assertTrue(fs.exists(new Path(workPath)));
+      Assert.assertFalse(fs.exists(new Path(finalPath)));
+      committer.commitJob(jobContext);
+      Assert.assertFalse(fs.exists(new Path(workPath)));
+      Assert.assertTrue(fs.exists(new Path(finalPath)));
+
+      //Test for idempotent commit
+      committer.commitJob(jobContext);
+      Assert.assertFalse(fs.exists(new Path(workPath)));
+      Assert.assertTrue(fs.exists(new Path(finalPath)));
+
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing for preserve status", e);
+      Assert.fail("Atomic commit failure");
+    } finally {
+      TestDistCpUtils.delete(fs, workPath);
+      TestDistCpUtils.delete(fs, finalPath);
+    }
+  }
+
+  @Test
+  public void testAtomicCommitExistingFinal() {
+    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
+    JobContext jobContext = new JobContextImpl(taskAttemptContext.getConfiguration(),
+        taskAttemptContext.getTaskAttemptID().getJobID());
+    Configuration conf = jobContext.getConfiguration();
+
+
+    String workPath = "/tmp1/" + String.valueOf(rand.nextLong());
+    String finalPath = "/tmp1/" + String.valueOf(rand.nextLong());
+    FileSystem fs = null;
+    try {
+      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
+      fs = FileSystem.get(conf);
+      fs.mkdirs(new Path(workPath));
+      fs.mkdirs(new Path(finalPath));
+
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, workPath);
+      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, finalPath);
+      conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
+
+      Assert.assertTrue(fs.exists(new Path(workPath)));
+      Assert.assertTrue(fs.exists(new Path(finalPath)));
+      try {
+        committer.commitJob(jobContext);
+        Assert.fail("Should not be able to atomic-commit to pre-existing path.");
+      } catch(Exception exception) {
+        Assert.assertTrue(fs.exists(new Path(workPath)));
+        Assert.assertTrue(fs.exists(new Path(finalPath)));
+        LOG.info("Atomic-commit Test pass.");
+      }
+
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing for atomic commit.", e);
+      Assert.fail("Atomic commit failure");
+    } finally {
+      TestDistCpUtils.delete(fs, workPath);
+      TestDistCpUtils.delete(fs, finalPath);
+    }
+  }
+
+  private TaskAttemptContext getTaskAttemptContext(Configuration conf) {
+    return new TaskAttemptContextImpl(conf,
+        new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
+  }
+
+  private boolean checkDirectoryPermissions(FileSystem fs, String targetBase,
+                                            FsPermission sourcePerm) throws IOException {
+    Path base = new Path(targetBase);
+
+    Stack<Path> stack = new Stack<Path>();
+    stack.push(base);
+    while (!stack.isEmpty()) {
+      Path file = stack.pop();
+      if (!fs.exists(file)) continue;
+      FileStatus[] fStatus = fs.listStatus(file);
+      if (fStatus == null || fStatus.length == 0) continue;
+
+      for (FileStatus status : fStatus) {
+        if (status.isDirectory()) {
+          stack.push(status.getPath());
+          Assert.assertEquals(status.getPermission(), sourcePerm);
+        }
+      }
+    }
+    return true;
+  }
+
+  private static class NullInputFormat extends InputFormat {
+    @Override
+    public List getSplits(JobContext context)
+        throws IOException, InterruptedException {
+      return Collections.EMPTY_LIST;
+    }
+
+    @Override
+    public RecordReader createRecordReader(InputSplit split,
+                                           TaskAttemptContext context)
+        throws IOException, InterruptedException {
+      return null;
+    }
+  }
+}
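
The atomic-commit knobs exercised by testAtomicCommitMissingFinal above reduce to the following sketch. It is illustrative only, not part of the patch: the work/final paths are placeholders, it assumes the work directory already exists and the final path does not, and a real run needs the listing/meta setup done in the test fixture (MiniDFSCluster, meta folder, byte counters).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.mapred.CopyCommitter;

public class AtomicCommitSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Placeholder paths; in the tests above these live on a MiniDFSCluster.
    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, "/tmp/distcp-work");
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, "/tmp/distcp-final");
    conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);

    TaskAttemptContext taskContext = new TaskAttemptContextImpl(conf,
        new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
    JobContext jobContext = new JobContextImpl(conf,
        taskContext.getTaskAttemptID().getJobID());

    // On commit, the work directory is promoted to the final path; committing a
    // second time is the idempotence case checked by the tests above.
    OutputCommitter committer = new CopyCommitter(null, taskContext);
    committer.commitJob(jobContext);
  }
}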

+ 826 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java

@@ -0,0 +1,826 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsAction;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptionSwitch;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.StubContext;
+import org.apache.hadoop.tools.util.DistCpUtils;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.security.PrivilegedAction;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
+public class TestCopyMapper {
+  private static final Log LOG = LogFactory.getLog(TestCopyMapper.class);
+  private static List<Path> pathList = new ArrayList<Path>();
+  private static int nFiles = 0;
+  private static final int FILE_SIZE = 1024;
+
+  private static MiniDFSCluster cluster;
+
+  private static final String SOURCE_PATH = "/tmp/source";
+  private static final String TARGET_PATH = "/tmp/target";
+
+  private static Configuration configuration;
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    configuration = getConfigurationForCluster();
+    cluster = new MiniDFSCluster.Builder(configuration)
+                .numDataNodes(1)
+                .format(true)
+                .build();
+  }
+
+  private static Configuration getConfigurationForCluster() throws IOException {
+    Configuration configuration = new Configuration();
+    System.setProperty("test.build.data", "target/tmp/build/TEST_COPY_MAPPER/data");
+    configuration.set("hadoop.log.dir", "target/tmp");
+    LOG.debug("fs.default.name  == " + configuration.get("fs.default.name"));
+    LOG.debug("dfs.http.address == " + configuration.get("dfs.http.address"));
+    return configuration;
+  }
+
+  private static Configuration getConfiguration() throws IOException {
+    Configuration configuration = getConfigurationForCluster();
+    final FileSystem fs = cluster.getFileSystem();
+    Path workPath = new Path(TARGET_PATH)
+            .makeQualified(fs.getUri(), fs.getWorkingDirectory());
+    configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
+            workPath.toString());
+    configuration.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
+            workPath.toString());
+    configuration.setBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(),
+            false);
+    configuration.setBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(),
+            true);
+    configuration.setBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(),
+            true);
+    configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
+            "br");
+    return configuration;
+  }
+
+  private static void createSourceData() throws Exception {
+    mkdirs(SOURCE_PATH + "/1");
+    mkdirs(SOURCE_PATH + "/2");
+    mkdirs(SOURCE_PATH + "/2/3/4");
+    mkdirs(SOURCE_PATH + "/2/3");
+    mkdirs(SOURCE_PATH + "/5");
+    touchFile(SOURCE_PATH + "/5/6");
+    mkdirs(SOURCE_PATH + "/7");
+    mkdirs(SOURCE_PATH + "/7/8");
+    touchFile(SOURCE_PATH + "/7/8/9");
+  }
+
+  private static void mkdirs(String path) throws Exception {
+    FileSystem fileSystem = cluster.getFileSystem();
+    final Path qualifiedPath = new Path(path).makeQualified(fileSystem.getUri(),
+                                              fileSystem.getWorkingDirectory());
+    pathList.add(qualifiedPath);
+    fileSystem.mkdirs(qualifiedPath);
+  }
+
+  private static void touchFile(String path) throws Exception {
+    FileSystem fs;
+    DataOutputStream outputStream = null;
+    try {
+      fs = cluster.getFileSystem();
+      final Path qualifiedPath = new Path(path).makeQualified(fs.getUri(),
+                                                      fs.getWorkingDirectory());
+      final long blockSize = fs.getDefaultBlockSize() * 2;
+      outputStream = fs.create(qualifiedPath, true, 0,
+              (short)(fs.getDefaultReplication()*2),
+              blockSize);
+      outputStream.write(new byte[FILE_SIZE]);
+      pathList.add(qualifiedPath);
+      ++nFiles;
+
+      FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
+      System.out.println(fileStatus.getBlockSize());
+      System.out.println(fileStatus.getReplication());
+    }
+    finally {
+      IOUtils.cleanup(null, outputStream);
+    }
+  }
+
+  @Test
+  public void testRun() {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+      copyMapper.setup(context);
+
+      for (Path path: pathList) {
+        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                fs.getFileStatus(path), context);
+      }
+
+      // Check that the maps worked.
+      for (Path path : pathList) {
+        final Path targetPath = new Path(path.toString()
+                .replaceAll(SOURCE_PATH, TARGET_PATH));
+        Assert.assertTrue(fs.exists(targetPath));
+        Assert.assertTrue(fs.isFile(targetPath) == fs.isFile(path));
+        Assert.assertEquals(fs.getFileStatus(path).getReplication(),
+                fs.getFileStatus(targetPath).getReplication());
+        Assert.assertEquals(fs.getFileStatus(path).getBlockSize(),
+                fs.getFileStatus(targetPath).getBlockSize());
+        Assert.assertTrue(!fs.isFile(targetPath) ||
+                fs.getFileChecksum(targetPath).equals(
+                        fs.getFileChecksum(path)));
+      }
+
+      Assert.assertEquals(pathList.size(),
+              stubContext.getReporter().getCounter(CopyMapper.Counter.COPY).getValue());
+      Assert.assertEquals(nFiles * FILE_SIZE,
+              stubContext.getReporter().getCounter(CopyMapper.Counter.BYTESCOPIED).getValue());
+
+      testCopyingExistingFiles(fs, copyMapper, context);
+      for (Text value : stubContext.getWriter().values()) {
+        Assert.assertTrue(value.toString() + " is not skipped", value.toString().startsWith("SKIP:"));
+      }
+    }
+    catch (Exception e) {
+      LOG.error("Unexpected exception: ", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+
+  private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
+                                        Mapper<Text, FileStatus, Text, Text>.Context context) {
+
+    try {
+      for (Path path : pathList) {
+        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                fs.getFileStatus(path), context);
+      }
+
+      Assert.assertEquals(nFiles,
+              context.getCounter(CopyMapper.Counter.SKIP).getValue());
+    }
+    catch (Exception exception) {
+      Assert.assertTrue("Caught unexpected exception:" + exception.getMessage(),
+              false);
+    }
+  }
+
+  @Test
+  public void testMakeDirFailure() {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      Configuration configuration = context.getConfiguration();
+      String workPath = new Path("hftp://localhost:1234/*/*/*/?/")
+              .makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString();
+      configuration.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
+              workPath);
+      copyMapper.setup(context);
+
+      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), pathList.get(0))),
+              fs.getFileStatus(pathList.get(0)), context);
+
+      Assert.assertTrue("There should have been an exception.", false);
+    }
+    catch (Exception ignore) {
+    }
+  }
+
+  @Test
+  public void testIgnoreFailures() {
+    doTestIgnoreFailures(true);
+    doTestIgnoreFailures(false);
+  }
+
+  @Test
+  public void testDirToFile() {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      mkdirs(SOURCE_PATH + "/src/file");
+      touchFile(TARGET_PATH + "/src/file");
+      try {
+        copyMapper.setup(context);
+        copyMapper.map(new Text("/src/file"),
+            fs.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+            context);
+      } catch (IOException e) {
+        Assert.assertTrue(e.getMessage().startsWith("Can't replace"));
+      }
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testPreserve() {
+    try {
+      deleteState();
+      createSourceData();
+
+      UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
+
+      final CopyMapper copyMapper = new CopyMapper();
+      
+      final Mapper<Text, FileStatus, Text, Text>.Context context =  tmpUser.
+          doAs(new PrivilegedAction<Mapper<Text, FileStatus, Text, Text>.Context>() {
+        @Override
+        public Mapper<Text, FileStatus, Text, Text>.Context run() {
+          try {
+            StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+            return stubContext.getContext();
+          } catch (Exception e) {
+            LOG.error("Exception encountered ", e);
+            throw new RuntimeException(e);
+          }
+        }
+      });
+
+      EnumSet<DistCpOptions.FileAttribute> preserveStatus =
+          EnumSet.allOf(DistCpOptions.FileAttribute.class);
+
+      context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
+        DistCpUtils.packAttributes(preserveStatus));
+
+      touchFile(SOURCE_PATH + "/src/file");
+      mkdirs(TARGET_PATH);
+      cluster.getFileSystem().setPermission(new Path(TARGET_PATH), new FsPermission((short)511));
+
+      final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
+        @Override
+        public FileSystem run() {
+          try {
+            return FileSystem.get(configuration);
+          } catch (IOException e) {
+            LOG.error("Exception encountered ", e);
+            Assert.fail("Test failed: " + e.getMessage());
+            throw new RuntimeException("Test ought to fail here");
+          }
+        }
+      });
+
+      tmpUser.doAs(new PrivilegedAction<Integer>() {
+        @Override
+        public Integer run() {
+          try {
+            copyMapper.setup(context);
+            copyMapper.map(new Text("/src/file"),
+                tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+                context);
+            Assert.fail("Expected copy to fail");
+          } catch (AccessControlException e) {
+            Assert.assertTrue("Got exception: " + e.getMessage(), true);
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+          return null;
+        }
+      });
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
+  @Test
+  public void testCopyReadableFiles() {
+    try {
+      deleteState();
+      createSourceData();
+
+      UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
+
+      final CopyMapper copyMapper = new CopyMapper();
+
+      final Mapper<Text, FileStatus, Text, Text>.Context context =  tmpUser.
+          doAs(new PrivilegedAction<Mapper<Text, FileStatus, Text, Text>.Context>() {
+        @Override
+        public Mapper<Text, FileStatus, Text, Text>.Context run() {
+          try {
+            StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+            return stubContext.getContext();
+          } catch (Exception e) {
+            LOG.error("Exception encountered ", e);
+            throw new RuntimeException(e);
+          }
+        }
+      });
+
+      touchFile(SOURCE_PATH + "/src/file");
+      mkdirs(TARGET_PATH);
+      cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+      cluster.getFileSystem().setPermission(new Path(TARGET_PATH), new FsPermission((short)511));
+
+      final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
+        @Override
+        public FileSystem run() {
+          try {
+            return FileSystem.get(configuration);
+          } catch (IOException e) {
+            LOG.error("Exception encountered ", e);
+            Assert.fail("Test failed: " + e.getMessage());
+            throw new RuntimeException("Test ought to fail here");
+          }
+        }
+      });
+
+      tmpUser.doAs(new PrivilegedAction<Integer>() {
+        @Override
+        public Integer run() {
+          try {
+            copyMapper.setup(context);
+            copyMapper.map(new Text("/src/file"),
+                tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+                context);
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+          return null;
+        }
+      });
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
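+  /**
+   * The source file and an identical target file both exist and are read-only,
+   * and the "guest" user runs the copy with all attributes preserved. The mapper
+   * is expected to skip the file and record a "SKIP:" entry for it.
+   */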
+  @Test
+  public void testSkipCopyNoPerms() {
+    try {
+      deleteState();
+      createSourceData();
+
+      UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
+
+      final CopyMapper copyMapper = new CopyMapper();
+
+      final StubContext stubContext =  tmpUser.
+          doAs(new PrivilegedAction<StubContext>() {
+        @Override
+        public StubContext run() {
+          try {
+            return new StubContext(getConfiguration(), null, 0);
+          } catch (Exception e) {
+            LOG.error("Exception encountered ", e);
+            throw new RuntimeException(e);
+          }
+        }
+      });
+
+      final Mapper<Text, FileStatus, Text, Text>.Context context = stubContext.getContext();
+      EnumSet<DistCpOptions.FileAttribute> preserveStatus =
+          EnumSet.allOf(DistCpOptions.FileAttribute.class);
+
+      context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
+        DistCpUtils.packAttributes(preserveStatus));
+
+      touchFile(SOURCE_PATH + "/src/file");
+      touchFile(TARGET_PATH + "/src/file");
+      cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+      cluster.getFileSystem().setPermission(new Path(TARGET_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+
+      final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
+        @Override
+        public FileSystem run() {
+          try {
+            return FileSystem.get(configuration);
+          } catch (IOException e) {
+            LOG.error("Exception encountered ", e);
+            Assert.fail("Test failed: " + e.getMessage());
+            throw new RuntimeException("Test ought to fail here");
+          }
+        }
+      });
+
+      tmpUser.doAs(new PrivilegedAction<Integer>() {
+        @Override
+        public Integer run() {
+          try {
+            copyMapper.setup(context);
+            copyMapper.map(new Text("/src/file"),
+                tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+                context);
+            Assert.assertEquals(stubContext.getWriter().values().size(), 1);
+            Assert.assertTrue(stubContext.getWriter().values().get(0).toString().startsWith("SKIP"));
+            Assert.assertTrue(stubContext.getWriter().values().get(0).toString().
+                contains(SOURCE_PATH + "/src/file"));
+          } catch (Exception e) {
+            throw new RuntimeException(e);
+          }
+          return null;
+        }
+      });
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
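+  /**
+   * The target file already exists with different contents and the "guest" user
+   * has no write access to it, so the file can neither be skipped nor replaced.
+   * The copy is expected to fail with an AccessControlException.
+   */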
+  @Test
+  public void testFailCopyWithAccessControlException() {
+    try {
+      deleteState();
+      createSourceData();
+
+      UserGroupInformation tmpUser = UserGroupInformation.createRemoteUser("guest");
+
+      final CopyMapper copyMapper = new CopyMapper();
+
+      final StubContext stubContext =  tmpUser.
+          doAs(new PrivilegedAction<StubContext>() {
+        @Override
+        public StubContext run() {
+          try {
+            return new StubContext(getConfiguration(), null, 0);
+          } catch (Exception e) {
+            LOG.error("Exception encountered ", e);
+            throw new RuntimeException(e);
+          }
+        }
+      });
+
+      EnumSet<DistCpOptions.FileAttribute> preserveStatus =
+          EnumSet.allOf(DistCpOptions.FileAttribute.class);
+
+      final Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+      
+      context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
+        DistCpUtils.packAttributes(preserveStatus));
+
+      touchFile(SOURCE_PATH + "/src/file");
+      OutputStream out = cluster.getFileSystem().create(new Path(TARGET_PATH + "/src/file"));
+      out.write("hello world".getBytes());
+      out.close();
+      cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+      cluster.getFileSystem().setPermission(new Path(TARGET_PATH + "/src/file"),
+          new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
+
+      final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
+        @Override
+        public FileSystem run() {
+          try {
+            return FileSystem.get(configuration);
+          } catch (IOException e) {
+            LOG.error("Exception encountered ", e);
+            Assert.fail("Test failed: " + e.getMessage());
+            throw new RuntimeException("Test ought to fail here");
+          }
+        }
+      });
+
+      tmpUser.doAs(new PrivilegedAction<Integer>() {
+        @Override
+        public Integer run() {
+          try {
+            copyMapper.setup(context);
+            copyMapper.map(new Text("/src/file"),
+                tmpFS.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+                context);
+            Assert.fail("Didn't expect the file to be copied");
+          } catch (AccessControlException ignore) {
+          } catch (Exception e) {
+            if (e.getCause() == null || !(e.getCause() instanceof AccessControlException)) {
+              throw new RuntimeException(e);
+            }
+          }
+          return null;
+        }
+      });
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
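+  /**
+   * A source file is copied onto a target path that is an existing directory of
+   * the same name. The copy is expected to fail with a "Can't replace" IOException.
+   */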
+  @Test
+  public void testFileToDir() {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      touchFile(SOURCE_PATH + "/src/file");
+      mkdirs(TARGET_PATH + "/src/file");
+      try {
+        copyMapper.setup(context);
+        copyMapper.map(new Text("/src/file"),
+            fs.getFileStatus(new Path(SOURCE_PATH + "/src/file")),
+            context);
+        Assert.fail("Expected the copy to fail: a file can't replace an existing directory");
+      } catch (IOException e) {
+        Assert.assertTrue("Unexpected message: " + e.getMessage(),
+            e.getMessage().startsWith("Can't replace"));
+      }
+    } catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Test failed: " + e.getMessage());
+    }
+  }
+
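+  /**
+   * Deletes each source file just before it is mapped, so every copy attempt fails.
+   * With ignoreFailures set, the mapper should record "FAIL:" entries and continue;
+   * without it, the first failure should surface as an exception.
+   */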
+  private void doTestIgnoreFailures(boolean ignoreFailures) {
+    try {
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      Configuration configuration = context.getConfiguration();
+      configuration.setBoolean(
+              DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(),ignoreFailures);
+      configuration.setBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(),
+              true);
+      configuration.setBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(),
+              true);
+      copyMapper.setup(context);
+
+      for (Path path : pathList) {
+        final FileStatus fileStatus = fs.getFileStatus(path);
+        if (!fileStatus.isDirectory()) {
+          fs.delete(path, true);
+          copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                  fileStatus, context);
+        }
+      }
+      if (ignoreFailures) {
+        for (Text value : stubContext.getWriter().values()) {
+          Assert.assertTrue(value.toString() + " was not recorded as a failure",
+              value.toString().startsWith("FAIL:"));
+        }
+      }
+      Assert.assertTrue("There should have been an exception.", ignoreFailures);
+    }
+    catch (Exception e) {
+      e.printStackTrace();
+      Assert.assertTrue("Unexpected exception: " + e.getMessage(),
+              !ignoreFailures);
+    }
+  }
+
+  private static void deleteState() throws IOException {
+    pathList.clear();
+    nFiles = 0;
+    cluster.getFileSystem().delete(new Path(SOURCE_PATH), true);
+    cluster.getFileSystem().delete(new Path(TARGET_PATH), true);
+  }
+
+  @Test
+  public void testPreserveBlockSizeAndReplication() {
+    testPreserveBlockSizeAndReplicationImpl(true);
+    testPreserveBlockSizeAndReplicationImpl(false);
+  }
+
+  private void testPreserveBlockSizeAndReplicationImpl(boolean preserve){
+    try {
+
+      deleteState();
+      createSourceData();
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      Configuration configuration = context.getConfiguration();
+      EnumSet<DistCpOptions.FileAttribute> fileAttributes
+              = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
+      if (preserve) {
+        fileAttributes.add(DistCpOptions.FileAttribute.BLOCKSIZE);
+        fileAttributes.add(DistCpOptions.FileAttribute.REPLICATION);
+      }
+      configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
+              DistCpUtils.packAttributes(fileAttributes));
+
+      copyMapper.setup(context);
+
+      for (Path path : pathList) {
+        final FileStatus fileStatus = fs.getFileStatus(path);
+        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                fileStatus, context);
+      }
+
+      // Check that block-size and replication were preserved if (and only if) requested.
+      for (Path path : pathList) {
+        final Path targetPath = new Path(path.toString()
+                .replaceAll(SOURCE_PATH, TARGET_PATH));
+        final FileStatus source = fs.getFileStatus(path);
+        final FileStatus target = fs.getFileStatus(targetPath);
+        if (!source.isDirectory() ) {
+          Assert.assertTrue(preserve ||
+                  source.getBlockSize() != target.getBlockSize());
+          Assert.assertTrue(preserve ||
+                  source.getReplication() != target.getReplication());
+          Assert.assertTrue(!preserve ||
+                  source.getBlockSize() == target.getBlockSize());
+          Assert.assertTrue(!preserve ||
+                  source.getReplication() == target.getReplication());
+        }
+      }
+    }
+    catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+
+  private static void changeUserGroup(String user, String group)
+          throws IOException {
+    FileSystem fs = cluster.getFileSystem();
+    FsPermission changedPermission = new FsPermission(
+            FsAction.ALL, FsAction.ALL, FsAction.ALL
+    );
+    for (Path path : pathList)
+      if (fs.isFile(path)) {
+        fs.setOwner(path, user, group);
+        fs.setPermission(path, changedPermission);
+      }
+  }
+
+  /**
+   * If a single file is being copied to a location where the file (of the same
+   * name) already exists, then the file shouldn't be skipped.
+   */
+  @Test
+  public void testSingleFileCopy() {
+    try {
+      deleteState();
+      touchFile(SOURCE_PATH + "/1");
+      Path sourceFilePath = pathList.get(0);
+      Path targetFilePath = new Path(sourceFilePath.toString().replaceAll(
+              SOURCE_PATH, TARGET_PATH));
+      touchFile(targetFilePath.toString());
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      context.getConfiguration().set(
+              DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
+              targetFilePath.getParent().toString()); // Parent directory.
+      copyMapper.setup(context);
+
+      final FileStatus sourceFileStatus = fs.getFileStatus(sourceFilePath);
+
+      long before = fs.getFileStatus(targetFilePath).getModificationTime();
+      copyMapper.map(new Text(DistCpUtils.getRelativePath(
+              new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
+      long after = fs.getFileStatus(targetFilePath).getModificationTime();
+
+      Assert.assertTrue("File should have been skipped", before == after);
+
+      context.getConfiguration().set(
+              DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
+              targetFilePath.toString()); // Specify the file path.
+      copyMapper.setup(context);
+
+      before = fs.getFileStatus(targetFilePath).getModificationTime();
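+      // Pause briefly so the overwritten file gets a strictly newer modification time.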
+      try { Thread.sleep(2); } catch (Throwable ignore) {}
+      copyMapper.map(new Text(DistCpUtils.getRelativePath(
+              new Path(SOURCE_PATH), sourceFilePath)), sourceFileStatus, context);
+      after = fs.getFileStatus(targetFilePath).getModificationTime();
+
+      Assert.assertTrue("File should have been overwritten.", before < after);
+
+    } catch (Exception exception) {
+      LOG.error("Exception encountered ", exception);
+      Assert.fail("Unexpected exception: " + exception.getMessage());
+    }
+  }
+
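+  /**
+   * Runs the copy with and without USER/GROUP/PERMISSION preservation, and checks
+   * that the target attributes match the source only when preservation was
+   * requested.
+   */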
+  @Test
+  public void testPreserveUserGroup() {
+    testPreserveUserGroupImpl(true);
+    testPreserveUserGroupImpl(false);
+  }
+
+  private void testPreserveUserGroupImpl(boolean preserve){
+    try {
+
+      deleteState();
+      createSourceData();
+      changeUserGroup("Michael", "Corleone");
+
+      FileSystem fs = cluster.getFileSystem();
+      CopyMapper copyMapper = new CopyMapper();
+      StubContext stubContext = new StubContext(getConfiguration(), null, 0);
+      Mapper<Text, FileStatus, Text, Text>.Context context
+              = stubContext.getContext();
+
+      Configuration configuration = context.getConfiguration();
+      EnumSet<DistCpOptions.FileAttribute> fileAttributes
+              = EnumSet.noneOf(DistCpOptions.FileAttribute.class);
+      if (preserve) {
+        fileAttributes.add(DistCpOptions.FileAttribute.USER);
+        fileAttributes.add(DistCpOptions.FileAttribute.GROUP);
+        fileAttributes.add(DistCpOptions.FileAttribute.PERMISSION);
+      }
+
+      configuration.set(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel(),
+              DistCpUtils.packAttributes(fileAttributes));
+      copyMapper.setup(context);
+
+      for (Path path : pathList) {
+        final FileStatus fileStatus = fs.getFileStatus(path);
+        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
+                fileStatus, context);
+      }
+
+      // Check that the user/group attributes are preserved
+      // (only) as necessary.
+      for (Path path : pathList) {
+        final Path targetPath = new Path(path.toString()
+                .replaceAll(SOURCE_PATH, TARGET_PATH));
+        final FileStatus source = fs.getFileStatus(path);
+        final FileStatus target = fs.getFileStatus(targetPath);
+        if (!source.isDirectory()) {
+          Assert.assertTrue(!preserve || source.getOwner().equals(target.getOwner()));
+          Assert.assertTrue(!preserve || source.getGroup().equals(target.getGroup()));
+          Assert.assertTrue(!preserve || source.getPermission().equals(target.getPermission()));
+          Assert.assertTrue( preserve || !source.getOwner().equals(target.getOwner()));
+          Assert.assertTrue( preserve || !source.getGroup().equals(target.getGroup()));
+          Assert.assertTrue( preserve || !source.getPermission().equals(target.getPermission()));
+          // Replication is not among the preserved attributes here, so it should differ.
+          Assert.assertTrue(source.getReplication() != target.getReplication());
+        }
+      }
+    }
+    catch (Exception e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Unexpected exception: " + e.getMessage());
+    }
+  }
+}

+ 135 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyOutputFormat.java

@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.junit.Test;
+import org.junit.Assert;
+
+import java.io.IOException;
+
+public class TestCopyOutputFormat {
+  private static final Log LOG = LogFactory.getLog(TestCopyOutputFormat.class);
+
+  @Test
+  public void testSetCommitDirectory() {
+    try {
+      Job job = Job.getInstance(new Configuration());
+      Assert.assertEquals(null, CopyOutputFormat.getCommitDirectory(job));
+
+      job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, "");
+      Assert.assertEquals(null, CopyOutputFormat.getCommitDirectory(job));
+
+      Path directory = new Path("/tmp/test");
+      CopyOutputFormat.setCommitDirectory(job, directory);
+      Assert.assertEquals(directory, CopyOutputFormat.getCommitDirectory(job));
+      Assert.assertEquals(directory.toString(), job.getConfiguration().
+          get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running test", e);
+      Assert.fail("Failed while testing for set Commit Directory");
+    }
+  }
+
+  @Test
+  public void testSetWorkingDirectory() {
+    try {
+      Job job = Job.getInstance(new Configuration());
+      Assert.assertEquals(null, CopyOutputFormat.getWorkingDirectory(job));
+
+      job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, "");
+      Assert.assertEquals(null, CopyOutputFormat.getWorkingDirectory(job));
+
+      Path directory = new Path("/tmp/test");
+      CopyOutputFormat.setWorkingDirectory(job, directory);
+      Assert.assertEquals(directory, CopyOutputFormat.getWorkingDirectory(job));
+      Assert.assertEquals(directory.toString(), job.getConfiguration().
+          get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
+    } catch (IOException e) {
+      LOG.error("Exception encountered while running test", e);
+      Assert.fail("Failed while testing for set Working Directory");
+    }
+  }
+
+  @Test
+  public void testGetOutputCommitter() {
+    try {
+      TaskAttemptContext context = new TaskAttemptContextImpl(new Configuration(),
+        new TaskAttemptID("200707121733", 1, TaskType.MAP, 1, 1));
+      context.getConfiguration().set("mapred.output.dir", "/out");
+      Assert.assertTrue(new CopyOutputFormat().getOutputCommitter(context) instanceof CopyCommitter);
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Unable to get output committer");
+    }
+  }
+
+  @Test
+  public void testCheckOutputSpecs() {
+    try {
+      OutputFormat outputFormat = new CopyOutputFormat();
+      Job job = Job.getInstance(new Configuration());
+      JobID jobID = new JobID("200707121733", 1);
+
+      try {
+        JobContext context = new JobContextImpl(job.getConfiguration(), jobID);
+        outputFormat.checkOutputSpecs(context);
+        Assert.fail("No checking for invalid work/commit path");
+      } catch (IllegalStateException ignore) { }
+
+      CopyOutputFormat.setWorkingDirectory(job, new Path("/tmp/work"));
+      try {
+        JobContext context = new JobContextImpl(job.getConfiguration(), jobID);
+        outputFormat.checkOutputSpecs(context);
+        Assert.fail("No checking for invalid commit path");
+      } catch (IllegalStateException ignore) { }
+
+      job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, "");
+      CopyOutputFormat.setCommitDirectory(job, new Path("/tmp/commit"));
+      try {
+        JobContext context = new JobContextImpl(job.getConfiguration(), jobID);
+        outputFormat.checkOutputSpecs(context);
+        Assert.fail("No checking for invalid work path");
+      } catch (IllegalStateException ignore) { }
+
+      CopyOutputFormat.setWorkingDirectory(job, new Path("/tmp/work"));
+      CopyOutputFormat.setCommitDirectory(job, new Path("/tmp/commit"));
+      try {
+        JobContext context = new JobContextImpl(job.getConfiguration(), jobID);
+        outputFormat.checkOutputSpecs(context);
+      } catch (IllegalStateException ignore) {
+        Assert.fail("Output spec check failed.");
+      }
+
+    } catch (IOException e) {
+      LOG.error("Exception encountered while testing checkoutput specs", e);
+      Assert.fail("Checkoutput Spec failure");
+    } catch (InterruptedException e) {
+      LOG.error("Exception encountered while testing checkoutput specs", e);
+      Assert.fail("Checkoutput Spec failure");
+    }
+  }
+}

+ 254 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestUniformSizeInputFormat.java

@@ -0,0 +1,254 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.tools.CopyListing;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.StubContext;
+import org.apache.hadoop.security.Credentials;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+
+public class TestUniformSizeInputFormat {
+  private static final Log LOG
+                = LogFactory.getLog(TestUniformSizeInputFormat.class);
+
+  private static MiniDFSCluster cluster;
+  private static final int N_FILES = 20;
+  private static final int SIZEOF_EACH_FILE=1024;
+  private static final Random random = new Random();
+  private static int totalFileSize = 0;
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    cluster = new MiniDFSCluster.Builder(new Configuration()).numDataNodes(1)
+                                          .format(true).build();
+    totalFileSize = 0;
+
+    for (int i=0; i<N_FILES; ++i)
+      totalFileSize += createFile("/tmp/source/" + String.valueOf(i), SIZEOF_EACH_FILE);
+  }
+
+  private static DistCpOptions getOptions(int nMaps) throws Exception {
+    Path sourcePath = new Path(cluster.getFileSystem().getUri().toString()
+                               + "/tmp/source");
+    Path targetPath = new Path(cluster.getFileSystem().getUri().toString()
+                               + "/tmp/target");
+
+    List<Path> sourceList = new ArrayList<Path>();
+    sourceList.add(sourcePath);
+    final DistCpOptions distCpOptions = new DistCpOptions(sourceList, targetPath);
+    distCpOptions.setMaxMaps(nMaps);
+    return distCpOptions;
+  }
+
+  private static int createFile(String path, int fileSize) throws Exception {
+    FileSystem fileSystem = null;
+    DataOutputStream outputStream = null;
+    try {
+      fileSystem = cluster.getFileSystem();
+      outputStream = fileSystem.create(new Path(path), true, 0);
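+      // Randomize each file's size to fall between the nominal size and roughly
+      // twice the nominal size, so the splits aren't trivially uniform.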
+      int size = (int) Math.ceil(fileSize + (1 - random.nextFloat()) * fileSize);
+      outputStream.write(new byte[size]);
+      return size;
+    }
+    finally {
+      IOUtils.cleanup(null, fileSystem, outputStream);
+    }
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    cluster.shutdown();
+  }
+
+  public void testGetSplits(int nMaps) throws Exception {
+    DistCpOptions options = getOptions(nMaps);
+    Configuration configuration = new Configuration();
+    configuration.set("mapred.map.tasks",
+                      String.valueOf(options.getMaxMaps()));
+    Path listFile = new Path(cluster.getFileSystem().getUri().toString()
+        + "/tmp/testGetSplits_1/fileList.seq");
+    CopyListing.getCopyListing(configuration, CREDENTIALS, options).
+        buildListing(listFile, options);
+
+    JobContext jobContext = new JobContextImpl(configuration, new JobID());
+    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
+    List<InputSplit> splits
+            = uniformSizeInputFormat.getSplits(jobContext);
+
+    List<InputSplit> legacySplits = legacyGetSplits(listFile, nMaps);
+
+    int sizePerMap = totalFileSize/nMaps;
+
+    checkSplits(listFile, splits);
+    checkAgainstLegacy(splits, legacySplits);
+
+    int doubleCheckedTotalSize = 0;
+    int previousSplitSize = -1;
+    for (int i=0; i<splits.size(); ++i) {
+      InputSplit split = splits.get(i);
+      int currentSplitSize = 0;
+      RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(
+              split, null);
+      StubContext stubContext = new StubContext(jobContext.getConfiguration(),
+                                                recordReader, 0);
+      final TaskAttemptContext taskAttemptContext
+         = stubContext.getContext();
+      recordReader.initialize(split, taskAttemptContext);
+      while (recordReader.nextKeyValue()) {
+        Path sourcePath = recordReader.getCurrentValue().getPath();
+        FileSystem fs = sourcePath.getFileSystem(configuration);
+        FileStatus[] fileStatus = fs.listStatus(sourcePath);
+        Assert.assertEquals(fileStatus.length, 1);
+        currentSplitSize += fileStatus[0].getLen();
+      }
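+      // Consecutive splits should differ by less than 10% of the average
+      // bytes-per-map, except possibly for the last (remainder) split.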
+      Assert.assertTrue(
+           previousSplitSize == -1
+               || Math.abs(currentSplitSize - previousSplitSize) < 0.1*sizePerMap
+               || i == splits.size()-1);
+
+      doubleCheckedTotalSize += currentSplitSize;
+    }
+
+    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
+  }
+
+  // From
+  // http://svn.apache.org/repos/asf/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/DistCp.java
+  private List<InputSplit> legacyGetSplits(Path listFile, int numSplits)
+      throws IOException {
+
+    FileSystem fs = cluster.getFileSystem();
+    FileStatus srcst = fs.getFileStatus(listFile);
+    Configuration conf = fs.getConf();
+
+    ArrayList<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
+    FileStatus value = new FileStatus();
+    Text key = new Text();
+    final long targetsize = totalFileSize / numSplits;
+    long pos = 0L;
+    long last = 0L;
+    long acc = 0L;
+    long cbrem = srcst.getLen();
+    SequenceFile.Reader sl = null;
+
+    LOG.info("Average bytes per map: " + targetsize +
+        ", Number of maps: " + numSplits + ", total size: " + totalFileSize);
+
+    try {
+      sl = new SequenceFile.Reader(conf, SequenceFile.Reader.file(listFile));
+      for (; sl.next(key, value); last = sl.getPosition()) {
+        // if adding this split would put this split past the target size,
+        // cut the last split and put this next file in the next split.
+        if (acc + value.getLen() > targetsize && acc != 0) {
+          long splitsize = last - pos;
+          FileSplit fileSplit = new FileSplit(listFile, pos, splitsize, null);
+          LOG.info ("Creating split : " + fileSplit + ", bytes in split: " + splitsize);
+          splits.add(fileSplit);
+          cbrem -= splitsize;
+          pos = last;
+          acc = 0L;
+        }
+        acc += value.getLen();
+      }
+    }
+    finally {
+      IOUtils.closeStream(sl);
+    }
+    if (cbrem != 0) {
+      FileSplit fileSplit = new FileSplit(listFile, pos, cbrem, null);
+      LOG.info ("Creating split : " + fileSplit + ", bytes in split: " + cbrem);
+      splits.add(fileSplit);
+    }
+
+    return splits;
+  }
+
+  private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
+    long lastEnd = 0;
+
+    //Verify if each split's start is matching with the previous end and
+    //we are not missing anything
+    for (InputSplit split : splits) {
+      FileSplit fileSplit = (FileSplit) split;
+      long start = fileSplit.getStart();
+      Assert.assertEquals(lastEnd, start);
+      lastEnd = start + fileSplit.getLength();
+    }
+
+    //Verify there is nothing more to read from the input file
+    SequenceFile.Reader reader
+            = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
+                    SequenceFile.Reader.file(listFile));
+
+    try {
+      reader.seek(lastEnd);
+      FileStatus srcFileStatus = new FileStatus();
+      Text srcRelPath = new Text();
+      Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
+    } finally {
+      IOUtils.closeStream(reader);
+    }
+  }
+
+  private void checkAgainstLegacy(List<InputSplit> splits,
+                                  List<InputSplit> legacySplits)
+      throws IOException, InterruptedException {
+
+    Assert.assertEquals(legacySplits.size(), splits.size());
+    for (int index = 0; index < splits.size(); index++) {
+      FileSplit fileSplit = (FileSplit) splits.get(index);
+      FileSplit legacyFileSplit = (FileSplit) legacySplits.get(index);
+      Assert.assertEquals(fileSplit.getStart(), legacyFileSplit.getStart());
+    }
+  }
+
+  @Test
+  public void testGetSplits() throws Exception {
+    testGetSplits(9);
+    for (int i=1; i<N_FILES; ++i)
+      testGetSplits(i);
+  }
+}

+ 162 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/lib/TestDynamicInputFormat.java

@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.mapred.lib;
+
+import junit.framework.Assert;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
+import org.apache.hadoop.tools.CopyListing;
+import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.StubContext;
+import org.apache.hadoop.security.Credentials;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.DataOutputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+public class TestDynamicInputFormat {
+  private static final Log LOG = LogFactory.getLog(TestDynamicInputFormat.class);
+  private static MiniDFSCluster cluster;
+  private static final int N_FILES = 1000;
+  private static final int NUM_SPLITS = 7;
+
+  private static final Credentials CREDENTIALS = new Credentials();
+
+  private static List<String> expectedFilePaths = new ArrayList<String>(N_FILES);
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    cluster = new MiniDFSCluster.Builder(getConfigurationForCluster())
+                  .numDataNodes(1).format(true).build();
+
+    for (int i=0; i<N_FILES; ++i)
+      createFile("/tmp/source/" + String.valueOf(i));
+
+  }
+
+  private static Configuration getConfigurationForCluster() {
+    Configuration configuration = new Configuration();
+    System.setProperty("test.build.data",
+                       "target/tmp/build/TEST_DYNAMIC_INPUT_FORMAT/data");
+    configuration.set("hadoop.log.dir", "target/tmp");
+    LOG.debug("fs.default.name  == " + configuration.get("fs.default.name"));
+    LOG.debug("dfs.http.address == " + configuration.get("dfs.http.address"));
+    return configuration;
+  }
+
+  private static DistCpOptions getOptions() throws Exception {
+    Path sourcePath = new Path(cluster.getFileSystem().getUri().toString()
+            + "/tmp/source");
+    Path targetPath = new Path(cluster.getFileSystem().getUri().toString()
+            + "/tmp/target");
+
+    List<Path> sourceList = new ArrayList<Path>();
+    sourceList.add(sourcePath);
+    DistCpOptions options = new DistCpOptions(sourceList, targetPath);
+    options.setMaxMaps(NUM_SPLITS);
+    return options;
+  }
+
+  private static void createFile(String path) throws Exception {
+    FileSystem fileSystem = null;
+    DataOutputStream outputStream = null;
+    try {
+      fileSystem = cluster.getFileSystem();
+      outputStream = fileSystem.create(new Path(path), true, 0);
+      expectedFilePaths.add(fileSystem.listStatus(
+                                    new Path(path))[0].getPath().toString());
+    }
+    finally {
+      IOUtils.cleanup(null, fileSystem, outputStream);
+    }
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    cluster.shutdown();
+  }
+
+  @Test
+  public void testGetSplits() throws Exception {
+    DistCpOptions options = getOptions();
+    Configuration configuration = new Configuration();
+    configuration.set("mapred.map.tasks",
+                      String.valueOf(options.getMaxMaps()));
+    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
+            new Path(cluster.getFileSystem().getUri().toString()
+                    +"/tmp/testDynInputFormat/fileList.seq"), options);
+
+    JobContext jobContext = new JobContextImpl(configuration, new JobID());
+    DynamicInputFormat<Text, FileStatus> inputFormat =
+        new DynamicInputFormat<Text, FileStatus>();
+    List<InputSplit> splits = inputFormat.getSplits(jobContext);
+
+    int nFiles = 0;
+    int taskId = 0;
+
+    for (InputSplit split : splits) {
+      RecordReader<Text, FileStatus> recordReader =
+           inputFormat.createRecordReader(split, null);
+      StubContext stubContext = new StubContext(jobContext.getConfiguration(),
+                                                recordReader, taskId);
+      final TaskAttemptContext taskAttemptContext
+         = stubContext.getContext();
+      
+      recordReader.initialize(split, taskAttemptContext);
+      float previousProgressValue = 0f;
+      while (recordReader.nextKeyValue()) {
+        FileStatus fileStatus = recordReader.getCurrentValue();
+        String source = fileStatus.getPath().toString();
+        System.out.println(source);
+        Assert.assertTrue(expectedFilePaths.contains(source));
+        final float progress = recordReader.getProgress();
+        Assert.assertTrue(progress >= previousProgressValue);
+        Assert.assertTrue(progress >= 0.0f);
+        Assert.assertTrue(progress <= 1.0f);
+        previousProgressValue = progress;
+        ++nFiles;
+      }
+      Assert.assertTrue(recordReader.getProgress() == 1.0f);
+
+      ++taskId;
+    }
+
+    Assert.assertEquals(expectedFilePaths.size(), nFiles);
+  }
+
+  @Test
+  public void testGetSplitRatio() throws Exception {
+    Assert.assertEquals(1, DynamicInputFormat.getSplitRatio(1, 1000000000));
+    Assert.assertEquals(2, DynamicInputFormat.getSplitRatio(11000000, 10));
+    Assert.assertEquals(4, DynamicInputFormat.getSplitRatio(30, 700));
+    Assert.assertEquals(2, DynamicInputFormat.getSplitRatio(30, 200));
+  }
+}

+ 220 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestDistCpUtils.java

@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.BeforeClass;
+import org.junit.AfterClass;
+
+import java.util.EnumSet;
+import java.util.Random;
+import java.util.Stack;
+import java.io.IOException;
+import java.io.OutputStream;
+
+public class TestDistCpUtils {
+  private static final Log LOG = LogFactory.getLog(TestDistCpUtils.class);
+
+  private static final Configuration config = new Configuration();
+  private static MiniDFSCluster cluster;
+
+  @BeforeClass
+  public static void create() throws IOException {
+    cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).format(true)
+                                                .build(); 
+  }
+
+  @AfterClass
+  public static void destroy() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testGetRelativePathRoot() {
+    Path root = new Path("/tmp/abc");
+    Path child = new Path("/tmp/abc/xyz/file");
+    Assert.assertEquals(DistCpUtils.getRelativePath(root, child), "/xyz/file");
+
+    root = new Path("/");
+    child = new Path("/a");
+    Assert.assertEquals(DistCpUtils.getRelativePath(root, child), "/a");
+  }
+
+  @Test
+  public void testPackAttributes() {
+    EnumSet<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "");
+
+    attributes.add(FileAttribute.REPLICATION);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "R");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("R"));
+
+    attributes.add(FileAttribute.BLOCKSIZE);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "RB");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("RB"));
+
+    attributes.add(FileAttribute.USER);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "RBU");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("RBU"));
+
+    attributes.add(FileAttribute.GROUP);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "RBUG");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("RBUG"));
+
+    attributes.add(FileAttribute.PERMISSION);
+    Assert.assertEquals(DistCpUtils.packAttributes(attributes), "RBUGP");
+    Assert.assertEquals(attributes, DistCpUtils.unpackAttributes("RBUGP"));
+  }
+
+  @Test
+  public void testPreserve() {
+    try {
+      FileSystem fs = FileSystem.get(config);
+      EnumSet<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);
+
+
+      Path path = new Path("/tmp/abc");
+      Path src = new Path("/tmp/src");
+      fs.mkdirs(path);
+      fs.mkdirs(src);
+      FileStatus srcStatus = fs.getFileStatus(src);
+
+      FsPermission noPerm = new FsPermission((short) 0);
+      fs.setPermission(path, noPerm);
+      fs.setOwner(path, "nobody", "nobody");
+
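+      // With an empty attribute set, preserve() must leave the target's
+      // permission, owner and group untouched.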
+      DistCpUtils.preserve(fs, path, srcStatus, attributes);
+      FileStatus target = fs.getFileStatus(path);
+      Assert.assertEquals(target.getPermission(), noPerm);
+      Assert.assertEquals(target.getOwner(), "nobody");
+      Assert.assertEquals(target.getGroup(), "nobody");
+
+      attributes.add(FileAttribute.PERMISSION);
+      DistCpUtils.preserve(fs, path, srcStatus, attributes);
+      target = fs.getFileStatus(path);
+      Assert.assertEquals(target.getPermission(), srcStatus.getPermission());
+      Assert.assertEquals(target.getOwner(), "nobody");
+      Assert.assertEquals(target.getGroup(), "nobody");
+
+      attributes.add(FileAttribute.GROUP);
+      attributes.add(FileAttribute.USER);
+      DistCpUtils.preserve(fs, path, srcStatus, attributes);
+      target = fs.getFileStatus(path);
+      Assert.assertEquals(target.getPermission(), srcStatus.getPermission());
+      Assert.assertEquals(target.getOwner(), srcStatus.getOwner());
+      Assert.assertEquals(target.getGroup(), srcStatus.getGroup());
+
+      fs.delete(path, true);
+      fs.delete(src, true);
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Preserve test failure");
+    }
+  }
+
+  private static Random rand = new Random();
+
+  public static String createTestSetup(FileSystem fs) throws IOException {
+    return createTestSetup("/tmp1", fs, FsPermission.getDefault());
+  }
+  
+  public static String createTestSetup(FileSystem fs,
+                                       FsPermission perm) throws IOException {
+    return createTestSetup("/tmp1", fs, perm);
+  }
+
+  public static String createTestSetup(String baseDir,
+                                       FileSystem fs,
+                                       FsPermission perm) throws IOException {
+    String base = getBase(baseDir);
+    fs.mkdirs(new Path(base + "/newTest/hello/world1"));
+    fs.mkdirs(new Path(base + "/newTest/hello/world2/newworld"));
+    fs.mkdirs(new Path(base + "/newTest/hello/world3/oldworld"));
+    fs.setPermission(new Path(base + "/newTest"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world1"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world2"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world2/newworld"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world3"), perm);
+    fs.setPermission(new Path(base + "/newTest/hello/world3/oldworld"), perm);
+    createFile(fs, base + "/newTest/1");
+    createFile(fs, base + "/newTest/hello/2");
+    createFile(fs, base + "/newTest/hello/world3/oldworld/3");
+    createFile(fs, base + "/newTest/hello/world2/4");
+    return base;
+  }
+
+  private static String getBase(String base) {
+    String location = String.valueOf(rand.nextLong());
+    return base + "/" + location;
+  }
+
+  public static void delete(FileSystem fs, String path) {
+    try {
+      if (fs != null) {
+        if (path != null) {
+          fs.delete(new Path(path), true);
+        }
+      }
+    } catch (IOException e) {
+      LOG.warn("Exception encountered ", e);
+    }
+  }
+
+  public static void createFile(FileSystem fs, String filePath) throws IOException {
+    OutputStream out = fs.create(new Path(filePath));
+    IOUtils.closeStream(out);
+  }
+
+  public static boolean checkIfFoldersAreInSync(FileSystem fs, String targetBase, String sourceBase)
+      throws IOException {
+    Path base = new Path(targetBase);
+
+    Stack<Path> stack = new Stack<Path>();
+    stack.push(base);
+    while (!stack.isEmpty()) {
+      Path file = stack.pop();
+      if (!fs.exists(file)) continue;
+      FileStatus[] fStatus = fs.listStatus(file);
+      if (fStatus == null || fStatus.length == 0) continue;
+
+      for (FileStatus status : fStatus) {
+        if (status.isDirectory()) {
+          stack.push(status.getPath());
+        }
+        Assert.assertTrue(fs.exists(new Path(sourceBase + "/" +
+            DistCpUtils.getRelativePath(new Path(targetBase), status.getPath()))));
+      }
+    }
+    return true;
+  }
+}

+ 81 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestRetriableCommand.java

@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.hadoop.io.retry.RetryPolicy;
+import org.apache.hadoop.io.retry.RetryPolicies;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.concurrent.TimeUnit;
+
+public class TestRetriableCommand {
+
+  private static class MyRetriableCommand extends RetriableCommand {
+
+    private int succeedAfter;
+    private int retryCount = 0;
+
+    public MyRetriableCommand(int succeedAfter) {
+      super("MyRetriableCommand");
+      this.succeedAfter = succeedAfter;
+    }
+
+    public MyRetriableCommand(int succeedAfter, RetryPolicy retryPolicy) {
+      super("MyRetriableCommand", retryPolicy);
+      this.succeedAfter = succeedAfter;
+    }
+
+    @Override
+    protected Object doExecute(Object... arguments) throws Exception {
+      if (++retryCount < succeedAfter)
+        throw new Exception("Transient failure#" + retryCount);
+      return 0;
+    }
+  }
+
+  @Test
+  public void testRetriableCommand() {
+    try {
+      new MyRetriableCommand(5).execute(0);
+      Assert.fail("A command that succeeds only on its 5th attempt should exhaust the default retry policy");
+    }
+    catch (Exception e) {
+      // Expected: the default retry policy gives up before the command succeeds.
+    }
+
+    try {
+      new MyRetriableCommand(3).execute(0);
+    }
+    catch (Exception e) {
+      Assert.fail("A command that succeeds on its 3rd attempt should pass under the default retry policy");
+    }
+
+    try {
+      new MyRetriableCommand(5, RetryPolicies.
+          retryUpToMaximumCountWithFixedSleep(5, 0, TimeUnit.MILLISECONDS)).execute(0);
+    }
+    catch (Exception e) {
+      Assert.fail("A command that succeeds on its 5th attempt should pass when 5 retries are allowed");
+    }
+  }
+}

+ 157 - 0
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/util/TestThrottledInputStream.java

@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools.util;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.io.IOUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.*;
+
+public class TestThrottledInputStream {
+  private static final Log LOG = LogFactory.getLog(TestThrottledInputStream.class);
+  private static final int BUFF_SIZE = 1024;
+
+  private enum CB {ONE_C, BUFFER, BUFF_OFFSET}
+
+  @Test
+  public void testRead() {
+    File tmpFile;
+    File outFile;
+    try {
+      tmpFile = createFile(1024);
+      outFile = createFile();
+
+      tmpFile.deleteOnExit();
+      outFile.deleteOnExit();
+
+      long maxBandwidth = copyAndAssert(tmpFile, outFile, 0, 1, -1, CB.BUFFER);
+
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 20, 0, CB.BUFFER);
+/*
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 10, 0, CB.BUFFER);
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 50, 0, CB.BUFFER);
+*/
+
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 20, 0, CB.BUFF_OFFSET);
+/*
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 10, 0, CB.BUFF_OFFSET);
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 50, 0, CB.BUFF_OFFSET);
+*/
+
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 20, 0, CB.ONE_C);
+/*
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 10, 0, CB.ONE_C);
+      copyAndAssert(tmpFile, outFile, maxBandwidth, 50, 0, CB.ONE_C);
+*/
+    } catch (IOException e) {
+      LOG.error("Exception encountered ", e);
+      Assert.fail("Unexpected IOException: " + e.getMessage());
+    }
+  }
+
+  private long copyAndAssert(File tmpFile, File outFile,
+                             long maxBandwidth, float factor,
+                             int sleepTime, CB flag) throws IOException {
+    long bandwidth;
+    ThrottledInputStream in;
+    long maxBPS = (long) (maxBandwidth / factor);
+
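+    // A maxBandwidth of zero means no throttling; testRead uses such a pass to
+    // measure the raw bandwidth from which the throttled limit (maxBPS) is derived.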
+    if (maxBandwidth == 0) {
+      in = new ThrottledInputStream(new FileInputStream(tmpFile));
+    } else {
+      in = new ThrottledInputStream(new FileInputStream(tmpFile), maxBPS);
+    }
+    OutputStream out = new FileOutputStream(outFile);
+    try {
+      if (flag == CB.BUFFER) {
+        copyBytes(in, out, BUFF_SIZE);
+      } else if (flag == CB.BUFF_OFFSET){
+        copyBytesWithOffset(in, out, BUFF_SIZE);
+      } else {
+        copyByteByByte(in, out);
+      }
+
+      LOG.info(in);
+      bandwidth = in.getBytesPerSec();
+      Assert.assertEquals(in.getTotalBytesRead(), tmpFile.length());
+      Assert.assertTrue(in.getBytesPerSec() > maxBandwidth / (factor * 1.2));
+      Assert.assertTrue(in.getTotalSleepTime() >  sleepTime || in.getBytesPerSec() <= maxBPS);
+    } finally {
+      IOUtils.closeStream(in);
+      IOUtils.closeStream(out);
+    }
+    return bandwidth;
+  }
+
+  private static void copyBytesWithOffset(InputStream in, OutputStream out, int buffSize)
+    throws IOException {
+
+    byte buf[] = new byte[buffSize];
+    int bytesRead = in.read(buf, 0, buffSize);
+    while (bytesRead >= 0) {
+      out.write(buf, 0, bytesRead);
+      bytesRead = in.read(buf, 0, buffSize);
+    }
+  }
+
+  private static void copyByteByByte(InputStream in, OutputStream out)
+    throws IOException {
+
+    int ch = in.read();
+    while (ch >= 0) {
+      out.write(ch);
+      ch = in.read();
+    }
+  }
+
+  private static void copyBytes(InputStream in, OutputStream out, int buffSize)
+    throws IOException {
+
+    byte buf[] = new byte[buffSize];
+    int bytesRead = in.read(buf);
+    while (bytesRead >= 0) {
+      out.write(buf, 0, bytesRead);
+      bytesRead = in.read(buf);
+    }
+  }
+
+  private File createFile(long sizeInKB) throws IOException {
+    File tmpFile = createFile();
+    writeToFile(tmpFile, sizeInKB);
+    return tmpFile;
+  }
+
+  private File createFile() throws IOException {
+    return File.createTempFile("tmp", "dat");
+  }
+
+  private void writeToFile(File tmpFile, long sizeInKB) throws IOException {
+    OutputStream out = new FileOutputStream(tmpFile);
+    try {
+      byte[] buffer = new byte [1024];
+      for (long index = 0; index < sizeInKB; index++) {
+        out.write(buffer);
+      }
+    } finally {
+      IOUtils.closeStream(out);
+    }
+  }
+}

+ 57 - 0
hadoop-tools/hadoop-distcp/src/test/resources/sslConfig.xml

@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.client.truststore.location</name>
+  <value>/path/to/truststore/keys/keystore.jks</value>
+  <description>Truststore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.password</name>
+  <value>changeit</value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.location</name>
+  <value>/path/to/keystore/keys/keystore.jks</value>
+  <description>Keystore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.password</name>
+  <value>changeit</value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.keypassword</name>
+  <value>changeit</value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>

+ 1 - 0
hadoop-tools/pom.xml

@@ -29,6 +29,7 @@
 
   <modules>
     <module>hadoop-streaming</module>
+    <module>hadoop-distcp</module>
     <module>hadoop-archives</module>
     <module>hadoop-rumen</module>
     <module>hadoop-tools-dist</module>