Browse Source

HADOOP-16147. Allow CopyListing sequence file keys and values to be more easily customized.

Author:    Andrew Olson
Andrew Olson 6 years ago
parent
commit
faba3591d3

+ 23 - 0
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/CopyListing.java

@@ -248,6 +248,29 @@ public abstract class CopyListing extends Configured {
     return credentials;
   }
 
+  /**
+   * Returns the key for an entry in the copy listing sequence file: a Text
+   * wrapping DistCpUtils.getRelativePath(sourcePathRoot, fileStatus path).
+   * @param sourcePathRoot the root source path the key is made relative to
+   * @param fileStatus the copy listing file status supplying the path
+   * @return the key for the sequence file entry
+   */
+  protected Text getFileListingKey(Path sourcePathRoot,
+      CopyListingFileStatus fileStatus) {
+    return new Text(DistCpUtils.getRelativePath(sourcePathRoot,
+        fileStatus.getPath()));
+  }
+
+  /**
+   * Returns the value for an entry in the copy listing sequence file.
+   * @param fileStatus the copy listing file status
+   * @return the sequence file entry value; here, {@code fileStatus} itself
+   */
+  protected CopyListingFileStatus getFileListingValue(
+      CopyListingFileStatus fileStatus) {
+    return fileStatus;
+  }
+
   /**
    * Public Factory method with which the appropriate CopyListing implementation may be retrieved.
    * @param configuration The input configuration.

+ 2 - 2
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java

@@ -718,8 +718,8 @@ public class SimpleCopyListing extends CopyListing {
       return;
     }
 
-    fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
-        fileStatus.getPath())), fileStatus);
+    fileListWriter.append(getFileListingKey(sourcePathRoot, fileStatus),
+        getFileListingValue(fileStatus));
     fileListWriter.sync();
 
     if (!fileStatus.isDirectory()) {