HDFS-9630. DistCp minor refactoring and clean up. Contributed by Kai Zheng.

Change-Id: I363c4ffcac32116ddcdc0a22fac3db92f14a0db0
Zhe Zhang, 9 years ago
commit 95f32015ad

+ 2 - 0
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -211,6 +211,8 @@ Trunk (Unreleased)
     HDFS-9582. TestLeaseRecoveryStriped file missing Apache License header 
     and not well formatted. (umamahesh)
 
+    HDFS-9630. DistCp minor refactoring and clean up. (Kai Zheng via zhz)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 8 - 2
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/RegexCopyFilter.java

@@ -20,10 +20,16 @@ package org.apache.hadoop.tools;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IOUtils;
 
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;

+ 8 - 6
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/SimpleCopyListing.java

@@ -39,7 +39,8 @@ import org.apache.hadoop.security.Credentials;
 
 import com.google.common.annotations.VisibleForTesting;
 
-import java.io.*;
+import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashSet;
 
@@ -165,9 +166,9 @@ public class SimpleCopyListing extends CopyListing {
     }
   }
 
-  /** {@inheritDoc} */
   @Override
-  public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
+  protected void doBuildListing(Path pathToListingFile,
+                                DistCpOptions options) throws IOException {
     if(options.shouldUseDiff()) {
       doBuildListingWithSnapshotDiff(getWriter(pathToListingFile), options);
     }else {
@@ -227,8 +228,9 @@ public class SimpleCopyListing extends CopyListing {
    * @throws IOException
    */
   @VisibleForTesting
-  public void doBuildListingWithSnapshotDiff(SequenceFile.Writer fileListWriter,
-      DistCpOptions options) throws IOException {
+  protected void doBuildListingWithSnapshotDiff(
+      SequenceFile.Writer fileListWriter, DistCpOptions options)
+      throws IOException {
     ArrayList<DiffInfo> diffList = distCpSync.prepareDiffList();
     Path sourceRoot = options.getSourcePaths().get(0);
     FileSystem sourceFS = sourceRoot.getFileSystem(getConf());
@@ -287,7 +289,7 @@ public class SimpleCopyListing extends CopyListing {
    * @throws IOException
    */
   @VisibleForTesting
-  public void doBuildListing(SequenceFile.Writer fileListWriter,
+  protected void doBuildListing(SequenceFile.Writer fileListWriter,
       DistCpOptions options) throws IOException {
     if (options.getNumListstatusThreads() > 0) {
       numListstatusThreads = options.getNumListstatusThreads();
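
The hunk above tightens the doBuildListing overrides from public to protected. A minimal sketch of the Java rule this relies on (Base and Impl are hypothetical names, not DistCp classes): an override may keep or widen the access level declared by the superclass, but never narrow it, so protected here is only legal because CopyListing itself declares these methods protected.

    // Hypothetical Base/Impl, sketching the access-widening rule for overrides.
    abstract class Base {
      // The superclass sets the floor: overrides must be at least protected.
      protected abstract void doWork();
    }

    class Impl extends Base {
      @Override
      protected void doWork() { // public would also compile; private would not
        System.out.println("working");
      }
    }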

+ 8 - 2
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyCommitter.java

@@ -27,10 +27,16 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobStatus;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
-import org.apache.hadoop.tools.*;
+import org.apache.hadoop.tools.CopyListing;
+import org.apache.hadoop.tools.CopyListingFileStatus;
+import org.apache.hadoop.tools.DistCpConstants;
+import org.apache.hadoop.tools.DistCpOptions;
 import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
+import org.apache.hadoop.tools.GlobbedCopyListing;
 import org.apache.hadoop.tools.util.DistCpUtils;
 
 import java.io.IOException;

+ 4 - 1
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/CopyOutputFormat.java

@@ -20,7 +20,10 @@ package org.apache.hadoop.tools.mapred;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 import org.apache.hadoop.mapreduce.security.TokenCache;
 import org.apache.hadoop.tools.DistCpConstants;

+ 4 - 2
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/RetriableFileCopyCommand.java

@@ -201,11 +201,13 @@ public class RetriableFileCopyCommand extends RetriableCommand {
         targetFS, target)) {
       StringBuilder errorMessage = new StringBuilder("Check-sum mismatch between ")
           .append(source).append(" and ").append(target).append(".");
-      if (sourceFS.getFileStatus(source).getBlockSize() != targetFS.getFileStatus(target).getBlockSize()) {
+      if (sourceFS.getFileStatus(source).getBlockSize() !=
+          targetFS.getFileStatus(target).getBlockSize()) {
         errorMessage.append(" Source and target differ in block-size.")
             .append(" Use -pb to preserve block-sizes during copy.")
             .append(" Alternatively, skip checksum-checks altogether, using -skipCrc.")
-						.append(" (NOTE: By skipping checksums, one runs the risk of masking data-corruption during file-transfer.)");
+            .append(" (NOTE: By skipping checksums, one runs the risk of " +
+                "masking data-corruption during file-transfer.)");
       }
       throw new IOException(errorMessage.toString());
     }
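
This hunk only re-wraps the over-long lines; the logic is unchanged. For context, a minimal sketch (class and parameter names are placeholders) of the comparison being wrapped: default HDFS file checksums depend on the block size the file was written with, so when block sizes differ a checksum mismatch is expected, and the message suggests -pb (preserve block size) or -skipCrc.

    import java.io.IOException;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class BlockSizeCheck {
      // Returns true when source and target were written with different block
      // sizes, in which case their default HDFS checksums are not comparable.
      static boolean blockSizesDiffer(FileSystem srcFS, Path src,
                                      FileSystem dstFS, Path dst)
          throws IOException {
        return srcFS.getFileStatus(src).getBlockSize()
            != dstFS.getFileStatus(dst).getBlockSize();
      }
    }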

+ 5 - 1
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/mapred/UniformSizeInputFormat.java

@@ -23,12 +23,16 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.tools.CopyListingFileStatus;
 import org.apache.hadoop.tools.DistCpConstants;
 import org.apache.hadoop.tools.util.DistCpUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.mapreduce.*;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
 import org.apache.hadoop.conf.Configuration;

+ 8 - 13
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java

@@ -18,17 +18,7 @@
 
 package org.apache.hadoop.tools.util;
 
-import java.io.IOException;
-import java.net.InetAddress;
-import java.net.URI;
-import java.net.UnknownHostException;
-import java.text.DecimalFormat;
-import java.util.EnumSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Map.Entry;
-
+import com.google.common.collect.Maps;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -49,10 +39,15 @@ import org.apache.hadoop.tools.CopyListingFileStatus;
 import org.apache.hadoop.tools.DistCpOptions;
 import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
 import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
-
-import com.google.common.collect.Maps;
 import org.apache.hadoop.util.StringUtils;
 
+import java.io.IOException;
+import java.text.DecimalFormat;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
 /**
  * Utility functions used in DistCp.
  */

+ 3 - 7
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ProducerConsumer.java

@@ -20,15 +20,11 @@ package org.apache.hadoop.tools.util;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.tools.util.WorkReport;
-import org.apache.hadoop.tools.util.WorkRequest;
-import org.apache.hadoop.tools.util.WorkRequestProcessor;
 
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.ArrayList;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicInteger;
 
 /**
  * ProducerConsumer class encapsulates input and output queues and a
@@ -51,8 +47,8 @@ public class ProducerConsumer<T, R> {
    *  @param numThreads   Size of thread-pool to execute Workers.
    */
   public ProducerConsumer(int numThreads) {
-    this.inputQueue = new LinkedBlockingQueue<WorkRequest<T>>();
-    this.outputQueue = new LinkedBlockingQueue<WorkReport<R>>();
+    this.inputQueue = new LinkedBlockingQueue<>();
+    this.outputQueue = new LinkedBlockingQueue<>();
     executor = Executors.newFixedThreadPool(numThreads);
     workCnt = new AtomicInteger(0);
   }
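
The queue construction above switches to the Java 7 diamond operator: the compiler infers the type arguments from the declared field type, so repeating WorkRequest<T> and WorkReport<R> on the right-hand side is redundant. A self-contained sketch of the same change:

    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.LinkedBlockingQueue;

    public class DiamondDemo {
      public static void main(String[] args) throws InterruptedException {
        // Pre-Java-7 style: type arguments spelled out on both sides.
        BlockingQueue<String> verbose = new LinkedBlockingQueue<String>();
        // Diamond: <> lets the compiler infer <String> from the declaration.
        BlockingQueue<String> concise = new LinkedBlockingQueue<>();
        verbose.put("a");
        concise.put("b");
        System.out.println(verbose.take() + concise.take());
      }
    }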

+ 2 - 5
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/ThrottledInputStream.java

@@ -18,13 +18,10 @@
 
 package org.apache.hadoop.tools.util;
 
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.PositionedReadable;
 
-import com.google.common.base.Preconditions;
+import java.io.IOException;
+import java.io.InputStream;
 
 /**
  * The ThrottleInputStream provides bandwidth throttling on a specified

+ 0 - 3
hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/WorkRequestProcessor.java

@@ -18,9 +18,6 @@
 
 package org.apache.hadoop.tools.util;
 
-import org.apache.hadoop.tools.util.WorkReport;
-import org.apache.hadoop.tools.util.WorkRequest;
-
 /**
  *  Interface for ProducerConsumer worker loop.
  *

+ 13 - 9
hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java

@@ -378,8 +378,9 @@ public class TestCopyMapper {
               workPath);
       copyMapper.setup(context);
 
-      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), pathList.get(0))),
-              new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);
+      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH),
+          pathList.get(0))),
+          new CopyListingFileStatus(fs.getFileStatus(pathList.get(0))), context);
 
       Assert.assertTrue("There should have been an exception.", false);
     }
@@ -525,7 +526,8 @@ public class TestCopyMapper {
       mkdirs(TARGET_PATH);
       cluster.getFileSystem().setPermission(new Path(SOURCE_PATH + "/src/file"),
           new FsPermission(FsAction.READ, FsAction.READ, FsAction.READ));
-      cluster.getFileSystem().setPermission(new Path(TARGET_PATH), new FsPermission((short)511));
+      cluster.getFileSystem().setPermission(new Path(TARGET_PATH),
+          new FsPermission((short)511));
 
       final FileSystem tmpFS = tmpUser.doAs(new PrivilegedAction<FileSystem>() {
         @Override
@@ -785,7 +787,8 @@ public class TestCopyMapper {
       }
       if (ignoreFailures) {
         for (Text value : stubContext.getWriter().values()) {
-          Assert.assertTrue(value.toString() + " is not skipped", value.toString().startsWith("FAIL:"));
+          Assert.assertTrue(value.toString() + " is not skipped",
+              value.toString().startsWith("FAIL:"));
         }
       }
       Assert.assertTrue("There should have been an exception.", ignoreFailures);
@@ -813,7 +816,6 @@ public class TestCopyMapper {
   @Test(timeout=40000)
   public void testCopyFailOnBlockSizeDifference() {
     try {
-
       deleteState();
       createSourceDataWithDifferentBlockSize();
 
@@ -833,16 +835,18 @@ public class TestCopyMapper {
 
       for (Path path : pathList) {
         final FileStatus fileStatus = fs.getFileStatus(path);
-        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
-            new CopyListingFileStatus(fileStatus), context);
+        copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH),
+            path)), new CopyListingFileStatus(fileStatus), context);
       }
 
       Assert.fail("Copy should have failed because of block-size difference.");
     }
     catch (Exception exception) {
       // Check that the exception suggests the use of -pb/-skipCrc.
-      Assert.assertTrue("Failure exception should have suggested the use of -pb.", exception.getCause().getCause().getMessage().contains("pb"));
-      Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.", exception.getCause().getCause().getMessage().contains("skipCrc"));
+      Assert.assertTrue("Failure exception should have suggested the use of -pb.",
+          exception.getCause().getCause().getMessage().contains("pb"));
+      Assert.assertTrue("Failure exception should have suggested the use of -skipCrc.",
+          exception.getCause().getCause().getMessage().contains("skipCrc"));
     }
   }
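
The test hunks above mostly re-wrap long lines, but they all follow the same JUnit 4 expected-failure idiom: run the copy inside try, fail if it returns normally, and assert on the failure message in catch. A minimal standalone sketch (doCopy is a stand-in for the real copyMapper.map call, which wraps its failure in a deeper cause chain):

    import java.io.IOException;

    import org.junit.Assert;
    import org.junit.Test;

    public class ExpectedFailureIdiom {
      // Stand-in for the real copy operation; always fails.
      private void doCopy() throws IOException {
        throw new IOException("Use -pb to preserve block-sizes during copy.");
      }

      @Test
      public void testFailureMentionsPb() {
        try {
          doCopy();
          Assert.fail("Copy should have failed.");
        } catch (IOException e) {
          Assert.assertTrue("Failure should have suggested -pb.",
              e.getMessage().contains("pb"));
        }
      }
    }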