|
@@ -20,22 +20,35 @@ package org.apache.hadoop.tools.contract;
|
|
|
|
|
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
|
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
|
|
|
|
|
|
|
|
+import java.io.IOException;
|
|
import java.util.Collections;
|
|
import java.util.Collections;
|
|
|
|
+import java.util.HashMap;
|
|
|
|
+import java.util.Map;
|
|
|
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
|
|
+import org.apache.hadoop.fs.LocatedFileStatus;
|
|
import org.apache.hadoop.fs.Path;
|
|
import org.apache.hadoop.fs.Path;
|
|
|
|
+import org.apache.hadoop.fs.RemoteIterator;
|
|
import org.apache.hadoop.fs.contract.AbstractFSContractTestBase;
|
|
import org.apache.hadoop.fs.contract.AbstractFSContractTestBase;
|
|
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
|
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
|
|
|
+import org.apache.hadoop.io.SequenceFile;
|
|
|
|
+import org.apache.hadoop.io.Text;
|
|
|
|
+import org.apache.hadoop.mapreduce.Counter;
|
|
import org.apache.hadoop.mapreduce.Job;
|
|
import org.apache.hadoop.mapreduce.Job;
|
|
import org.apache.hadoop.test.GenericTestUtils;
|
|
import org.apache.hadoop.test.GenericTestUtils;
|
|
|
|
+import org.apache.hadoop.tools.CopyListingFileStatus;
|
|
import org.apache.hadoop.tools.DistCp;
|
|
import org.apache.hadoop.tools.DistCp;
|
|
|
|
+import org.apache.hadoop.tools.DistCpConstants;
|
|
import org.apache.hadoop.tools.DistCpOptions;
|
|
import org.apache.hadoop.tools.DistCpOptions;
|
|
|
|
+import org.apache.hadoop.tools.mapred.CopyMapper;
|
|
|
|
|
|
import org.junit.Before;
|
|
import org.junit.Before;
|
|
import org.junit.Rule;
|
|
import org.junit.Rule;
|
|
import org.junit.Test;
|
|
import org.junit.Test;
|
|
import org.junit.rules.TestName;
|
|
import org.junit.rules.TestName;
|
|
|
|
+import org.slf4j.Logger;
|
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
|
|
|
/**
|
|
/**
|
|
* Contract test suite covering a file system's integration with DistCp. The
|
|
* Contract test suite covering a file system's integration with DistCp. The
|
|
@@ -48,13 +61,70 @@ import org.junit.rules.TestName;
|
|
public abstract class AbstractContractDistCpTest
|
|
public abstract class AbstractContractDistCpTest
|
|
extends AbstractFSContractTestBase {
|
|
extends AbstractFSContractTestBase {
|
|
|
|
|
|
|
|
+ private static final Logger LOG =
|
|
|
|
+ LoggerFactory.getLogger(AbstractContractDistCpTest.class);
|
|
|
|
+
|
|
|
|
+ public static final String SCALE_TEST_DISTCP_FILE_SIZE_KB
|
|
|
|
+ = "scale.test.distcp.file.size.kb";
|
|
|
|
+
|
|
|
|
+ public static final int DEFAULT_DISTCP_SIZE_KB = 1024;
|
|
|
|
+
|
|
|
|
+ protected static final int MB = 1024 * 1024;
|
|
|
|
+
|
|
@Rule
|
|
@Rule
|
|
public TestName testName = new TestName();
|
|
public TestName testName = new TestName();
|
|
|
|
|
|
|
|
+ /**
|
|
|
|
+ * The timeout value is extended over the default so that large updates
|
|
|
|
+ * are allowed to take time, especially to remote stores.
|
|
|
|
+ * @return the current test timeout
|
|
|
|
+ */
|
|
|
|
+ protected int getTestTimeoutMillis() {
|
|
|
|
+ return 15 * 60 * 1000;
|
|
|
|
+ }
|
|
|
|
+
|
|
private Configuration conf;
|
|
private Configuration conf;
|
|
private FileSystem localFS, remoteFS;
|
|
private FileSystem localFS, remoteFS;
|
|
private Path localDir, remoteDir;
|
|
private Path localDir, remoteDir;
|
|
|
|
|
|
|
|
+ private Path inputDir;
|
|
|
|
+
|
|
|
|
+ private Path inputSubDir1;
|
|
|
|
+
|
|
|
|
+ private Path inputSubDir2;
|
|
|
|
+
|
|
|
|
+ private Path inputSubDir4;
|
|
|
|
+
|
|
|
|
+ private Path inputFile1;
|
|
|
|
+
|
|
|
|
+ private Path inputFile2;
|
|
|
|
+
|
|
|
|
+ private Path inputFile3;
|
|
|
|
+
|
|
|
|
+ private Path inputFile4;
|
|
|
|
+
|
|
|
|
+ private Path inputFile5;
|
|
|
|
+
|
|
|
|
+ private Path outputDir;
|
|
|
|
+
|
|
|
|
+ private Path outputSubDir1;
|
|
|
|
+
|
|
|
|
+ private Path outputSubDir2;
|
|
|
|
+
|
|
|
|
+ private Path outputSubDir4;
|
|
|
|
+
|
|
|
|
+ private Path outputFile1;
|
|
|
|
+
|
|
|
|
+ private Path outputFile2;
|
|
|
|
+
|
|
|
|
+ private Path outputFile3;
|
|
|
|
+
|
|
|
|
+ private Path outputFile4;
|
|
|
|
+
|
|
|
|
+ private Path outputFile5;
|
|
|
|
+
|
|
|
|
+ private Path inputDirUnderOutputDir;
|
|
|
|
+
|
|
@Override
|
|
@Override
|
|
protected Configuration createConfiguration() {
|
|
protected Configuration createConfiguration() {
|
|
Configuration newConf = new Configuration();
|
|
Configuration newConf = new Configuration();
|
|
@@ -73,20 +143,307 @@ public abstract class AbstractContractDistCpTest
|
|
// All paths are fully qualified including scheme (not taking advantage of
|
|
// All paths are fully qualified including scheme (not taking advantage of
|
|
// default file system), so if something fails, the messages will make it
|
|
// default file system), so if something fails, the messages will make it
|
|
// clear which paths are local and which paths are remote.
|
|
// clear which paths are local and which paths are remote.
|
|
- Path testSubDir = new Path(getClass().getSimpleName(),
|
|
|
|
- testName.getMethodName());
|
|
|
|
- localDir = localFS.makeQualified(new Path(new Path(
|
|
|
|
- GenericTestUtils.getTestDir().toURI()), testSubDir));
|
|
|
|
|
|
+ String className = getClass().getSimpleName();
|
|
|
|
+ String testSubDir = className + "/" + testName.getMethodName();
|
|
|
|
+ localDir =
|
|
|
|
+ localFS.makeQualified(new Path(new Path(
|
|
|
|
+ GenericTestUtils.getTestDir().toURI()), testSubDir + "/local"));
|
|
mkdirs(localFS, localDir);
|
|
mkdirs(localFS, localDir);
|
|
- remoteDir = remoteFS.makeQualified(
|
|
|
|
- new Path(getContract().getTestPath(), testSubDir));
|
|
|
|
|
|
+ remoteDir = path(testSubDir + "/remote");
|
|
mkdirs(remoteFS, remoteDir);
|
|
mkdirs(remoteFS, remoteDir);
|
|
|
|
+ // test teardown does this, but IDE-based test debugging can skip
|
|
|
|
+ // that teardown; this guarantees the initial state is clean
|
|
|
|
+ remoteFS.delete(remoteDir, true);
|
|
|
|
+ localFS.delete(localDir, true);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Set up both input and output fields.
|
|
|
|
+ * @param src source tree
|
|
|
|
+ * @param dest dest tree
|
|
|
|
+ */
|
|
|
|
+ protected void initPathFields(final Path src, final Path dest) {
|
|
|
|
+ initInputFields(src);
|
|
|
|
+ initOutputFields(dest);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Output field setup.
|
|
|
|
+ * @param path path to set up
|
|
|
|
+ */
|
|
|
|
+ protected void initOutputFields(final Path path) {
|
|
|
|
+ outputDir = new Path(path, "outputDir");
|
|
|
|
+ inputDirUnderOutputDir = new Path(outputDir, "inputDir");
|
|
|
|
+ outputFile1 = new Path(inputDirUnderOutputDir, "file1");
|
|
|
|
+ outputSubDir1 = new Path(inputDirUnderOutputDir, "subDir1");
|
|
|
|
+ outputFile2 = new Path(outputSubDir1, "file2");
|
|
|
|
+ outputSubDir2 = new Path(inputDirUnderOutputDir, "subDir2/subDir2");
|
|
|
|
+ outputFile3 = new Path(outputSubDir2, "file3");
|
|
|
|
+ outputSubDir4 = new Path(inputDirUnderOutputDir, "subDir4/subDir4");
|
|
|
|
+ outputFile4 = new Path(outputSubDir4, "file4");
|
|
|
|
+ outputFile5 = new Path(outputSubDir4, "file5");
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * These paths are used across different methods (copy, update, track)
|
|
|
|
+ * so they are set up as fields.
|
|
|
|
+ * @param srcDir source directory for these to go under.
|
|
|
|
+ */
|
|
|
|
+ protected void initInputFields(final Path srcDir) {
|
|
|
|
+ inputDir = new Path(srcDir, "inputDir");
|
|
|
|
+ inputFile1 = new Path(inputDir, "file1");
|
|
|
|
+ inputSubDir1 = new Path(inputDir, "subDir1");
|
|
|
|
+ inputFile2 = new Path(inputSubDir1, "file2");
|
|
|
|
+ inputSubDir2 = new Path(inputDir, "subDir2/subDir2");
|
|
|
|
+ inputFile3 = new Path(inputSubDir2, "file3");
|
|
|
|
+ inputSubDir4 = new Path(inputDir, "subDir4/subDir4");
|
|
|
|
+ inputFile4 = new Path(inputSubDir4, "file4");
|
|
|
|
+ inputFile5 = new Path(inputSubDir4, "file5");
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ protected FileSystem getLocalFS() {
|
|
|
|
+ return localFS;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ protected FileSystem getRemoteFS() {
|
|
|
|
+ return remoteFS;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ protected Path getLocalDir() {
|
|
|
|
+ return localDir;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ protected Path getRemoteDir() {
|
|
|
|
+ return remoteDir;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ @Test
|
|
|
|
+ public void testUpdateDeepDirectoryStructureToRemote() throws Exception {
|
|
|
|
+ describe("update a deep directory structure from local to remote");
|
|
|
|
+ distCpDeepDirectoryStructure(localFS, localDir, remoteFS, remoteDir);
|
|
|
|
+ distCpUpdateDeepDirectoryStructure(inputDirUnderOutputDir);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ @Test
|
|
|
|
+ public void testUpdateDeepDirectoryStructureNoChange() throws Exception {
|
|
|
|
+ describe("update an unchanged directory structure"
|
|
|
|
+ + " from local to remote; expect no copy");
|
|
|
|
+ Path target = distCpDeepDirectoryStructure(localFS, localDir, remoteFS,
|
|
|
|
+ remoteDir);
|
|
|
|
+ describe("\nExecuting Update\n");
|
|
|
|
+ Job job = distCpUpdate(localDir, target);
|
|
|
|
+ assertCounterInRange(job, CopyMapper.Counter.SKIP, 1, -1);
|
|
|
|
+ assertCounterInRange(job, CopyMapper.Counter.BYTESCOPIED, 0, 0);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Assert that a counter is in a range; min and max values are inclusive.
|
|
|
|
+ * @param job job to query
|
|
|
|
+ * @param counter counter to examine
|
|
|
|
+ * @param min min value, if negative "no minimum"
|
|
|
|
+ * @param max max value, if negative "no maximum"
|
|
|
|
+ * @throws IOException IO problem
|
|
|
|
+ */
|
|
|
|
+ void assertCounterInRange(Job job, Enum<?> counter, long min, long max)
|
|
|
|
+ throws IOException {
|
|
|
|
+ Counter c = job.getCounters().findCounter(counter);
|
|
|
|
+ long value = c.getValue();
|
|
|
|
+ String description =
|
|
|
|
+ String.format("%s value %s", c.getDisplayName(), value);
|
|
|
|
+
|
|
|
|
+ if (min >= 0) {
|
|
|
|
+ assertTrue(description + " too below minimum " + min,
|
|
|
|
+ value >= min);
|
|
|
|
+ }
|
|
|
|
+ if (max >= 0) {
|
|
|
|
+ assertTrue(description + " above maximum " + max,
|
|
|
|
+ value <= max);
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ /**
|
|
|
|
+ * Do a distcp from the local source to the destination filesystem.
|
|
|
|
+ * This is executed as part of
|
|
|
|
+ * {@link #testUpdateDeepDirectoryStructureToRemote()}; it's designed to be
|
|
|
|
+ * overridden or wrapped by subclasses which wish to add more assertions.
|
|
|
|
+ *
|
|
|
|
+ * Life is complicated here by the way that the src/dest paths
|
|
|
|
+ * on a distcp are different with -update.
|
|
|
|
+ * @param destDir output directory used by the initial distcp
|
|
|
|
+ * @return the distcp job
|
|
|
|
+ */
|
|
|
|
+ protected Job distCpUpdateDeepDirectoryStructure(final Path destDir)
|
|
|
|
+ throws Exception {
|
|
|
|
+ describe("Now do an incremental update with deletion of missing files");
|
|
|
|
+ Path srcDir = inputDir;
|
|
|
|
+ LOG.info("Source directory = {}, dest={}", srcDir, destDir);
|
|
|
|
+
|
|
|
|
+ ContractTestUtils.assertPathsExist(localFS,
|
|
|
|
+ "Paths for test are wrong",
|
|
|
|
+ inputFile1, inputFile2, inputFile3, inputFile4, inputFile5);
|
|
|
|
+
|
|
|
|
+ modifySourceDirectories();
|
|
|
|
+
|
|
|
|
+ Job job = distCpUpdate(srcDir, destDir);
|
|
|
|
+
|
|
|
|
+ Path outputFileNew1 = new Path(outputSubDir2, "newfile1");
|
|
|
|
+
|
|
|
|
+ lsR("Updated Remote", remoteFS, destDir);
|
|
|
|
+
|
|
|
|
+ ContractTestUtils.assertPathDoesNotExist(remoteFS,
|
|
|
|
+ " deleted from " + inputFile1, outputFile1);
|
|
|
|
+ ContractTestUtils.assertIsFile(remoteFS, outputFileNew1);
|
|
|
|
+ ContractTestUtils.assertPathsDoNotExist(remoteFS,
|
|
|
|
+ "DistCP should have deleted",
|
|
|
|
+ outputFile3, outputFile4, outputSubDir4);
|
|
|
|
+ assertCounterInRange(job, CopyMapper.Counter.COPY, 1, 1);
|
|
|
|
+ assertCounterInRange(job, CopyMapper.Counter.SKIP, 1, -1);
|
|
|
|
+ return job;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Run distcp -update srcDir destDir.
|
|
|
|
+ * @param srcDir local source directory
|
|
|
|
+ * @param destDir remote destination directory.
|
|
|
|
+ * @return the completed job
|
|
|
|
+ * @throws Exception any failure.
|
|
|
|
+ */
|
|
|
|
+ private Job distCpUpdate(final Path srcDir, final Path destDir)
|
|
|
|
+ throws Exception {
|
|
|
|
+ describe("\nDistcp -update from " + srcDir + " to " + destDir);
|
|
|
|
+ lsR("Local to update", localFS, srcDir);
|
|
|
|
+ lsR("Remote before update", remoteFS, destDir);
|
|
|
|
+ return runDistCp(buildWithStandardOptions(
|
|
|
|
+ new DistCpOptions.Builder(
|
|
|
|
+ Collections.singletonList(srcDir), destDir)
|
|
|
|
+ .withDeleteMissing(true)
|
|
|
|
+ .withSyncFolder(true)
|
|
|
|
+ .withCRC(true)
|
|
|
|
+ .withOverwrite(false)));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Update the source directories as various tests expect,
|
|
|
|
+ * including adding a new file.
|
|
|
|
+ * @return the path to the newly created file
|
|
|
|
+ * @throws IOException IO failure
|
|
|
|
+ */
|
|
|
|
+ private Path modifySourceDirectories() throws IOException {
|
|
|
|
+ localFS.delete(inputFile1, false);
|
|
|
|
+ localFS.delete(inputFile3, false);
|
|
|
|
+ // delete all of subdir4, so input/output file 4 & 5 will go
|
|
|
|
+ localFS.delete(inputSubDir4, true);
|
|
|
|
+ // add one new file
|
|
|
|
+ Path inputFileNew1 = new Path(inputSubDir2, "newfile1");
|
|
|
|
+ ContractTestUtils.touch(localFS, inputFileNew1);
|
|
|
|
+ return inputFileNew1;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
@Test
|
|
@Test
|
|
- public void deepDirectoryStructureToRemote() throws Exception {
|
|
|
|
|
|
+ public void testTrackDeepDirectoryStructureToRemote() throws Exception {
|
|
describe("copy a deep directory structure from local to remote");
|
|
describe("copy a deep directory structure from local to remote");
|
|
- deepDirectoryStructure(localFS, localDir, remoteFS, remoteDir);
|
|
|
|
|
|
+
|
|
|
|
+ Path destDir = distCpDeepDirectoryStructure(localFS, localDir, remoteFS,
|
|
|
|
+ remoteDir);
|
|
|
|
+ ContractTestUtils.assertIsDirectory(remoteFS, destDir);
|
|
|
|
+
|
|
|
|
+ describe("Now do an incremental update and save of missing files");
|
|
|
|
+ Path srcDir = inputDir;
|
|
|
|
+ // same path setup as in distCpDeepDirectoryStructure()
|
|
|
|
+ Path trackDir = new Path(localDir, "trackDir");
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ describe("\nDirectories\n");
|
|
|
|
+ lsR("Local to update", localFS, srcDir);
|
|
|
|
+ lsR("Remote before update", remoteFS, destDir);
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ ContractTestUtils.assertPathsExist(localFS,
|
|
|
|
+ "Paths for test are wrong",
|
|
|
|
+ inputFile2, inputFile3, inputFile4, inputFile5);
|
|
|
|
+
|
|
|
|
+ Path inputFileNew1 = modifySourceDirectories();
|
|
|
|
+
|
|
|
|
+ // Distcp set to track but not delete
|
|
|
|
+ runDistCp(buildWithStandardOptions(
|
|
|
|
+ new DistCpOptions.Builder(
|
|
|
|
+ Collections.singletonList(srcDir),
|
|
|
|
+ inputDirUnderOutputDir)
|
|
|
|
+ .withTrackMissing(trackDir)
|
|
|
|
+ .withSyncFolder(true)
|
|
|
|
+ .withOverwrite(false)));
|
|
|
|
+
|
|
|
|
+ lsR("tracked udpate", remoteFS, destDir);
|
|
|
|
+ // new file went over
|
|
|
|
+ Path outputFileNew1 = new Path(outputSubDir2, "newfile1");
|
|
|
|
+ ContractTestUtils.assertIsFile(remoteFS, outputFileNew1);
|
|
|
|
+
|
|
|
|
+ ContractTestUtils.assertPathExists(localFS, "tracking directory",
|
|
|
|
+ trackDir);
|
|
|
|
+
|
|
|
|
+ // now read in the listings
|
|
|
|
+ Path sortedSourceListing = new Path(trackDir,
|
|
|
|
+ DistCpConstants.SOURCE_SORTED_FILE);
|
|
|
|
+ ContractTestUtils.assertIsFile(localFS, sortedSourceListing);
|
|
|
|
+ Path sortedTargetListing = new Path(trackDir,
|
|
|
|
+ DistCpConstants.TARGET_SORTED_FILE);
|
|
|
|
+ ContractTestUtils.assertIsFile(localFS, sortedTargetListing);
|
|
|
|
+ // deletion didn't happen
|
|
|
|
+ ContractTestUtils.assertPathsExist(remoteFS,
|
|
|
|
+ "DistCP should have retained",
|
|
|
|
+ outputFile2, outputFile3, outputFile4, outputSubDir4);
|
|
|
|
+
|
|
|
|
+ // now scan the table and see that things are there.
|
|
|
|
+ Map<String, Path> sourceFiles = new HashMap<>(10);
|
|
|
|
+ Map<String, Path> targetFiles = new HashMap<>(10);
|
|
|
|
+
|
|
|
|
+ try (SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
|
|
|
|
+ SequenceFile.Reader.file(sortedSourceListing));
|
|
|
|
+ SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
|
|
|
|
+ SequenceFile.Reader.file(sortedTargetListing))) {
|
|
|
|
+ CopyListingFileStatus copyStatus = new CopyListingFileStatus();
|
|
|
|
+ Text name = new Text();
|
|
|
|
+ while(sourceReader.next(name, copyStatus)) {
|
|
|
|
+ String key = name.toString();
|
|
|
|
+ Path path = copyStatus.getPath();
|
|
|
|
+ LOG.info("{}: {}", key, path);
|
|
|
|
+ sourceFiles.put(key, path);
|
|
|
|
+ }
|
|
|
|
+ while(targetReader.next(name, copyStatus)) {
|
|
|
|
+ String key = name.toString();
|
|
|
|
+ Path path = copyStatus.getPath();
|
|
|
|
+ LOG.info("{}: {}", key, path);
|
|
|
|
+ targetFiles.put(name.toString(), copyStatus.getPath());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // look for the new file in both lists
|
|
|
|
+ assertTrue("No " + outputFileNew1 + " in source listing",
|
|
|
|
+ sourceFiles.containsValue(inputFileNew1));
|
|
|
|
+ assertTrue("No " + outputFileNew1 + " in target listing",
|
|
|
|
+ targetFiles.containsValue(outputFileNew1));
|
|
|
|
+ assertTrue("No " + outputSubDir4 + " in target listing",
|
|
|
|
+ targetFiles.containsValue(outputSubDir4));
|
|
|
|
+ assertFalse("Found " + inputSubDir4 + " in source listing",
|
|
|
|
+ sourceFiles.containsValue(inputSubDir4));
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public void lsR(final String description,
|
|
|
|
+ final FileSystem fs,
|
|
|
|
+ final Path dir) throws IOException {
|
|
|
|
+ RemoteIterator<LocatedFileStatus> files = fs.listFiles(dir, true);
|
|
|
|
+ LOG.info("{}: {}:", description, dir);
|
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
|
+ while(files.hasNext()) {
|
|
|
|
+ LocatedFileStatus status = files.next();
|
|
|
|
+ sb.append(String.format(" %s; type=%s; length=%d",
|
|
|
|
+ status.getPath(),
|
|
|
|
+ status.isDirectory()? "dir" : "file",
|
|
|
|
+ status.getLen()));
|
|
|
|
+ }
|
|
|
|
+ LOG.info("{}", sb);
|
|
}
|
|
}
|
|
|
|
|
|
@Test
|
|
@Test
|
|
@@ -96,34 +453,35 @@ public abstract class AbstractContractDistCpTest
|
|
}
|
|
}
|
|
|
|
|
|
@Test
|
|
@Test
|
|
- public void deepDirectoryStructureFromRemote() throws Exception {
|
|
|
|
|
|
+ public void testDeepDirectoryStructureFromRemote() throws Exception {
|
|
describe("copy a deep directory structure from remote to local");
|
|
describe("copy a deep directory structure from remote to local");
|
|
- deepDirectoryStructure(remoteFS, remoteDir, localFS, localDir);
|
|
|
|
|
|
+ distCpDeepDirectoryStructure(remoteFS, remoteDir, localFS, localDir);
|
|
}
|
|
}
|
|
|
|
|
|
@Test
|
|
@Test
|
|
- public void largeFilesFromRemote() throws Exception {
|
|
|
|
|
|
+ public void testLargeFilesFromRemote() throws Exception {
|
|
describe("copy multiple large files from remote to local");
|
|
describe("copy multiple large files from remote to local");
|
|
largeFiles(remoteFS, remoteDir, localFS, localDir);
|
|
largeFiles(remoteFS, remoteDir, localFS, localDir);
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
- * Executes a test using a file system sub-tree with multiple nesting levels.
|
|
|
|
|
|
+ * Executes a DistCp using a file system sub-tree with multiple nesting
|
|
|
|
+ * levels.
|
|
|
|
+ * The filenames are those of the fields initialized in setup.
|
|
*
|
|
*
|
|
* @param srcFS source FileSystem
|
|
* @param srcFS source FileSystem
|
|
* @param srcDir source directory
|
|
* @param srcDir source directory
|
|
* @param dstFS destination FileSystem
|
|
* @param dstFS destination FileSystem
|
|
* @param dstDir destination directory
|
|
* @param dstDir destination directory
|
|
|
|
+ * @return the target directory of the copy
|
|
* @throws Exception if there is a failure
|
|
* @throws Exception if there is a failure
|
|
*/
|
|
*/
|
|
- private void deepDirectoryStructure(FileSystem srcFS, Path srcDir,
|
|
|
|
- FileSystem dstFS, Path dstDir) throws Exception {
|
|
|
|
- Path inputDir = new Path(srcDir, "inputDir");
|
|
|
|
- Path inputSubDir1 = new Path(inputDir, "subDir1");
|
|
|
|
- Path inputSubDir2 = new Path(inputDir, "subDir2/subDir3");
|
|
|
|
- Path inputFile1 = new Path(inputDir, "file1");
|
|
|
|
- Path inputFile2 = new Path(inputSubDir1, "file2");
|
|
|
|
- Path inputFile3 = new Path(inputSubDir2, "file3");
|
|
|
|
|
|
+ private Path distCpDeepDirectoryStructure(FileSystem srcFS,
|
|
|
|
+ Path srcDir,
|
|
|
|
+ FileSystem dstFS,
|
|
|
|
+ Path dstDir) throws Exception {
|
|
|
|
+ initPathFields(srcDir, dstDir);
|
|
|
|
+
|
|
mkdirs(srcFS, inputSubDir1);
|
|
mkdirs(srcFS, inputSubDir1);
|
|
mkdirs(srcFS, inputSubDir2);
|
|
mkdirs(srcFS, inputSubDir2);
|
|
byte[] data1 = dataset(100, 33, 43);
|
|
byte[] data1 = dataset(100, 33, 43);
|
|
@@ -132,14 +490,18 @@ public abstract class AbstractContractDistCpTest
|
|
createFile(srcFS, inputFile2, true, data2);
|
|
createFile(srcFS, inputFile2, true, data2);
|
|
byte[] data3 = dataset(300, 53, 63);
|
|
byte[] data3 = dataset(300, 53, 63);
|
|
createFile(srcFS, inputFile3, true, data3);
|
|
createFile(srcFS, inputFile3, true, data3);
|
|
|
|
+ createFile(srcFS, inputFile4, true, dataset(400, 53, 63));
|
|
|
|
+ createFile(srcFS, inputFile5, true, dataset(500, 53, 63));
|
|
Path target = new Path(dstDir, "outputDir");
|
|
Path target = new Path(dstDir, "outputDir");
|
|
runDistCp(inputDir, target);
|
|
runDistCp(inputDir, target);
|
|
ContractTestUtils.assertIsDirectory(dstFS, target);
|
|
ContractTestUtils.assertIsDirectory(dstFS, target);
|
|
|
|
+ lsR("Destination tree after distcp", dstFS, target);
|
|
verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1);
|
|
verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1);
|
|
verifyFileContents(dstFS,
|
|
verifyFileContents(dstFS,
|
|
new Path(target, "inputDir/subDir1/file2"), data2);
|
|
new Path(target, "inputDir/subDir1/file2"), data2);
|
|
verifyFileContents(dstFS,
|
|
verifyFileContents(dstFS,
|
|
- new Path(target, "inputDir/subDir2/subDir3/file3"), data3);
|
|
|
|
|
|
+ new Path(target, "inputDir/subDir2/subDir2/file3"), data3);
|
|
|
|
+ return target;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
@@ -153,20 +515,21 @@ public abstract class AbstractContractDistCpTest
|
|
*/
|
|
*/
|
|
private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS,
|
|
private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS,
|
|
Path dstDir) throws Exception {
|
|
Path dstDir) throws Exception {
|
|
- Path inputDir = new Path(srcDir, "inputDir");
|
|
|
|
- Path inputFile1 = new Path(inputDir, "file1");
|
|
|
|
- Path inputFile2 = new Path(inputDir, "file2");
|
|
|
|
- Path inputFile3 = new Path(inputDir, "file3");
|
|
|
|
|
|
+ initPathFields(srcDir, dstDir);
|
|
|
|
+ Path largeFile1 = new Path(inputDir, "file1");
|
|
|
|
+ Path largeFile2 = new Path(inputDir, "file2");
|
|
|
|
+ Path largeFile3 = new Path(inputDir, "file3");
|
|
mkdirs(srcFS, inputDir);
|
|
mkdirs(srcFS, inputDir);
|
|
- int fileSizeKb = conf.getInt("scale.test.distcp.file.size.kb", 10 * 1024);
|
|
|
|
|
|
+ int fileSizeKb = conf.getInt(SCALE_TEST_DISTCP_FILE_SIZE_KB,
|
|
|
|
+ DEFAULT_DISTCP_SIZE_KB);
|
|
int fileSizeMb = fileSizeKb / 1024;
|
|
int fileSizeMb = fileSizeKb / 1024;
|
|
getLog().info("{} with file size {}", testName.getMethodName(), fileSizeMb);
|
|
getLog().info("{} with file size {}", testName.getMethodName(), fileSizeMb);
|
|
- byte[] data1 = dataset((fileSizeMb + 1) * 1024 * 1024, 33, 43);
|
|
|
|
- createFile(srcFS, inputFile1, true, data1);
|
|
|
|
- byte[] data2 = dataset((fileSizeMb + 2) * 1024 * 1024, 43, 53);
|
|
|
|
- createFile(srcFS, inputFile2, true, data2);
|
|
|
|
- byte[] data3 = dataset((fileSizeMb + 3) * 1024 * 1024, 53, 63);
|
|
|
|
- createFile(srcFS, inputFile3, true, data3);
|
|
|
|
|
|
+ byte[] data1 = dataset((fileSizeMb + 1) * MB, 33, 43);
|
|
|
|
+ createFile(srcFS, largeFile1, true, data1);
|
|
|
|
+ byte[] data2 = dataset((fileSizeMb + 2) * MB, 43, 53);
|
|
|
|
+ createFile(srcFS, largeFile2, true, data2);
|
|
|
|
+ byte[] data3 = dataset((fileSizeMb + 3) * MB, 53, 63);
|
|
|
|
+ createFile(srcFS, largeFile3, true, data3);
|
|
Path target = new Path(dstDir, "outputDir");
|
|
Path target = new Path(dstDir, "outputDir");
|
|
runDistCp(inputDir, target);
|
|
runDistCp(inputDir, target);
|
|
ContractTestUtils.assertIsDirectory(dstFS, target);
|
|
ContractTestUtils.assertIsDirectory(dstFS, target);
|
|
@@ -183,12 +546,34 @@ public abstract class AbstractContractDistCpTest
|
|
* @throws Exception if there is a failure
|
|
* @throws Exception if there is a failure
|
|
*/
|
|
*/
|
|
private void runDistCp(Path src, Path dst) throws Exception {
|
|
private void runDistCp(Path src, Path dst) throws Exception {
|
|
- DistCpOptions options = new DistCpOptions.Builder(
|
|
|
|
- Collections.singletonList(src), dst).build();
|
|
|
|
|
|
+ runDistCp(buildWithStandardOptions(
|
|
|
|
+ new DistCpOptions.Builder(Collections.singletonList(src), dst)));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Run the distcp job.
|
|
|
|
+ * @param options distcp options
|
|
|
|
+ * @return the job. It will have already completed.
|
|
|
|
+ * @throws Exception failure
|
|
|
|
+ */
|
|
|
|
+ private Job runDistCp(final DistCpOptions options) throws Exception {
|
|
Job job = new DistCp(conf, options).execute();
|
|
Job job = new DistCp(conf, options).execute();
|
|
assertNotNull("Unexpected null job returned from DistCp execution.", job);
|
|
assertNotNull("Unexpected null job returned from DistCp execution.", job);
|
|
assertTrue("DistCp job did not complete.", job.isComplete());
|
|
assertTrue("DistCp job did not complete.", job.isComplete());
|
|
assertTrue("DistCp job did not complete successfully.", job.isSuccessful());
|
|
assertTrue("DistCp job did not complete successfully.", job.isSuccessful());
|
|
|
|
+ return job;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Add any standard options and then build.
|
|
|
|
+ * @param builder DistCp option builder
|
|
|
|
+ * @return the built options
|
|
|
|
+ */
|
|
|
|
+ private DistCpOptions buildWithStandardOptions(
|
|
|
|
+ DistCpOptions.Builder builder) {
|
|
|
|
+ return builder
|
|
|
|
+ .withNumListstatusThreads(8)
|
|
|
|
+ .build();
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|