|
@@ -17,11 +17,12 @@
|
|
|
*/
|
|
|
package org.apache.hadoop.mapreduce.filecache;
|
|
|
|
|
|
-import java.io.File;
|
|
|
import java.io.IOException;
|
|
|
import java.net.URI;
|
|
|
import java.util.HashMap;
|
|
|
+import java.util.HashSet;
|
|
|
import java.util.Map;
|
|
|
+import java.util.Set;
|
|
|
|
|
|
import org.apache.commons.logging.Log;
|
|
|
import org.apache.commons.logging.LogFactory;
|
|
@@ -37,6 +38,8 @@ import org.apache.hadoop.mapreduce.Job;
|
|
|
import org.apache.hadoop.mapreduce.MRJobConfig;
|
|
|
import org.junit.After;
|
|
|
import org.junit.Assert;
|
|
|
+import static org.junit.Assert.assertEquals;
|
|
|
+import static org.junit.Assert.assertTrue;
|
|
|
import org.junit.Before;
|
|
|
import org.junit.Test;
|
|
|
|
|
@@ -55,22 +58,22 @@ public class TestClientDistributedCacheManager {
|
|
|
private static final Path TEST_VISIBILITY_CHILD_DIR =
|
|
|
new Path(TEST_VISIBILITY_PARENT_DIR, "TestCacheVisibility_Child");
|
|
|
|
|
|
+ private static final String FIRST_CACHE_FILE = "firstcachefile";
|
|
|
+ private static final String SECOND_CACHE_FILE = "secondcachefile";
|
|
|
+
|
|
|
private FileSystem fs;
|
|
|
private Path firstCacheFile;
|
|
|
private Path secondCacheFile;
|
|
|
- private Path thirdCacheFile;
|
|
|
private Configuration conf;
|
|
|
|
|
|
@Before
|
|
|
public void setup() throws IOException {
|
|
|
conf = new Configuration();
|
|
|
fs = FileSystem.get(conf);
|
|
|
- firstCacheFile = new Path(TEST_ROOT_DIR, "firstcachefile");
|
|
|
- secondCacheFile = new Path(TEST_ROOT_DIR, "secondcachefile");
|
|
|
- thirdCacheFile = new Path(TEST_VISIBILITY_CHILD_DIR,"thirdCachefile");
|
|
|
+ firstCacheFile = new Path(TEST_VISIBILITY_PARENT_DIR, FIRST_CACHE_FILE);
|
|
|
+ secondCacheFile = new Path(TEST_VISIBILITY_CHILD_DIR, SECOND_CACHE_FILE);
|
|
|
createTempFile(firstCacheFile, conf);
|
|
|
createTempFile(secondCacheFile, conf);
|
|
|
- createTempFile(thirdCacheFile, conf);
|
|
|
}
|
|
|
|
|
|
@After
|
|
@@ -88,37 +91,147 @@ public class TestClientDistributedCacheManager {
|
|
|
job.addCacheFile(secondCacheFile.toUri());
|
|
|
Configuration jobConf = job.getConfiguration();
|
|
|
|
|
|
- Map<URI, FileStatus> statCache = new HashMap<URI, FileStatus>();
|
|
|
+ Map<URI, FileStatus> statCache = new HashMap<>();
|
|
|
ClientDistributedCacheManager.determineTimestamps(jobConf, statCache);
|
|
|
|
|
|
FileStatus firstStatus = statCache.get(firstCacheFile.toUri());
|
|
|
FileStatus secondStatus = statCache.get(secondCacheFile.toUri());
|
|
|
|
|
|
- Assert.assertNotNull(firstStatus);
|
|
|
- Assert.assertNotNull(secondStatus);
|
|
|
- Assert.assertEquals(2, statCache.size());
|
|
|
+ Assert.assertNotNull(firstCacheFile + " was not found in the stats cache",
|
|
|
+ firstStatus);
|
|
|
+ Assert.assertNotNull(secondCacheFile + " was not found in the stats cache",
|
|
|
+ secondStatus);
|
|
|
+ Assert.assertEquals("Missing/extra entries found in the stas cache",
|
|
|
+ 2, statCache.size());
|
|
|
String expected = firstStatus.getModificationTime() + ","
|
|
|
+ secondStatus.getModificationTime();
|
|
|
Assert.assertEquals(expected, jobConf.get(MRJobConfig.CACHE_FILE_TIMESTAMPS));
|
|
|
+
|
|
|
+ job = Job.getInstance(conf);
|
|
|
+ job.addCacheFile(new Path(TEST_VISIBILITY_CHILD_DIR, "*").toUri());
|
|
|
+ jobConf = job.getConfiguration();
|
|
|
+ statCache.clear();
|
|
|
+ ClientDistributedCacheManager.determineTimestamps(jobConf, statCache);
|
|
|
+
|
|
|
+ FileStatus thirdStatus = statCache.get(TEST_VISIBILITY_CHILD_DIR.toUri());
|
|
|
+
|
|
|
+ Assert.assertEquals("Missing/extra entries found in the stas cache",
|
|
|
+ 1, statCache.size());
|
|
|
+ Assert.assertNotNull(TEST_VISIBILITY_CHILD_DIR
|
|
|
+ + " was not found in the stats cache", thirdStatus);
|
|
|
+ expected = Long.toString(thirdStatus.getModificationTime());
|
|
|
+ Assert.assertEquals("Incorrect timestamp for " + TEST_VISIBILITY_CHILD_DIR,
|
|
|
+ expected, jobConf.get(MRJobConfig.CACHE_FILE_TIMESTAMPS));
|
|
|
}
|
|
|
|
|
|
@Test
|
|
|
public void testDetermineCacheVisibilities() throws IOException {
|
|
|
- fs.setWorkingDirectory(TEST_VISIBILITY_CHILD_DIR);
|
|
|
+ fs.setPermission(TEST_VISIBILITY_PARENT_DIR,
|
|
|
+ new FsPermission((short)00777));
|
|
|
fs.setPermission(TEST_VISIBILITY_CHILD_DIR,
|
|
|
new FsPermission((short)00777));
|
|
|
+ fs.setWorkingDirectory(TEST_VISIBILITY_CHILD_DIR);
|
|
|
+ Job job = Job.getInstance(conf);
|
|
|
+ Path relativePath = new Path(SECOND_CACHE_FILE);
|
|
|
+ Path wildcardPath = new Path("*");
|
|
|
+ Map<URI, FileStatus> statCache = new HashMap<>();
|
|
|
+ Configuration jobConf;
|
|
|
+
|
|
|
+ job.addCacheFile(firstCacheFile.toUri());
|
|
|
+ job.addCacheFile(relativePath.toUri());
|
|
|
+ jobConf = job.getConfiguration();
|
|
|
+
|
|
|
+ ClientDistributedCacheManager.determineCacheVisibilities(jobConf,
|
|
|
+ statCache);
|
|
|
+ // We use get() instead of getBoolean() so we can tell the difference
|
|
|
+ // between wrong and missing
|
|
|
+ assertEquals("The file paths were not found to be publicly visible "
|
|
|
+ + "even though the full path is publicly accessible",
|
|
|
+ "true,true", jobConf.get(MRJobConfig.CACHE_FILE_VISIBILITIES));
|
|
|
+ checkCacheEntries(statCache, null, firstCacheFile, relativePath);
|
|
|
+
|
|
|
+ job = Job.getInstance(conf);
|
|
|
+ job.addCacheFile(wildcardPath.toUri());
|
|
|
+ jobConf = job.getConfiguration();
|
|
|
+ statCache.clear();
|
|
|
+
|
|
|
+ ClientDistributedCacheManager.determineCacheVisibilities(jobConf,
|
|
|
+ statCache);
|
|
|
+ // We use get() instead of getBoolean() so we can tell the difference
|
|
|
+ // between wrong and missing
|
|
|
+ assertEquals("The file path was not found to be publicly visible "
|
|
|
+ + "even though the full path is publicly accessible",
|
|
|
+ "true", jobConf.get(MRJobConfig.CACHE_FILE_VISIBILITIES));
|
|
|
+ checkCacheEntries(statCache, null, wildcardPath.getParent());
|
|
|
+
|
|
|
+ Path qualifiedParent = fs.makeQualified(TEST_VISIBILITY_PARENT_DIR);
|
|
|
fs.setPermission(TEST_VISIBILITY_PARENT_DIR,
|
|
|
new FsPermission((short)00700));
|
|
|
- Job job = Job.getInstance(conf);
|
|
|
- Path relativePath = new Path("thirdCachefile");
|
|
|
+ job = Job.getInstance(conf);
|
|
|
+ job.addCacheFile(firstCacheFile.toUri());
|
|
|
job.addCacheFile(relativePath.toUri());
|
|
|
- Configuration jobConf = job.getConfiguration();
|
|
|
+ jobConf = job.getConfiguration();
|
|
|
+ statCache.clear();
|
|
|
+
|
|
|
+ ClientDistributedCacheManager.determineCacheVisibilities(jobConf,
|
|
|
+ statCache);
|
|
|
+ // We use get() instead of getBoolean() so we can tell the difference
|
|
|
+ // between wrong and missing
|
|
|
+ assertEquals("The file paths were found to be publicly visible "
|
|
|
+ + "even though the parent directory is not publicly accessible",
|
|
|
+ "false,false", jobConf.get(MRJobConfig.CACHE_FILE_VISIBILITIES));
|
|
|
+ checkCacheEntries(statCache, qualifiedParent,
|
|
|
+ firstCacheFile, relativePath);
|
|
|
+
|
|
|
+ job = Job.getInstance(conf);
|
|
|
+ job.addCacheFile(wildcardPath.toUri());
|
|
|
+ jobConf = job.getConfiguration();
|
|
|
+ statCache.clear();
|
|
|
+
|
|
|
+ ClientDistributedCacheManager.determineCacheVisibilities(jobConf,
|
|
|
+ statCache);
|
|
|
+ // We use get() instead of getBoolean() so we can tell the difference
|
|
|
+ // between wrong and missing
|
|
|
+ assertEquals("The file path was found to be publicly visible "
|
|
|
+ + "even though the parent directory is not publicly accessible",
|
|
|
+ "false", jobConf.get(MRJobConfig.CACHE_FILE_VISIBILITIES));
|
|
|
+ checkCacheEntries(statCache, qualifiedParent, wildcardPath.getParent());
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * Validate that the file status cache contains all and only entries for a
|
|
|
+ * given set of paths up to a common parent.
|
|
|
+ *
|
|
|
+ * @param statCache the cache
|
|
|
+ * @param top the common parent at which to stop digging
|
|
|
+ * @param paths the paths to compare against the cache
|
|
|
+ */
|
|
|
+ private void checkCacheEntries(Map<URI, FileStatus> statCache, Path top,
|
|
|
+ Path... paths) {
|
|
|
+ Set<URI> expected = new HashSet<>();
|
|
|
+
|
|
|
+ for (Path path : paths) {
|
|
|
+ Path p = fs.makeQualified(path);
|
|
|
+
|
|
|
+ while (!p.isRoot() && !p.equals(top)) {
|
|
|
+ expected.add(p.toUri());
|
|
|
+ p = p.getParent();
|
|
|
+ }
|
|
|
+
|
|
|
+ expected.add(p.toUri());
|
|
|
+ }
|
|
|
+
|
|
|
+ Set<URI> uris = statCache.keySet();
|
|
|
+ Set<URI> missing = new HashSet<>(uris);
|
|
|
+ Set<URI> extra = new HashSet<>(expected);
|
|
|
+
|
|
|
+ missing.removeAll(expected);
|
|
|
+ extra.removeAll(uris);
|
|
|
|
|
|
- Map<URI, FileStatus> statCache = new HashMap<URI, FileStatus>();
|
|
|
- ClientDistributedCacheManager.
|
|
|
- determineCacheVisibilities(jobConf, statCache);
|
|
|
- Assert.assertFalse(jobConf.
|
|
|
- getBoolean(MRJobConfig.CACHE_FILE_VISIBILITIES,true));
|
|
|
+ assertTrue("File status cache does not contain an entries for " + missing,
|
|
|
+ missing.isEmpty());
|
|
|
+ assertTrue("File status cache contains extra extries: " + extra,
|
|
|
+ extra.isEmpty());
|
|
|
}
|
|
|
|
|
|
@SuppressWarnings("deprecation")
|