|
@@ -24,14 +24,16 @@ import static org.mockito.Mockito.doAnswer;
|
|
|
|
|
|
import java.io.ByteArrayOutputStream;
|
|
import java.io.ByteArrayOutputStream;
|
|
import java.io.IOException;
|
|
import java.io.IOException;
|
|
|
|
+import java.security.PrivilegedAction;
|
|
import java.util.ArrayList;
|
|
import java.util.ArrayList;
|
|
import java.util.Arrays;
|
|
import java.util.Arrays;
|
|
|
|
+import java.util.Iterator;
|
|
import java.util.List;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Map;
|
|
import java.util.TreeMap;
|
|
import java.util.TreeMap;
|
|
|
|
|
|
|
|
+import org.apache.hadoop.fs.FSDataInputStream;
|
|
import org.junit.Assert;
|
|
import org.junit.Assert;
|
|
-
|
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
import org.apache.hadoop.fs.FileSystem;
|
|
@@ -51,10 +53,16 @@ import org.apache.hadoop.mapred.RawKeyValueIterator;
|
|
import org.apache.hadoop.mapred.Reporter;
|
|
import org.apache.hadoop.mapred.Reporter;
|
|
import org.apache.hadoop.mapreduce.JobID;
|
|
import org.apache.hadoop.mapreduce.JobID;
|
|
import org.apache.hadoop.mapreduce.MRConfig;
|
|
import org.apache.hadoop.mapreduce.MRConfig;
|
|
|
|
+import org.apache.hadoop.mapreduce.MRJobConfig;
|
|
import org.apache.hadoop.mapreduce.TaskAttemptID;
|
|
import org.apache.hadoop.mapreduce.TaskAttemptID;
|
|
import org.apache.hadoop.mapreduce.TaskID;
|
|
import org.apache.hadoop.mapreduce.TaskID;
|
|
import org.apache.hadoop.mapreduce.TaskType;
|
|
import org.apache.hadoop.mapreduce.TaskType;
|
|
|
|
+import org.apache.hadoop.mapreduce.security.TokenCache;
|
|
|
|
+import org.apache.hadoop.mapreduce.CryptoUtils;
|
|
import org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl;
|
|
import org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl;
|
|
|
|
+import org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath;
|
|
|
|
+import org.apache.hadoop.security.Credentials;
|
|
|
|
+import org.apache.hadoop.security.UserGroupInformation;
|
|
import org.apache.hadoop.util.Progress;
|
|
import org.apache.hadoop.util.Progress;
|
|
import org.apache.hadoop.util.Progressable;
|
|
import org.apache.hadoop.util.Progressable;
|
|
import org.junit.After;
|
|
import org.junit.After;
|
|
@@ -63,40 +71,48 @@ import org.junit.Test;
|
|
import org.mockito.invocation.InvocationOnMock;
|
|
import org.mockito.invocation.InvocationOnMock;
|
|
import org.mockito.stubbing.Answer;
|
|
import org.mockito.stubbing.Answer;
|
|
|
|
|
|
|
|
+import com.google.common.collect.Lists;
|
|
|
|
+
|
|
public class TestMerger {
|
|
public class TestMerger {
|
|
|
|
|
|
private Configuration conf;
|
|
private Configuration conf;
|
|
private JobConf jobConf;
|
|
private JobConf jobConf;
|
|
private FileSystem fs;
|
|
private FileSystem fs;
|
|
-
|
|
|
|
|
|
+
|
|
@Before
|
|
@Before
|
|
public void setup() throws IOException {
|
|
public void setup() throws IOException {
|
|
conf = new Configuration();
|
|
conf = new Configuration();
|
|
jobConf = new JobConf();
|
|
jobConf = new JobConf();
|
|
fs = FileSystem.getLocal(conf);
|
|
fs = FileSystem.getLocal(conf);
|
|
}
|
|
}
|
|
-
|
|
|
|
- @After
|
|
|
|
- public void cleanup() throws IOException {
|
|
|
|
- fs.delete(new Path(jobConf.getLocalDirs()[0]), true);
|
|
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ @Test
|
|
|
|
+ public void testEncryptedMerger() throws Throwable {
|
|
|
|
+ jobConf.setBoolean(MRJobConfig.MR_ENCRYPTED_INTERMEDIATE_DATA, true);
|
|
|
|
+ conf.setBoolean(MRJobConfig.MR_ENCRYPTED_INTERMEDIATE_DATA, true);
|
|
|
|
+ Credentials credentials = UserGroupInformation.getCurrentUser().getCredentials();
|
|
|
|
+ TokenCache.setShuffleSecretKey(new byte[16], credentials);
|
|
|
|
+ UserGroupInformation.getCurrentUser().addCredentials(credentials);
|
|
|
|
+ testInMemoryAndOnDiskMerger();
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
@Test
|
|
@Test
|
|
- public void testInMemoryMerger() throws Throwable {
|
|
|
|
|
|
+ public void testInMemoryAndOnDiskMerger() throws Throwable {
|
|
JobID jobId = new JobID("a", 0);
|
|
JobID jobId = new JobID("a", 0);
|
|
- TaskAttemptID reduceId = new TaskAttemptID(
|
|
|
|
|
|
+ TaskAttemptID reduceId1 = new TaskAttemptID(
|
|
new TaskID(jobId, TaskType.REDUCE, 0), 0);
|
|
new TaskID(jobId, TaskType.REDUCE, 0), 0);
|
|
TaskAttemptID mapId1 = new TaskAttemptID(
|
|
TaskAttemptID mapId1 = new TaskAttemptID(
|
|
new TaskID(jobId, TaskType.MAP, 1), 0);
|
|
new TaskID(jobId, TaskType.MAP, 1), 0);
|
|
TaskAttemptID mapId2 = new TaskAttemptID(
|
|
TaskAttemptID mapId2 = new TaskAttemptID(
|
|
new TaskID(jobId, TaskType.MAP, 2), 0);
|
|
new TaskID(jobId, TaskType.MAP, 2), 0);
|
|
-
|
|
|
|
|
|
+
|
|
LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
|
|
LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
|
|
-
|
|
|
|
|
|
+
|
|
MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
|
|
MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
|
|
- reduceId, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null,
|
|
|
|
|
|
+ reduceId1, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null,
|
|
null, null, new Progress(), new MROutputFiles());
|
|
null, null, new Progress(), new MROutputFiles());
|
|
-
|
|
|
|
|
|
+
|
|
// write map outputs
|
|
// write map outputs
|
|
Map<String, String> map1 = new TreeMap<String, String>();
|
|
Map<String, String> map1 = new TreeMap<String, String>();
|
|
map1.put("apple", "disgusting");
|
|
map1.put("apple", "disgusting");
|
|
@@ -113,32 +129,88 @@ public class TestMerger {
|
|
mapOutputBytes1.length);
|
|
mapOutputBytes1.length);
|
|
System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0,
|
|
System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0,
|
|
mapOutputBytes2.length);
|
|
mapOutputBytes2.length);
|
|
-
|
|
|
|
|
|
+
|
|
// create merger and run merge
|
|
// create merger and run merge
|
|
MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger =
|
|
MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger =
|
|
mergeManager.createInMemoryMerger();
|
|
mergeManager.createInMemoryMerger();
|
|
- List<InMemoryMapOutput<Text, Text>> mapOutputs =
|
|
|
|
|
|
+ List<InMemoryMapOutput<Text, Text>> mapOutputs1 =
|
|
new ArrayList<InMemoryMapOutput<Text, Text>>();
|
|
new ArrayList<InMemoryMapOutput<Text, Text>>();
|
|
- mapOutputs.add(mapOutput1);
|
|
|
|
- mapOutputs.add(mapOutput2);
|
|
|
|
-
|
|
|
|
- inMemoryMerger.merge(mapOutputs);
|
|
|
|
-
|
|
|
|
|
|
+ mapOutputs1.add(mapOutput1);
|
|
|
|
+ mapOutputs1.add(mapOutput2);
|
|
|
|
+
|
|
|
|
+ inMemoryMerger.merge(mapOutputs1);
|
|
|
|
+
|
|
Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
|
|
Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
|
|
- Path outPath = mergeManager.onDiskMapOutputs.iterator().next();
|
|
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+ TaskAttemptID reduceId2 = new TaskAttemptID(
|
|
|
|
+ new TaskID(jobId, TaskType.REDUCE, 3), 0);
|
|
|
|
+ TaskAttemptID mapId3 = new TaskAttemptID(
|
|
|
|
+ new TaskID(jobId, TaskType.MAP, 4), 0);
|
|
|
|
+ TaskAttemptID mapId4 = new TaskAttemptID(
|
|
|
|
+ new TaskID(jobId, TaskType.MAP, 5), 0);
|
|
|
|
+ // write map outputs
|
|
|
|
+ Map<String, String> map3 = new TreeMap<String, String>();
|
|
|
|
+ map3.put("apple", "awesome");
|
|
|
|
+ map3.put("carrot", "amazing");
|
|
|
|
+ Map<String, String> map4 = new TreeMap<String, String>();
|
|
|
|
+ map4.put("banana", "bla");
|
|
|
|
+ byte[] mapOutputBytes3 = writeMapOutput(conf, map3);
|
|
|
|
+ byte[] mapOutputBytes4 = writeMapOutput(conf, map4);
|
|
|
|
+ InMemoryMapOutput<Text, Text> mapOutput3 = new InMemoryMapOutput<Text, Text>(
|
|
|
|
+ conf, mapId3, mergeManager, mapOutputBytes3.length, null, true);
|
|
|
|
+ InMemoryMapOutput<Text, Text> mapOutput4 = new InMemoryMapOutput<Text, Text>(
|
|
|
|
+ conf, mapId4, mergeManager, mapOutputBytes4.length, null, true);
|
|
|
|
+ System.arraycopy(mapOutputBytes3, 0, mapOutput3.getMemory(), 0,
|
|
|
|
+ mapOutputBytes3.length);
|
|
|
|
+ System.arraycopy(mapOutputBytes4, 0, mapOutput4.getMemory(), 0,
|
|
|
|
+ mapOutputBytes4.length);
|
|
|
|
+
|
|
|
|
+// // create merger and run merge
|
|
|
|
+ MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger2 =
|
|
|
|
+ mergeManager.createInMemoryMerger();
|
|
|
|
+ List<InMemoryMapOutput<Text, Text>> mapOutputs2 =
|
|
|
|
+ new ArrayList<InMemoryMapOutput<Text, Text>>();
|
|
|
|
+ mapOutputs2.add(mapOutput3);
|
|
|
|
+ mapOutputs2.add(mapOutput4);
|
|
|
|
+
|
|
|
|
+ inMemoryMerger2.merge(mapOutputs2);
|
|
|
|
+
|
|
|
|
+ Assert.assertEquals(2, mergeManager.onDiskMapOutputs.size());
|
|
|
|
+
|
|
|
|
+ List<CompressAwarePath> paths = new ArrayList<CompressAwarePath>();
|
|
|
|
+ Iterator<CompressAwarePath> iterator = mergeManager.onDiskMapOutputs.iterator();
|
|
List<String> keys = new ArrayList<String>();
|
|
List<String> keys = new ArrayList<String>();
|
|
List<String> values = new ArrayList<String>();
|
|
List<String> values = new ArrayList<String>();
|
|
- readOnDiskMapOutput(conf, fs, outPath, keys, values);
|
|
|
|
- Assert.assertEquals(keys, Arrays.asList("apple", "banana", "carrot"));
|
|
|
|
- Assert.assertEquals(values, Arrays.asList("disgusting", "pretty good", "delicious"));
|
|
|
|
|
|
+ while (iterator.hasNext()) {
|
|
|
|
+ CompressAwarePath next = iterator.next();
|
|
|
|
+ readOnDiskMapOutput(conf, fs, next, keys, values);
|
|
|
|
+ paths.add(next);
|
|
|
|
+ }
|
|
|
|
+ Assert.assertEquals(keys, Arrays.asList("apple", "banana", "carrot", "apple", "banana", "carrot"));
|
|
|
|
+ Assert.assertEquals(values, Arrays.asList("awesome", "bla", "amazing", "disgusting", "pretty good", "delicious"));
|
|
|
|
+ mergeManager.close();
|
|
|
|
+
|
|
|
|
+ mergeManager = new MergeManagerImpl<Text, Text>(
|
|
|
|
+ reduceId2, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null,
|
|
|
|
+ null, null, new Progress(), new MROutputFiles());
|
|
|
|
+
|
|
|
|
+ MergeThread<CompressAwarePath,Text,Text> onDiskMerger = mergeManager.createOnDiskMerger();
|
|
|
|
+ onDiskMerger.merge(paths);
|
|
|
|
+
|
|
|
|
+ Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
|
|
|
|
+
|
|
|
|
+ keys = new ArrayList<String>();
|
|
|
|
+ values = new ArrayList<String>();
|
|
|
|
+ readOnDiskMapOutput(conf, fs, mergeManager.onDiskMapOutputs.iterator().next(), keys, values);
|
|
|
|
+ Assert.assertEquals(keys, Arrays.asList("apple", "apple", "banana", "banana", "carrot", "carrot"));
|
|
|
|
+ Assert.assertEquals(values, Arrays.asList("awesome", "disgusting", "pretty good", "bla", "amazing", "delicious"));
|
|
|
|
|
|
mergeManager.close();
|
|
mergeManager.close();
|
|
Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
|
|
Assert.assertEquals(0, mergeManager.inMemoryMapOutputs.size());
|
|
Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
|
|
Assert.assertEquals(0, mergeManager.inMemoryMergedMapOutputs.size());
|
|
Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
|
|
Assert.assertEquals(0, mergeManager.onDiskMapOutputs.size());
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
private byte[] writeMapOutput(Configuration conf, Map<String, String> keysToValues)
|
|
private byte[] writeMapOutput(Configuration conf, Map<String, String> keysToValues)
|
|
throws IOException {
|
|
throws IOException {
|
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
|
@@ -152,11 +224,13 @@ public class TestMerger {
|
|
writer.close();
|
|
writer.close();
|
|
return baos.toByteArray();
|
|
return baos.toByteArray();
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
private void readOnDiskMapOutput(Configuration conf, FileSystem fs, Path path,
|
|
private void readOnDiskMapOutput(Configuration conf, FileSystem fs, Path path,
|
|
List<String> keys, List<String> values) throws IOException {
|
|
List<String> keys, List<String> values) throws IOException {
|
|
- IFile.Reader<Text, Text> reader = new IFile.Reader<Text, Text>(conf, fs,
|
|
|
|
- path, null, null);
|
|
|
|
|
|
+ FSDataInputStream in = CryptoUtils.wrapIfNecessary(conf, fs.open(path));
|
|
|
|
+
|
|
|
|
+ IFile.Reader<Text, Text> reader = new IFile.Reader<Text, Text>(conf, in,
|
|
|
|
+ fs.getFileStatus(path).getLen(), null, null);
|
|
DataInputBuffer keyBuff = new DataInputBuffer();
|
|
DataInputBuffer keyBuff = new DataInputBuffer();
|
|
DataInputBuffer valueBuff = new DataInputBuffer();
|
|
DataInputBuffer valueBuff = new DataInputBuffer();
|
|
Text key = new Text();
|
|
Text key = new Text();
|
|
@@ -169,17 +243,17 @@ public class TestMerger {
|
|
values.add(value.toString());
|
|
values.add(value.toString());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
@Test
|
|
@Test
|
|
public void testCompressed() throws IOException {
|
|
public void testCompressed() throws IOException {
|
|
testMergeShouldReturnProperProgress(getCompressedSegments());
|
|
testMergeShouldReturnProperProgress(getCompressedSegments());
|
|
- }
|
|
|
|
-
|
|
|
|
|
|
+}
|
|
|
|
+
|
|
@Test
|
|
@Test
|
|
public void testUncompressed() throws IOException {
|
|
public void testUncompressed() throws IOException {
|
|
testMergeShouldReturnProperProgress(getUncompressedSegments());
|
|
testMergeShouldReturnProperProgress(getUncompressedSegments());
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
@SuppressWarnings( { "deprecation", "unchecked" })
|
|
@SuppressWarnings( { "deprecation", "unchecked" })
|
|
public void testMergeShouldReturnProperProgress(
|
|
public void testMergeShouldReturnProperProgress(
|
|
List<Segment<Text, Text>> segments) throws IOException {
|
|
List<Segment<Text, Text>> segments) throws IOException {
|
|
@@ -212,7 +286,7 @@ public class TestMerger {
|
|
}
|
|
}
|
|
return segments;
|
|
return segments;
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
private List<Segment<Text, Text>> getCompressedSegments() throws IOException {
|
|
private List<Segment<Text, Text>> getCompressedSegments() throws IOException {
|
|
List<Segment<Text, Text>> segments = new ArrayList<Segment<Text, Text>>();
|
|
List<Segment<Text, Text>> segments = new ArrayList<Segment<Text, Text>>();
|
|
for (int i = 1; i < 1; i++) {
|
|
for (int i = 1; i < 1; i++) {
|
|
@@ -220,7 +294,7 @@ public class TestMerger {
|
|
}
|
|
}
|
|
return segments;
|
|
return segments;
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
private Segment<Text, Text> getUncompressedSegment(int i) throws IOException {
|
|
private Segment<Text, Text> getUncompressedSegment(int i) throws IOException {
|
|
return new Segment<Text, Text>(getReader(i), false);
|
|
return new Segment<Text, Text>(getReader(i), false);
|
|
}
|
|
}
|
|
@@ -258,7 +332,7 @@ public class TestMerger {
|
|
}
|
|
}
|
|
};
|
|
};
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
private Answer<?> getValueAnswer(final String segmentName) {
|
|
private Answer<?> getValueAnswer(final String segmentName) {
|
|
return new Answer<Void>() {
|
|
return new Answer<Void>() {
|
|
int i = 0;
|
|
int i = 0;
|