@@ -452,11 +452,17 @@ class ReduceTask extends Task {
new ArrayList<MapOutputLocation>();

/**
- * a TreeSet for needed map outputs
+ * The set of required map outputs
*/
private Set <Integer> neededOutputs =
Collections.synchronizedSet(new TreeSet<Integer>());

+ /**
+ * The set of obsolete map taskids.
+ */
+ private Set <String> obsoleteMapIds =
+ Collections.synchronizedSet(new TreeSet<String>());
+
private Random random = null;

/**
@@ -635,6 +641,9 @@ class ReduceTask extends Task {
loc.getMapTaskId() + " from " + loc.getHost());
LOG.warn(StringUtils.stringifyException(e));
shuffleClientMetrics.failedFetch();
+
+ // Reset size to -1 to record the failed copy
+ size = -1;
} finally {
shuffleClientMetrics.threadFree();
finish(size);
@@ -648,7 +657,7 @@ class ReduceTask extends Task {
}
}

- /** Copies a a map output from a remote host, using raw RPC.
+ /** Copies a map output from a remote host, via HTTP.
* @param currentLocation the map output location to be copied
* @return the path (fully qualified) of the copied file
* @throws IOException if there is an error copying the file
@@ -656,9 +665,12 @@ class ReduceTask extends Task {
*/
private long copyOutput(MapOutputLocation loc
) throws IOException, InterruptedException {
- if (!neededOutputs.contains(loc.getMapId())) {
+ // check if we still need to copy the output from this location
+ if (!neededOutputs.contains(loc.getMapId()) ||
+ obsoleteMapIds.contains(loc.getMapTaskId())) {
return CopyResult.OBSOLETE;
}
+
String reduceId = reduceTask.getTaskId();
LOG.info(reduceId + " Copying " + loc.getMapTaskId() +
" output from " + loc.getHost() + ".");
@@ -865,18 +877,21 @@ class ReduceTask extends Task {
// tasktracker and put the mapId hashkeys with new
// MapOutputLocations as values
knownOutputs.addAll(retryFetches);
- // The call getsMapCompletionEvents will modify fromEventId to a val
- // that it should be for the next call to getSuccessMapEvents
- List <MapOutputLocation> locs = getMapCompletionEvents(fromEventId);
+
+ // The call to getMapCompletionEvents will update fromEventId to the
+ // value to be used for the next call to getMapCompletionEvents
+ int currentNumKnownMaps = knownOutputs.size();
+ int currentNumObsoleteMapIds = obsoleteMapIds.size();
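+ // Fetch the next batch of completion events; SUCCEEDED maps are appended
+ // to knownOutputs and obsolete map attempts are recorded in obsoleteMapIds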
+ getMapCompletionEvents(fromEventId, knownOutputs);
+
+ LOG.info(reduceTask.getTaskId() + ": " +
+ "Got " + (knownOutputs.size()-currentNumKnownMaps) +
+ " new map-outputs & " +
+ (obsoleteMapIds.size()-currentNumObsoleteMapIds) +
+ " obsolete map-outputs from tasktracker and " +
+ retryFetches.size() + " map-outputs from previous failures"
+ );

- // put discovered them on the known list
- for (int i=0; i < locs.size(); i++) {
- knownOutputs.add(locs.get(i));
- }
- LOG.info(reduceTask.getTaskId() +
- " Got " + locs.size() +
- " new map outputs from tasktracker and " + retryFetches.size()
- + " map outputs from previous failures");
// clear the "failed" fetches hashmap
retryFetches.clear();
}
@@ -904,6 +919,13 @@ class ReduceTask extends Task {
while (locIt.hasNext()) {

MapOutputLocation loc = (MapOutputLocation)locIt.next();
+
+ // Do not schedule fetches from OBSOLETE maps
+ if (obsoleteMapIds.contains(loc.getMapTaskId())) {
+ locIt.remove();
+ continue;
+ }
+
Long penaltyEnd = penaltyBox.get(loc.getHost());
boolean penalized = false, duplicate = false;
@@ -1104,14 +1126,17 @@ class ReduceTask extends Task {
}
}

- /** Queries the task tracker for a set of outputs ready to be copied
+ /**
+ * Queries the task tracker for a set of map-completion events from
+ * a given event ID.
* @param fromEventId the first event ID we want to start from, this is
- * modified by the call to this method
+ * modified by the call to this method
* @param jobClient the job tracker
* @return a set of locations to copy outputs from
* @throws IOException
*/
- private List <MapOutputLocation> getMapCompletionEvents(IntWritable fromEventId)
+ private void getMapCompletionEvents(IntWritable fromEventId,
+ List<MapOutputLocation> knownOutputs)
throws IOException {

long currentTime = System.currentTimeMillis();
@@ -1122,31 +1147,54 @@ class ReduceTask extends Task {
} catch (InterruptedException ie) { } // IGNORE
currentTime = System.currentTimeMillis();
}
- lastPollTime = currentTime;

- TaskCompletionEvent t[] = umbilical.getMapCompletionEvents(
- reduceTask.getJobId(),
- fromEventId.get(),
- probe_sample_size);
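+ // Ask the tasktracker, via the task umbilical protocol, for the next
+ // probe_sample_size map-completion events starting at fromEventId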
+ TaskCompletionEvent events[] =
+ umbilical.getMapCompletionEvents(reduceTask.getJobId(),
+ fromEventId.get(), probe_sample_size);

- List <MapOutputLocation> mapOutputsList =
- new ArrayList<MapOutputLocation>();
- for (TaskCompletionEvent event : t) {
- if (event.getTaskStatus() == TaskCompletionEvent.Status.SUCCEEDED) {
- URI u = URI.create(event.getTaskTrackerHttp());
- String host = u.getHost();
- int port = u.getPort();
- String taskId = event.getTaskId();
- int mId = event.idWithinJob();
- mapOutputsList.add(new MapOutputLocation(taskId, mId, host, port));
- } else if (event.getTaskStatus() == TaskCompletionEvent.Status.TIPFAILED) {
- neededOutputs.remove(event.idWithinJob());
- LOG.info("Ignoring output of failed map: '" + event.getTaskId() + "'");
+ // Note the last successful poll time-stamp
+ lastPollTime = currentTime;
+
+ // Update the last seen event ID
+ fromEventId.set(fromEventId.get() + events.length);
+
+ // Process the TaskCompletionEvents:
+ // 1. Save the SUCCEEDED maps in knownOutputs to fetch the outputs.
+ // 2. Save the OBSOLETE/FAILED/KILLED maps in obsoleteMapIds to stop fetching
+ // from those maps.
+ // 3. Remove TIPFAILED maps from neededOutputs since we don't need their
+ // outputs at all.
+ for (TaskCompletionEvent event : events) {
+ switch (event.getTaskStatus()) {
+ case SUCCEEDED:
+ {
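+ // Build the map-output location from the tasktracker's http address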
+ URI u = URI.create(event.getTaskTrackerHttp());
+ String host = u.getHost();
+ int port = u.getPort();
+ String taskId = event.getTaskId();
+ int mId = event.idWithinJob();
+ knownOutputs.add(new MapOutputLocation(taskId, mId, host, port));
+ }
+ break;
+ case FAILED:
+ case KILLED:
+ case OBSOLETE:
+ {
+ obsoleteMapIds.add(event.getTaskId());
+ LOG.info("Ignoring obsolete output of " + event.getTaskStatus() +
+ " map-task: '" + event.getTaskId() + "'");
+ }
+ break;
+ case TIPFAILED:
+ {
+ neededOutputs.remove(event.idWithinJob());
+ LOG.info("Ignoring output of failed map TIP: '" +
+ event.getTaskId() + "'");
+ }
+ break;
}
}
-
- fromEventId.set(fromEventId.get() + t.length);
- return mapOutputsList;
+
}