|
@@ -25,6 +25,7 @@ import java.util.Optional;
|
|
|
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityLevel;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.DiagnosticsCollector;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
|
|
@@ -100,15 +101,18 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
|
|
|
* We will consider stuffs like exclusivity, pending resource, node partition,
|
|
|
* headroom, etc.
|
|
|
*/
|
|
|
- private ContainerAllocation preCheckForNodeCandidateSet(FiCaSchedulerNode node,
|
|
|
+ private ContainerAllocation preCheckRequest(
|
|
|
+ CandidateNodeSet<FiCaSchedulerNode> candidates,
|
|
|
SchedulingMode schedulingMode, ResourceLimits resourceLimits,
|
|
|
SchedulerRequestKey schedulerKey) {
|
|
|
PendingAsk offswitchPendingAsk = application.getPendingAsk(schedulerKey,
|
|
|
ResourceRequest.ANY);
|
|
|
+ SchedulerNode recordNode =
|
|
|
+ CandidateNodeSetUtils.getSingleNode(candidates);
|
|
|
|
|
|
if (offswitchPendingAsk.getCount() <= 0) {
|
|
|
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
|
|
|
- activitiesManager, node, application, schedulerKey,
|
|
|
+ activitiesManager, recordNode, application, schedulerKey,
|
|
|
ActivityDiagnosticConstant.REQUEST_DO_NOT_NEED_RESOURCE,
|
|
|
ActivityLevel.REQUEST);
|
|
|
return ContainerAllocation.PRIORITY_SKIPPED;
|
|
@@ -120,7 +124,7 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
|
|
|
// Do we need containers at this 'priority'?
|
|
|
if (application.getOutstandingAsksCount(schedulerKey) <= 0) {
|
|
|
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
|
|
|
- activitiesManager, node, application, schedulerKey,
|
|
|
+ activitiesManager, recordNode, application, schedulerKey,
|
|
|
ActivityDiagnosticConstant.REQUEST_DO_NOT_NEED_RESOURCE,
|
|
|
ActivityLevel.REQUEST);
|
|
|
return ContainerAllocation.PRIORITY_SKIPPED;
|
|
@@ -130,12 +134,12 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
|
|
|
if (application.isWaitingForAMContainer() && !rmContext.getYarnConfiguration()
|
|
|
.getBoolean(AM_ALLOW_NON_EXCLUSIVE_ALLOCATION, false)) {
|
|
|
LOG.debug("Skip allocating AM container to app_attempt={},"
|
|
|
- + " don't allow to allocate AM container in non-exclusive mode",
|
|
|
+ + " don't allow to allocate AM container in non-exclusive mode",
|
|
|
application.getApplicationAttemptId());
|
|
|
application.updateAppSkipNodeDiagnostics(
|
|
|
"Skipping assigning to Node in Ignore Exclusivity mode. ");
|
|
|
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
|
|
|
- activitiesManager, node, application, schedulerKey,
|
|
|
+ activitiesManager, recordNode, application, schedulerKey,
|
|
|
ActivityDiagnosticConstant.
|
|
|
REQUEST_SKIPPED_IN_IGNORE_EXCLUSIVITY_MODE,
|
|
|
ActivityLevel.REQUEST);
|
|
@@ -143,37 +147,22 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // Is the nodePartition of pending request matches the node's partition
|
|
|
- // If not match, jump to next priority.
|
|
|
- Optional<DiagnosticsCollector> dcOpt = activitiesManager == null ?
|
|
|
- Optional.empty() :
|
|
|
- activitiesManager.getOptionalDiagnosticsCollector();
|
|
|
- if (!appInfo.precheckNode(schedulerKey, node, schedulingMode, dcOpt)) {
|
|
|
- ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
|
|
|
- activitiesManager, node, application, schedulerKey,
|
|
|
- ActivityDiagnosticConstant.
|
|
|
- NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS
|
|
|
- + ActivitiesManager.getDiagnostics(dcOpt),
|
|
|
- ActivityLevel.NODE);
|
|
|
- return ContainerAllocation.PRIORITY_SKIPPED;
|
|
|
- }
|
|
|
-
|
|
|
if (!application.getCSLeafQueue().isReservationsContinueLooking()) {
|
|
|
if (!shouldAllocOrReserveNewContainer(schedulerKey, required)) {
|
|
|
LOG.debug("doesn't need containers based on reservation algo!");
|
|
|
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
|
|
|
- activitiesManager, node, application, schedulerKey,
|
|
|
+ activitiesManager, recordNode, application, schedulerKey,
|
|
|
ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_OF_RESERVATION,
|
|
|
ActivityLevel.REQUEST);
|
|
|
return ContainerAllocation.PRIORITY_SKIPPED;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if (!checkHeadroom(resourceLimits, required, node.getPartition())) {
|
|
|
+ if (!checkHeadroom(resourceLimits, required, candidates.getPartition())) {
|
|
|
LOG.debug("cannot allocate required resource={} because of headroom",
|
|
|
required);
|
|
|
ActivitiesLogger.APP.recordAppActivityWithoutAllocation(
|
|
|
- activitiesManager, node, application, schedulerKey,
|
|
|
+ activitiesManager, recordNode, application, schedulerKey,
|
|
|
ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM,
|
|
|
ActivityState.REJECTED,
|
|
|
ActivityLevel.REQUEST);
|
|
@@ -190,7 +179,7 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
|
|
|
// This is possible when #pending resource decreased by a different
|
|
|
// thread.
|
|
|
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
|
|
|
- activitiesManager, node, application, schedulerKey,
|
|
|
+ activitiesManager, recordNode, application, schedulerKey,
|
|
|
ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST,
|
|
|
ActivityLevel.REQUEST);
|
|
|
return ContainerAllocation.PRIORITY_SKIPPED;
|
|
@@ -221,13 +210,38 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
|
|
|
+ rmContext.getScheduler().getNumClusterNodes());
|
|
|
}
|
|
|
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
|
|
|
- activitiesManager, node, application, schedulerKey,
|
|
|
+ activitiesManager, recordNode, application, schedulerKey,
|
|
|
ActivityDiagnosticConstant.
|
|
|
REQUEST_SKIPPED_BECAUSE_NON_PARTITIONED_PARTITION_FIRST,
|
|
|
ActivityLevel.REQUEST);
|
|
|
return ContainerAllocation.APP_SKIPPED;
|
|
|
}
|
|
|
}
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Pre-check if we can allocate a pending resource request
|
|
|
+ * (given schedulerKey) to a given node.
|
|
|
+ * We will consider stuffs like placement-constraints, etc.
|
|
|
+ */
|
|
|
+ private ContainerAllocation preCheckForNode(FiCaSchedulerNode node,
|
|
|
+ SchedulingMode schedulingMode, SchedulerRequestKey schedulerKey) {
|
|
|
+
|
|
|
+ // Is the nodePartition of pending request matches the node's partition
|
|
|
+ // If not match, jump to next priority.
|
|
|
+ Optional<DiagnosticsCollector> dcOpt = activitiesManager == null ?
|
|
|
+ Optional.empty() :
|
|
|
+ activitiesManager.getOptionalDiagnosticsCollector();
|
|
|
+ if (!appInfo.precheckNode(schedulerKey, node, schedulingMode, dcOpt)) {
|
|
|
+ ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
|
|
|
+ activitiesManager, node, application, schedulerKey,
|
|
|
+ ActivityDiagnosticConstant.
|
|
|
+ NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS
|
|
|
+ + ActivitiesManager.getDiagnostics(dcOpt),
|
|
|
+ ActivityLevel.NODE);
|
|
|
+ return ContainerAllocation.PRIORITY_SKIPPED;
|
|
|
+ }
|
|
|
|
|
|
return null;
|
|
|
}
|
|
@@ -849,6 +863,21 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
|
|
|
AllocationState.PRIORITY_SKIPPED);
|
|
|
}
|
|
|
|
|
|
+ // pre-check request
|
|
|
+ if (reservedContainer == null) {
|
|
|
+ result = preCheckRequest(candidates,
|
|
|
+ schedulingMode, resourceLimits, schedulerKey);
|
|
|
+ if (null != result) {
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ // pre-check when allocating reserved container
|
|
|
+ if (application.getOutstandingAsksCount(schedulerKey) == 0) {
|
|
|
+ return new ContainerAllocation(reservedContainer, null,
|
|
|
+ AllocationState.QUEUE_SKIPPED);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
result = ContainerAllocation.PRIORITY_SKIPPED;
|
|
|
|
|
|
Iterator<FiCaSchedulerNode> iter = schedulingPS.getPreferredNodeIterator(
|
|
@@ -872,19 +901,10 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
|
|
|
}
|
|
|
|
|
|
if (reservedContainer == null) {
|
|
|
- result = preCheckForNodeCandidateSet(node,
|
|
|
- schedulingMode, resourceLimits, schedulerKey);
|
|
|
+ result = preCheckForNode(node, schedulingMode, schedulerKey);
|
|
|
if (null != result) {
|
|
|
continue;
|
|
|
}
|
|
|
- } else {
|
|
|
- // pre-check when allocating reserved container
|
|
|
- if (application.getOutstandingAsksCount(schedulerKey) == 0) {
|
|
|
- // Release
|
|
|
- result = new ContainerAllocation(reservedContainer, null,
|
|
|
- AllocationState.QUEUE_SKIPPED);
|
|
|
- continue;
|
|
|
- }
|
|
|
}
|
|
|
|
|
|
result = tryAllocateOnNode(clusterResource, node, schedulingMode,
|