|
@@ -18,22 +18,6 @@
|
|
|
|
|
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
|
|
|
|
|
-import java.util.ArrayList;
|
|
|
|
-import java.util.Collection;
|
|
|
|
-import java.util.HashMap;
|
|
|
|
-import java.util.HashSet;
|
|
|
|
-import java.util.Iterator;
|
|
|
|
-import java.util.List;
|
|
|
|
-import java.util.Map;
|
|
|
|
-import java.util.Set;
|
|
|
|
-import java.util.TreeMap;
|
|
|
|
-import java.util.concurrent.ConcurrentHashMap;
|
|
|
|
-import java.util.concurrent.ConcurrentSkipListMap;
|
|
|
|
-import java.util.concurrent.atomic.AtomicBoolean;
|
|
|
|
-import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
-import java.util.concurrent.locks.ReentrantReadWriteLock;
|
|
|
|
-
|
|
|
|
-import org.apache.commons.collections.IteratorUtils;
|
|
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
|
@@ -48,16 +32,30 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
|
-import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
|
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.PlacementSet;
|
|
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.LocalitySchedulingPlacementSet;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.ResourceRequestUpdateResult;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.ResourceRequestUpdateResult;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.SchedulingPlacementSet;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.SchedulingPlacementSet;
|
|
|
|
|
|
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
|
|
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
|
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
|
|
|
|
|
|
|
+import java.util.ArrayList;
|
|
|
|
+import java.util.Collection;
|
|
|
|
+import java.util.Collections;
|
|
|
|
+import java.util.HashMap;
|
|
|
|
+import java.util.HashSet;
|
|
|
|
+import java.util.List;
|
|
|
|
+import java.util.Map;
|
|
|
|
+import java.util.Set;
|
|
|
|
+import java.util.TreeMap;
|
|
|
|
+import java.util.concurrent.ConcurrentHashMap;
|
|
|
|
+import java.util.concurrent.ConcurrentSkipListMap;
|
|
|
|
+import java.util.concurrent.atomic.AtomicBoolean;
|
|
|
|
+import java.util.concurrent.atomic.AtomicLong;
|
|
|
|
+import java.util.concurrent.locks.ReentrantReadWriteLock;
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* This class keeps track of all the consumption of an application. This also
|
|
* This class keeps track of all the consumption of an application. This also
|
|
* keeps track of current running/completed containers for the application.
|
|
* keeps track of current running/completed containers for the application.
|
|
@@ -89,8 +87,8 @@ public class AppSchedulingInfo {
|
|
|
|
|
|
private final ConcurrentSkipListMap<SchedulerRequestKey, Integer>
|
|
private final ConcurrentSkipListMap<SchedulerRequestKey, Integer>
|
|
schedulerKeys = new ConcurrentSkipListMap<>();
|
|
schedulerKeys = new ConcurrentSkipListMap<>();
|
|
- final Map<SchedulerRequestKey, Map<String, ResourceRequest>>
|
|
|
|
- resourceRequestMap = new ConcurrentHashMap<>();
|
|
|
|
|
|
+ final Map<SchedulerRequestKey, SchedulingPlacementSet<SchedulerNode>>
|
|
|
|
+ schedulerKeyToPlacementSets = new ConcurrentHashMap<>();
|
|
final Map<NodeId, Map<SchedulerRequestKey, Map<ContainerId,
|
|
final Map<NodeId, Map<SchedulerRequestKey, Map<ContainerId,
|
|
SchedContainerChangeRequest>>> containerIncreaseRequestMap =
|
|
SchedContainerChangeRequest>>> containerIncreaseRequestMap =
|
|
new ConcurrentHashMap<>();
|
|
new ConcurrentHashMap<>();
|
|
@@ -153,7 +151,7 @@ public class AppSchedulingInfo {
|
|
*/
|
|
*/
|
|
private void clearRequests() {
|
|
private void clearRequests() {
|
|
schedulerKeys.clear();
|
|
schedulerKeys.clear();
|
|
- resourceRequestMap.clear();
|
|
|
|
|
|
+ schedulerKeyToPlacementSets.clear();
|
|
LOG.info("Application " + applicationId + " requests cleared");
|
|
LOG.info("Application " + applicationId + " requests cleared");
|
|
}
|
|
}
|
|
|
|
|
|
@@ -299,7 +297,7 @@ public class AppSchedulingInfo {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- private void decrementSchedulerKeyReference(
|
|
|
|
|
|
+ public void decrementSchedulerKeyReference(
|
|
SchedulerRequestKey schedulerKey) {
|
|
SchedulerRequestKey schedulerKey) {
|
|
Integer schedulerKeyCount = schedulerKeys.get(schedulerKey);
|
|
Integer schedulerKeyCount = schedulerKeys.get(schedulerKey);
|
|
if (schedulerKeyCount != null) {
|
|
if (schedulerKeyCount != null) {
|
|
@@ -391,49 +389,55 @@ public class AppSchedulingInfo {
|
|
*/
|
|
*/
|
|
public boolean updateResourceRequests(List<ResourceRequest> requests,
|
|
public boolean updateResourceRequests(List<ResourceRequest> requests,
|
|
boolean recoverPreemptedRequestForAContainer) {
|
|
boolean recoverPreemptedRequestForAContainer) {
|
|
|
|
+ if (null == requests || requests.isEmpty()) {
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+
|
|
// Flag to track if any incoming requests update "ANY" requests
|
|
// Flag to track if any incoming requests update "ANY" requests
|
|
- boolean anyResourcesUpdated = false;
|
|
|
|
|
|
+ boolean offswitchResourcesUpdated = false;
|
|
|
|
|
|
try {
|
|
try {
|
|
this.writeLock.lock();
|
|
this.writeLock.lock();
|
|
- // Update resource requests
|
|
|
|
- for (ResourceRequest request : requests) {
|
|
|
|
- SchedulerRequestKey schedulerKey = SchedulerRequestKey.create(request);
|
|
|
|
- String resourceName = request.getResourceName();
|
|
|
|
|
|
|
|
- // Update node labels if required
|
|
|
|
- updateNodeLabels(request);
|
|
|
|
-
|
|
|
|
- Map<String, ResourceRequest> asks =
|
|
|
|
- this.resourceRequestMap.get(schedulerKey);
|
|
|
|
- if (asks == null) {
|
|
|
|
- asks = new ConcurrentHashMap<>();
|
|
|
|
- this.resourceRequestMap.put(schedulerKey, asks);
|
|
|
|
- }
|
|
|
|
|
|
+ // A map to group resource requests and dedup
|
|
|
|
+ Map<SchedulerRequestKey, Map<String, ResourceRequest>> dedupRequests =
|
|
|
|
+ new HashMap<>();
|
|
|
|
|
|
- // Increment number of containers if recovering preempted resources
|
|
|
|
- ResourceRequest lastRequest = asks.get(resourceName);
|
|
|
|
- if (recoverPreemptedRequestForAContainer && lastRequest != null) {
|
|
|
|
- request.setNumContainers(lastRequest.getNumContainers() + 1);
|
|
|
|
|
|
+ // Group resource request by schedulerRequestKey and resourceName
|
|
|
|
+ for (ResourceRequest request : requests) {
|
|
|
|
+ SchedulerRequestKey schedulerKey = SchedulerRequestKey.create(request);
|
|
|
|
+ if (!dedupRequests.containsKey(schedulerKey)) {
|
|
|
|
+ dedupRequests.put(schedulerKey, new HashMap<>());
|
|
}
|
|
}
|
|
|
|
+ dedupRequests.get(schedulerKey).put(request.getResourceName(), request);
|
|
|
|
+ }
|
|
|
|
|
|
- // Update asks
|
|
|
|
- asks.put(resourceName, request);
|
|
|
|
-
|
|
|
|
- if (resourceName.equals(ResourceRequest.ANY)) {
|
|
|
|
- //update the applications requested labels set
|
|
|
|
- requestedPartitions.add(request.getNodeLabelExpression() == null
|
|
|
|
- ? RMNodeLabelsManager.NO_LABEL :
|
|
|
|
- request.getNodeLabelExpression());
|
|
|
|
|
|
+ // Update scheduling placement set
|
|
|
|
+ for (Map.Entry<SchedulerRequestKey, Map<String, ResourceRequest>> entry : dedupRequests.entrySet()) {
|
|
|
|
+ SchedulerRequestKey schedulerRequestKey = entry.getKey();
|
|
|
|
|
|
- anyResourcesUpdated = true;
|
|
|
|
|
|
+ if (!schedulerKeyToPlacementSets.containsKey(schedulerRequestKey)) {
|
|
|
|
+ schedulerKeyToPlacementSets.put(schedulerRequestKey,
|
|
|
|
+ new LocalitySchedulingPlacementSet<>(this));
|
|
|
|
+ }
|
|
|
|
|
|
- // Update pendingResources
|
|
|
|
- updatePendingResources(lastRequest, request, schedulerKey,
|
|
|
|
|
|
+ // Update placement set
|
|
|
|
+ ResourceRequestUpdateResult pendingAmountChanges =
|
|
|
|
+ schedulerKeyToPlacementSets.get(schedulerRequestKey)
|
|
|
|
+ .updateResourceRequests(
|
|
|
|
+ entry.getValue().values(),
|
|
|
|
+ recoverPreemptedRequestForAContainer);
|
|
|
|
+
|
|
|
|
+ if (null != pendingAmountChanges) {
|
|
|
|
+ updatePendingResources(
|
|
|
|
+ pendingAmountChanges.getLastAnyResourceRequest(),
|
|
|
|
+ pendingAmountChanges.getNewResourceRequest(), schedulerRequestKey,
|
|
queue.getMetrics());
|
|
queue.getMetrics());
|
|
|
|
+ offswitchResourcesUpdated = true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- return anyResourcesUpdated;
|
|
|
|
|
|
+
|
|
|
|
+ return offswitchResourcesUpdated;
|
|
} finally {
|
|
} finally {
|
|
this.writeLock.unlock();
|
|
this.writeLock.unlock();
|
|
}
|
|
}
|
|
@@ -483,35 +487,13 @@ public class AppSchedulingInfo {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- private void updateNodeLabels(ResourceRequest request) {
|
|
|
|
- SchedulerRequestKey schedulerKey = SchedulerRequestKey.create(request);
|
|
|
|
- String resourceName = request.getResourceName();
|
|
|
|
- if (resourceName.equals(ResourceRequest.ANY)) {
|
|
|
|
- ResourceRequest previousAnyRequest =
|
|
|
|
- getResourceRequest(schedulerKey, resourceName);
|
|
|
|
-
|
|
|
|
- // When there is change in ANY request label expression, we should
|
|
|
|
- // update label for all resource requests already added of same
|
|
|
|
- // priority as ANY resource request.
|
|
|
|
- if ((null == previousAnyRequest)
|
|
|
|
- || hasRequestLabelChanged(previousAnyRequest, request)) {
|
|
|
|
- Map<String, ResourceRequest> resourceRequest =
|
|
|
|
- getResourceRequests(schedulerKey);
|
|
|
|
- if (resourceRequest != null) {
|
|
|
|
- for (ResourceRequest r : resourceRequest.values()) {
|
|
|
|
- if (!r.getResourceName().equals(ResourceRequest.ANY)) {
|
|
|
|
- r.setNodeLabelExpression(request.getNodeLabelExpression());
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- } else {
|
|
|
|
- ResourceRequest anyRequest =
|
|
|
|
- getResourceRequest(schedulerKey, ResourceRequest.ANY);
|
|
|
|
- if (anyRequest != null) {
|
|
|
|
- request.setNodeLabelExpression(anyRequest.getNodeLabelExpression());
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ public void addRequestedPartition(String partition) {
|
|
|
|
+ requestedPartitions.add(partition);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public void decPendingResource(String partition, Resource toDecrease) {
|
|
|
|
+ queue.decPendingResource(partition, toDecrease);
|
|
|
|
+ appResourceUsage.decPending(partition, toDecrease);
|
|
}
|
|
}
|
|
|
|
|
|
private boolean hasRequestLabelChanged(ResourceRequest requestOne,
|
|
private boolean hasRequestLabelChanged(ResourceRequest requestOne,
|
|
@@ -584,17 +566,22 @@ public class AppSchedulingInfo {
|
|
return schedulerKeys.keySet();
|
|
return schedulerKeys.keySet();
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ @SuppressWarnings("unchecked")
|
|
public Map<String, ResourceRequest> getResourceRequests(
|
|
public Map<String, ResourceRequest> getResourceRequests(
|
|
SchedulerRequestKey schedulerKey) {
|
|
SchedulerRequestKey schedulerKey) {
|
|
- return resourceRequestMap.get(schedulerKey);
|
|
|
|
|
|
+ SchedulingPlacementSet ps = schedulerKeyToPlacementSets.get(schedulerKey);
|
|
|
|
+ if (null != ps) {
|
|
|
|
+ return ps.getResourceRequests();
|
|
|
|
+ }
|
|
|
|
+ return Collections.emptyMap();
|
|
}
|
|
}
|
|
|
|
|
|
public List<ResourceRequest> getAllResourceRequests() {
|
|
public List<ResourceRequest> getAllResourceRequests() {
|
|
List<ResourceRequest> ret = new ArrayList<>();
|
|
List<ResourceRequest> ret = new ArrayList<>();
|
|
try {
|
|
try {
|
|
this.readLock.lock();
|
|
this.readLock.lock();
|
|
- for (Map<String, ResourceRequest> r : resourceRequestMap.values()) {
|
|
|
|
- ret.addAll(r.values());
|
|
|
|
|
|
+ for (SchedulingPlacementSet ps : schedulerKeyToPlacementSets.values()) {
|
|
|
|
+ ret.addAll(ps.getResourceRequests().values());
|
|
}
|
|
}
|
|
} finally {
|
|
} finally {
|
|
this.readLock.unlock();
|
|
this.readLock.unlock();
|
|
@@ -606,9 +593,9 @@ public class AppSchedulingInfo {
|
|
String resourceName) {
|
|
String resourceName) {
|
|
try {
|
|
try {
|
|
this.readLock.lock();
|
|
this.readLock.lock();
|
|
- Map<String, ResourceRequest> nodeRequests =
|
|
|
|
- resourceRequestMap.get(schedulerKey);
|
|
|
|
- return (nodeRequests == null) ? null : nodeRequests.get(resourceName);
|
|
|
|
|
|
+ SchedulingPlacementSet ps =
|
|
|
|
+ schedulerKeyToPlacementSets.get(schedulerKey);
|
|
|
|
+ return (ps == null) ? null : ps.getResourceRequest(resourceName);
|
|
} finally {
|
|
} finally {
|
|
this.readLock.unlock();
|
|
this.readLock.unlock();
|
|
}
|
|
}
|
|
@@ -635,10 +622,14 @@ public class AppSchedulingInfo {
|
|
*/
|
|
*/
|
|
@Unstable
|
|
@Unstable
|
|
public synchronized ResourceRequest getNextResourceRequest() {
|
|
public synchronized ResourceRequest getNextResourceRequest() {
|
|
- for (ResourceRequest rr:
|
|
|
|
- resourceRequestMap.get(schedulerKeys.firstKey()).values()) {
|
|
|
|
- return rr;
|
|
|
|
|
|
+ SchedulingPlacementSet<SchedulerNode> ps = schedulerKeyToPlacementSets.get(
|
|
|
|
+ schedulerKeys.firstKey());
|
|
|
|
+ if (null != ps) {
|
|
|
|
+ for (ResourceRequest rr : ps.getResourceRequests().values()) {
|
|
|
|
+ return rr;
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
+
|
|
return null;
|
|
return null;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -717,141 +708,29 @@ public class AppSchedulingInfo {
|
|
|
|
|
|
public List<ResourceRequest> allocate(NodeType type,
|
|
public List<ResourceRequest> allocate(NodeType type,
|
|
SchedulerNode node, SchedulerRequestKey schedulerKey,
|
|
SchedulerNode node, SchedulerRequestKey schedulerKey,
|
|
|
|
+ ResourceRequest request,
|
|
Container containerAllocated) {
|
|
Container containerAllocated) {
|
|
try {
|
|
try {
|
|
writeLock.lock();
|
|
writeLock.lock();
|
|
- ResourceRequest request;
|
|
|
|
- if (type == NodeType.NODE_LOCAL) {
|
|
|
|
- request = resourceRequestMap.get(schedulerKey).get(node.getNodeName());
|
|
|
|
- } else if (type == NodeType.RACK_LOCAL) {
|
|
|
|
- request = resourceRequestMap.get(schedulerKey).get(node.getRackName());
|
|
|
|
- } else{
|
|
|
|
- request = resourceRequestMap.get(schedulerKey).get(ResourceRequest.ANY);
|
|
|
|
- }
|
|
|
|
- return allocate(type, node, schedulerKey, request, containerAllocated);
|
|
|
|
- } finally {
|
|
|
|
- writeLock.unlock();
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * Resources have been allocated to this application by the resource
|
|
|
|
- * scheduler. Track them.
|
|
|
|
- * @param type Node Type
|
|
|
|
- * @param node SchedulerNode
|
|
|
|
- * @param schedulerKey SchedulerRequestKey
|
|
|
|
- * @param request ResourceRequest
|
|
|
|
- * @param containerAllocated Container Allocated
|
|
|
|
- * @return List of ResourceRequests
|
|
|
|
- */
|
|
|
|
- public List<ResourceRequest> allocate(NodeType type,
|
|
|
|
- SchedulerNode node, SchedulerRequestKey schedulerKey,
|
|
|
|
- ResourceRequest request, Container containerAllocated) {
|
|
|
|
- try {
|
|
|
|
- writeLock.lock();
|
|
|
|
- List<ResourceRequest> resourceRequests = new ArrayList<>();
|
|
|
|
- if (type == NodeType.NODE_LOCAL) {
|
|
|
|
- allocateNodeLocal(node, schedulerKey, request, resourceRequests);
|
|
|
|
- } else if (type == NodeType.RACK_LOCAL) {
|
|
|
|
- allocateRackLocal(node, schedulerKey, request, resourceRequests);
|
|
|
|
- } else{
|
|
|
|
- allocateOffSwitch(request, resourceRequests, schedulerKey);
|
|
|
|
- }
|
|
|
|
|
|
|
|
if (null != containerAllocated) {
|
|
if (null != containerAllocated) {
|
|
- updateMetricsForAllocatedContainer(request, type, containerAllocated);
|
|
|
|
|
|
+ updateMetricsForAllocatedContainer(type, containerAllocated);
|
|
}
|
|
}
|
|
- return resourceRequests;
|
|
|
|
|
|
+
|
|
|
|
+ return schedulerKeyToPlacementSets.get(schedulerKey).allocate(type, node,
|
|
|
|
+ request);
|
|
} finally {
|
|
} finally {
|
|
writeLock.unlock();
|
|
writeLock.unlock();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- /**
|
|
|
|
- * The {@link ResourceScheduler} is allocating data-local resources to the
|
|
|
|
- * application.
|
|
|
|
- */
|
|
|
|
- private void allocateNodeLocal(SchedulerNode node,
|
|
|
|
- SchedulerRequestKey schedulerKey, ResourceRequest nodeLocalRequest,
|
|
|
|
- List<ResourceRequest> resourceRequests) {
|
|
|
|
- // Update future requirements
|
|
|
|
- decResourceRequest(node.getNodeName(), schedulerKey, nodeLocalRequest);
|
|
|
|
-
|
|
|
|
- ResourceRequest rackLocalRequest = resourceRequestMap.get(schedulerKey).get(
|
|
|
|
- node.getRackName());
|
|
|
|
- decResourceRequest(node.getRackName(), schedulerKey, rackLocalRequest);
|
|
|
|
-
|
|
|
|
- ResourceRequest offRackRequest = resourceRequestMap.get(schedulerKey).get(
|
|
|
|
- ResourceRequest.ANY);
|
|
|
|
- decrementOutstanding(offRackRequest, schedulerKey);
|
|
|
|
-
|
|
|
|
- // Update cloned NodeLocal, RackLocal and OffRack requests for recovery
|
|
|
|
- resourceRequests.add(cloneResourceRequest(nodeLocalRequest));
|
|
|
|
- resourceRequests.add(cloneResourceRequest(rackLocalRequest));
|
|
|
|
- resourceRequests.add(cloneResourceRequest(offRackRequest));
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private void decResourceRequest(String resourceName,
|
|
|
|
- SchedulerRequestKey schedulerKey, ResourceRequest request) {
|
|
|
|
- request.setNumContainers(request.getNumContainers() - 1);
|
|
|
|
- if (request.getNumContainers() == 0) {
|
|
|
|
- resourceRequestMap.get(schedulerKey).remove(resourceName);
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * The {@link ResourceScheduler} is allocating data-local resources to the
|
|
|
|
- * application.
|
|
|
|
- */
|
|
|
|
- private void allocateRackLocal(SchedulerNode node,
|
|
|
|
- SchedulerRequestKey schedulerKey, ResourceRequest rackLocalRequest,
|
|
|
|
- List<ResourceRequest> resourceRequests) {
|
|
|
|
- // Update future requirements
|
|
|
|
- decResourceRequest(node.getRackName(), schedulerKey, rackLocalRequest);
|
|
|
|
-
|
|
|
|
- ResourceRequest offRackRequest = resourceRequestMap.get(schedulerKey).get(
|
|
|
|
- ResourceRequest.ANY);
|
|
|
|
- decrementOutstanding(offRackRequest, schedulerKey);
|
|
|
|
-
|
|
|
|
- // Update cloned RackLocal and OffRack requests for recovery
|
|
|
|
- resourceRequests.add(cloneResourceRequest(rackLocalRequest));
|
|
|
|
- resourceRequests.add(cloneResourceRequest(offRackRequest));
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- /**
|
|
|
|
- * The {@link ResourceScheduler} is allocating data-local resources to the
|
|
|
|
- * application.
|
|
|
|
- */
|
|
|
|
- private void allocateOffSwitch(ResourceRequest offSwitchRequest,
|
|
|
|
- List<ResourceRequest> resourceRequests,
|
|
|
|
- SchedulerRequestKey schedulerKey) {
|
|
|
|
- // Update future requirements
|
|
|
|
- decrementOutstanding(offSwitchRequest, schedulerKey);
|
|
|
|
- // Update cloned OffRack requests for recovery
|
|
|
|
- resourceRequests.add(cloneResourceRequest(offSwitchRequest));
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- private void decrementOutstanding(ResourceRequest offSwitchRequest,
|
|
|
|
- SchedulerRequestKey schedulerKey) {
|
|
|
|
- int numOffSwitchContainers = offSwitchRequest.getNumContainers() - 1;
|
|
|
|
-
|
|
|
|
- // Do not remove ANY
|
|
|
|
- offSwitchRequest.setNumContainers(numOffSwitchContainers);
|
|
|
|
-
|
|
|
|
- // Do we have any outstanding requests?
|
|
|
|
- // If there is nothing, we need to deactivate this application
|
|
|
|
- if (numOffSwitchContainers == 0) {
|
|
|
|
- decrementSchedulerKeyReference(schedulerKey);
|
|
|
|
- checkForDeactivation();
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- appResourceUsage.decPending(offSwitchRequest.getNodeLabelExpression(),
|
|
|
|
- offSwitchRequest.getCapability());
|
|
|
|
- queue.decPendingResource(offSwitchRequest.getNodeLabelExpression(),
|
|
|
|
- offSwitchRequest.getCapability());
|
|
|
|
|
|
+ public List<ResourceRequest> allocate(NodeType type,
|
|
|
|
+ SchedulerNode node, SchedulerRequestKey schedulerKey,
|
|
|
|
+ Container containerAllocated) {
|
|
|
|
+ return allocate(type, node, schedulerKey, null, containerAllocated);
|
|
}
|
|
}
|
|
|
|
|
|
- private void checkForDeactivation() {
|
|
|
|
|
|
+ public void checkForDeactivation() {
|
|
if (schedulerKeys.isEmpty()) {
|
|
if (schedulerKeys.isEmpty()) {
|
|
activeUsersManager.deactivateApplication(user, applicationId);
|
|
activeUsersManager.deactivateApplication(user, applicationId);
|
|
}
|
|
}
|
|
@@ -862,9 +741,9 @@ public class AppSchedulingInfo {
|
|
this.writeLock.lock();
|
|
this.writeLock.lock();
|
|
QueueMetrics oldMetrics = queue.getMetrics();
|
|
QueueMetrics oldMetrics = queue.getMetrics();
|
|
QueueMetrics newMetrics = newQueue.getMetrics();
|
|
QueueMetrics newMetrics = newQueue.getMetrics();
|
|
- for (Map<String, ResourceRequest> asks : resourceRequestMap.values()) {
|
|
|
|
- ResourceRequest request = asks.get(ResourceRequest.ANY);
|
|
|
|
- if (request != null) {
|
|
|
|
|
|
+ for (SchedulingPlacementSet ps : schedulerKeyToPlacementSets.values()) {
|
|
|
|
+ ResourceRequest request = ps.getResourceRequest(ResourceRequest.ANY);
|
|
|
|
+ if (request != null && request.getNumContainers() > 0) {
|
|
oldMetrics.decrPendingResources(user, request.getNumContainers(),
|
|
oldMetrics.decrPendingResources(user, request.getNumContainers(),
|
|
request.getCapability());
|
|
request.getCapability());
|
|
newMetrics.incrPendingResources(user, request.getNumContainers(),
|
|
newMetrics.incrPendingResources(user, request.getNumContainers(),
|
|
@@ -893,9 +772,9 @@ public class AppSchedulingInfo {
|
|
try {
|
|
try {
|
|
this.writeLock.lock();
|
|
this.writeLock.lock();
|
|
QueueMetrics metrics = queue.getMetrics();
|
|
QueueMetrics metrics = queue.getMetrics();
|
|
- for (Map<String, ResourceRequest> asks : resourceRequestMap.values()) {
|
|
|
|
- ResourceRequest request = asks.get(ResourceRequest.ANY);
|
|
|
|
- if (request != null) {
|
|
|
|
|
|
+ for (SchedulingPlacementSet ps : schedulerKeyToPlacementSets.values()) {
|
|
|
|
+ ResourceRequest request = ps.getResourceRequest(ResourceRequest.ANY);
|
|
|
|
+ if (request != null && request.getNumContainers() > 0) {
|
|
metrics.decrPendingResources(user, request.getNumContainers(),
|
|
metrics.decrPendingResources(user, request.getNumContainers(),
|
|
request.getCapability());
|
|
request.getCapability());
|
|
|
|
|
|
@@ -964,18 +843,6 @@ public class AppSchedulingInfo {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- public ResourceRequest cloneResourceRequest(ResourceRequest request) {
|
|
|
|
- ResourceRequest newRequest = ResourceRequest.newBuilder()
|
|
|
|
- .priority(request.getPriority())
|
|
|
|
- .allocationRequestId(request.getAllocationRequestId())
|
|
|
|
- .resourceName(request.getResourceName())
|
|
|
|
- .capability(request.getCapability())
|
|
|
|
- .numContainers(1)
|
|
|
|
- .relaxLocality(request.getRelaxLocality())
|
|
|
|
- .nodeLabelExpression(request.getNodeLabelExpression()).build();
|
|
|
|
- return newRequest;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
/*
|
|
/*
|
|
* In async environment, pending resource request could be updated during
|
|
* In async environment, pending resource request could be updated during
|
|
* scheduling, this method checks pending request before allocating
|
|
* scheduling, this method checks pending request before allocating
|
|
@@ -984,107 +851,43 @@ public class AppSchedulingInfo {
|
|
SchedulerRequestKey schedulerKey) {
|
|
SchedulerRequestKey schedulerKey) {
|
|
try {
|
|
try {
|
|
readLock.lock();
|
|
readLock.lock();
|
|
- ResourceRequest r = resourceRequestMap.get(schedulerKey).get(
|
|
|
|
- ResourceRequest.ANY);
|
|
|
|
- if (r == null || r.getNumContainers() <= 0) {
|
|
|
|
|
|
+ SchedulingPlacementSet ps = schedulerKeyToPlacementSets.get(schedulerKey);
|
|
|
|
+ if (null == ps) {
|
|
return false;
|
|
return false;
|
|
}
|
|
}
|
|
- if (type == NodeType.RACK_LOCAL || type == NodeType.NODE_LOCAL) {
|
|
|
|
- r = resourceRequestMap.get(schedulerKey).get(node.getRackName());
|
|
|
|
- if (r == null || r.getNumContainers() <= 0) {
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- if (type == NodeType.NODE_LOCAL) {
|
|
|
|
- r = resourceRequestMap.get(schedulerKey).get(node.getNodeName());
|
|
|
|
- if (r == null || r.getNumContainers() <= 0) {
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- return true;
|
|
|
|
|
|
+ return ps.canAllocate(type, node);
|
|
} finally {
|
|
} finally {
|
|
readLock.unlock();
|
|
readLock.unlock();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- public void updateMetricsForAllocatedContainer(
|
|
|
|
- ResourceRequest request, NodeType type, Container containerAllocated) {
|
|
|
|
- try {
|
|
|
|
- writeLock.lock();
|
|
|
|
- QueueMetrics metrics = queue.getMetrics();
|
|
|
|
- if (pending) {
|
|
|
|
- // once an allocation is done we assume the application is
|
|
|
|
- // running from scheduler's POV.
|
|
|
|
- pending = false;
|
|
|
|
- metrics.runAppAttempt(applicationId, user);
|
|
|
|
- }
|
|
|
|
|
|
+ private void updateMetricsForAllocatedContainer(
|
|
|
|
+ NodeType type, Container containerAllocated) {
|
|
|
|
+ QueueMetrics metrics = queue.getMetrics();
|
|
|
|
+ if (pending) {
|
|
|
|
+ // once an allocation is done we assume the application is
|
|
|
|
+ // running from scheduler's POV.
|
|
|
|
+ pending = false;
|
|
|
|
+ metrics.runAppAttempt(applicationId, user);
|
|
|
|
+ }
|
|
|
|
|
|
- if (LOG.isDebugEnabled()) {
|
|
|
|
- LOG.debug("allocate: applicationId=" + applicationId + " container="
|
|
|
|
- + containerAllocated.getId() + " host=" + containerAllocated
|
|
|
|
- .getNodeId().toString() + " user=" + user + " resource=" + request
|
|
|
|
- .getCapability() + " type=" + type);
|
|
|
|
- }
|
|
|
|
- metrics.allocateResources(user, 1, request.getCapability(), true);
|
|
|
|
- metrics.incrNodeTypeAggregations(user, type);
|
|
|
|
- } finally {
|
|
|
|
- writeLock.unlock();
|
|
|
|
|
|
+ if (LOG.isDebugEnabled()) {
|
|
|
|
+ LOG.debug("allocate: applicationId=" + applicationId + " container="
|
|
|
|
+ + containerAllocated.getId() + " host=" + containerAllocated
|
|
|
|
+ .getNodeId().toString() + " user=" + user + " resource="
|
|
|
|
+ + containerAllocated.getResource() + " type="
|
|
|
|
+ + type);
|
|
}
|
|
}
|
|
|
|
+ metrics.allocateResources(user, 1, containerAllocated.getResource(),
|
|
|
|
+ true);
|
|
|
|
+ metrics.incrNodeTypeAggregations(user, type);
|
|
}
|
|
}
|
|
|
|
|
|
// Get placement-set by specified schedulerKey
|
|
// Get placement-set by specified schedulerKey
|
|
// Now simply return all node of the input clusterPlacementSet
|
|
// Now simply return all node of the input clusterPlacementSet
|
|
- // TODO, need update this when we support global scheduling
|
|
|
|
public <N extends SchedulerNode> SchedulingPlacementSet<N> getSchedulingPlacementSet(
|
|
public <N extends SchedulerNode> SchedulingPlacementSet<N> getSchedulingPlacementSet(
|
|
SchedulerRequestKey schedulerkey) {
|
|
SchedulerRequestKey schedulerkey) {
|
|
- return new SchedulingPlacementSet<N>() {
|
|
|
|
- @Override
|
|
|
|
- @SuppressWarnings("unchecked")
|
|
|
|
- public Iterator<N> getPreferredNodeIterator(
|
|
|
|
- PlacementSet<N> clusterPlacementSet) {
|
|
|
|
- return IteratorUtils.singletonIterator(
|
|
|
|
- clusterPlacementSet.getAllNodes().values().iterator().next());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- @Override
|
|
|
|
- public ResourceRequestUpdateResult updateResourceRequests(
|
|
|
|
- List<ResourceRequest> requests,
|
|
|
|
- boolean recoverPreemptedRequestForAContainer) {
|
|
|
|
- return null;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- @Override
|
|
|
|
- public Map<String, ResourceRequest> getResourceRequests() {
|
|
|
|
- return null;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- @Override
|
|
|
|
- public ResourceRequest getResourceRequest(String resourceName,
|
|
|
|
- SchedulerRequestKey requestKey) {
|
|
|
|
- return null;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- @Override
|
|
|
|
- public List<ResourceRequest> allocate(NodeType type, SchedulerNode node,
|
|
|
|
- ResourceRequest request) {
|
|
|
|
- return null;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- @Override
|
|
|
|
- public Map<NodeId, N> getAllNodes() {
|
|
|
|
- return null;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- @Override
|
|
|
|
- public long getVersion() {
|
|
|
|
- return 0;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- @Override
|
|
|
|
- public String getPartition() {
|
|
|
|
- return null;
|
|
|
|
- }
|
|
|
|
- };
|
|
|
|
|
|
+ return (SchedulingPlacementSet<N>) schedulerKeyToPlacementSets.get(
|
|
|
|
+ schedulerkey);
|
|
}
|
|
}
|
|
}
|
|
}
|