|
@@ -25,15 +25,15 @@ import java.util.Collection;
|
|
import java.util.Comparator;
|
|
import java.util.Comparator;
|
|
import java.util.EnumSet;
|
|
import java.util.EnumSet;
|
|
import java.util.HashMap;
|
|
import java.util.HashMap;
|
|
|
|
+import java.util.HashSet;
|
|
import java.util.List;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Map;
|
|
|
|
+import java.util.Map.Entry;
|
|
import java.util.Random;
|
|
import java.util.Random;
|
|
import java.util.Set;
|
|
import java.util.Set;
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
import java.util.concurrent.atomic.AtomicBoolean;
|
|
import java.util.concurrent.atomic.AtomicBoolean;
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
-import java.util.HashSet;
|
|
|
|
-import java.util.Set;
|
|
|
|
|
|
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.Log;
|
|
import org.apache.commons.logging.LogFactory;
|
|
import org.apache.commons.logging.LogFactory;
|
|
@@ -48,12 +48,15 @@ import org.apache.hadoop.security.UserGroupInformation;
|
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
|
import org.apache.hadoop.yarn.api.records.Container;
|
|
import org.apache.hadoop.yarn.api.records.Container;
|
|
|
|
+import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
|
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
|
|
|
+import org.apache.hadoop.yarn.api.records.ContainerState;
|
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
|
import org.apache.hadoop.yarn.api.records.QueueACL;
|
|
import org.apache.hadoop.yarn.api.records.QueueACL;
|
|
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
|
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
|
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
|
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
|
|
|
+import org.apache.hadoop.yarn.api.records.ReservationId;
|
|
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
|
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
|
@@ -79,12 +82,15 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueNotFoundException;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueNotFoundException;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.QueueMapping;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.QueueMapping;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.QueueMapping.MappingType;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.QueueMapping.MappingType;
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
|
@@ -93,6 +99,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptR
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerExpiredSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerExpiredSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeLabelsUpdateSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeResourceUpdateSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeResourceUpdateSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
|
@@ -106,11 +113,6 @@ import org.apache.hadoop.yarn.util.resource.Resources;
|
|
import com.google.common.annotations.VisibleForTesting;
|
|
import com.google.common.annotations.VisibleForTesting;
|
|
import com.google.common.base.Preconditions;
|
|
import com.google.common.base.Preconditions;
|
|
|
|
|
|
-import org.apache.hadoop.yarn.api.records.ReservationId;
|
|
|
|
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement;
|
|
|
|
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException;
|
|
|
|
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
|
|
|
|
-
|
|
|
|
@LimitedPrivate("yarn")
|
|
@LimitedPrivate("yarn")
|
|
@Evolving
|
|
@Evolving
|
|
@SuppressWarnings("unchecked")
|
|
@SuppressWarnings("unchecked")
|
|
@@ -966,6 +968,51 @@ public class CapacityScheduler extends
|
|
updateNodeResource(nm, resourceOption);
|
|
updateNodeResource(nm, resourceOption);
|
|
root.updateClusterResource(clusterResource);
|
|
root.updateClusterResource(clusterResource);
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Process node labels update on a node.
|
|
|
|
+ *
|
|
|
|
+ * TODO: Currently capacity scheduler will kill containers on a node when
|
|
|
|
+ * the labels on the node change. It is a simple solution to ensure guaranteed
|
|
|
|
+ * capacity on labels of queues. When YARN-2498 is completed, we can let
|
|
|
|
+ * the preemption policy decide if such containers need to be killed or just
|
|
|
|
+ * keep them running.
|
|
|
|
+ */
|
|
|
|
+ private synchronized void updateLabelsOnNode(NodeId nodeId,
|
|
|
|
+ Set<String> newLabels) {
|
|
|
|
+ FiCaSchedulerNode node = nodes.get(nodeId);
|
|
|
|
+ if (null == node) {
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Labels are unchanged, so no update is needed
|
|
|
|
+ if (node.getLabels().size() == newLabels.size()
|
|
|
|
+ && node.getLabels().containsAll(newLabels)) {
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Kill running containers since label is changed
|
|
|
|
+ for (RMContainer rmContainer : node.getRunningContainers()) {
|
|
|
|
+ ContainerId containerId = rmContainer.getContainerId();
|
|
|
|
+ completedContainer(rmContainer,
|
|
|
|
+ ContainerStatus.newInstance(containerId,
|
|
|
|
+ ContainerState.COMPLETE,
|
|
|
|
+ String.format(
|
|
|
|
+ "Container=%s killed since labels on the node=%s changed",
|
|
|
|
+ containerId.toString(), nodeId.toString()),
|
|
|
|
+ ContainerExitStatus.KILLED_BY_RESOURCEMANAGER),
|
|
|
|
+ RMContainerEventType.KILL);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Unreserve container on this node
|
|
|
|
+ RMContainer reservedContainer = node.getReservedContainer();
|
|
|
|
+ if (null != reservedContainer) {
|
|
|
|
+ dropContainerReservation(reservedContainer);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ // Update node labels after we've done this
|
|
|
|
+ node.updateLabels(newLabels);
|
|
|
|
+ }
|
|
|
|
|
|
private synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
|
|
private synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
|
|
if (rmContext.isWorkPreservingRecoveryEnabled()
|
|
if (rmContext.isWorkPreservingRecoveryEnabled()
|
|
@@ -1049,6 +1096,19 @@ public class CapacityScheduler extends
|
|
nodeResourceUpdatedEvent.getResourceOption());
|
|
nodeResourceUpdatedEvent.getResourceOption());
|
|
}
|
|
}
|
|
break;
|
|
break;
|
|
|
|
+ case NODE_LABELS_UPDATE:
|
|
|
|
+ {
|
|
|
|
+ NodeLabelsUpdateSchedulerEvent labelUpdateEvent =
|
|
|
|
+ (NodeLabelsUpdateSchedulerEvent) event;
|
|
|
|
+
|
|
|
|
+ for (Entry<NodeId, Set<String>> entry : labelUpdateEvent
|
|
|
|
+ .getUpdatedNodeToLabels().entrySet()) {
|
|
|
|
+ NodeId id = entry.getKey();
|
|
|
|
+ Set<String> labels = entry.getValue();
|
|
|
|
+ updateLabelsOnNode(id, labels);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ break;
|
|
case NODE_UPDATE:
|
|
case NODE_UPDATE:
|
|
{
|
|
{
|
|
NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event;
|
|
NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event;
|
|
@@ -1117,14 +1177,8 @@ public class CapacityScheduler extends
|
|
}
|
|
}
|
|
|
|
|
|
private synchronized void addNode(RMNode nodeManager) {
|
|
private synchronized void addNode(RMNode nodeManager) {
|
|
- // update this node to node label manager
|
|
|
|
- if (labelManager != null) {
|
|
|
|
- labelManager.activateNode(nodeManager.getNodeID(),
|
|
|
|
- nodeManager.getTotalCapability());
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
this.nodes.put(nodeManager.getNodeID(), new FiCaSchedulerNode(nodeManager,
|
|
this.nodes.put(nodeManager.getNodeID(), new FiCaSchedulerNode(nodeManager,
|
|
- usePortForNodeName));
|
|
|
|
|
|
+ usePortForNodeName, nodeManager.getNodeLabels()));
|
|
Resources.addTo(clusterResource, nodeManager.getTotalCapability());
|
|
Resources.addTo(clusterResource, nodeManager.getTotalCapability());
|
|
root.updateClusterResource(clusterResource);
|
|
root.updateClusterResource(clusterResource);
|
|
int numNodes = numNodeManagers.incrementAndGet();
|
|
int numNodes = numNodeManagers.incrementAndGet();
|
|
@@ -1135,6 +1189,12 @@ public class CapacityScheduler extends
|
|
if (scheduleAsynchronously && numNodes == 1) {
|
|
if (scheduleAsynchronously && numNodes == 1) {
|
|
asyncSchedulerThread.beginSchedule();
|
|
asyncSchedulerThread.beginSchedule();
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ // activate this node in the node label manager
|
|
|
|
+ if (labelManager != null) {
|
|
|
|
+ labelManager.activateNode(nodeManager.getNodeID(),
|
|
|
|
+ nodeManager.getTotalCapability());
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
private synchronized void removeNode(RMNode nodeInfo) {
|
|
private synchronized void removeNode(RMNode nodeInfo) {
|