|
@@ -18,12 +18,14 @@
|
|
|
|
|
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
|
|
|
|
|
|
|
+import com.google.common.collect.ImmutableList;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.conf.Configuration;
|
|
import org.apache.hadoop.yarn.api.records.Container;
|
|
import org.apache.hadoop.yarn.api.records.Container;
|
|
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
|
|
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
|
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
|
import org.apache.hadoop.yarn.api.records.ContainerState;
|
|
import org.apache.hadoop.yarn.api.records.ContainerState;
|
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
|
|
|
+import org.apache.hadoop.yarn.api.records.NodeState;
|
|
import org.apache.hadoop.yarn.api.records.Priority;
|
|
import org.apache.hadoop.yarn.api.records.Priority;
|
|
import org.apache.hadoop.yarn.api.records.Resource;
|
|
import org.apache.hadoop.yarn.api.records.Resource;
|
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
|
@@ -40,6 +42,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEven
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
|
@@ -52,6 +57,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaS
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.PlacementSet;
|
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.placement.SimplePlacementSet;
|
|
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
|
|
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
|
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
|
import org.junit.Assert;
|
|
import org.junit.Assert;
|
|
@@ -411,6 +418,93 @@ public class TestCapacitySchedulerAsyncScheduling {
|
|
rm.stop();
|
|
rm.stop();
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ @Test(timeout = 30000)
|
|
|
|
+ public void testReturnNullWhenGetSchedulerContainer() throws Exception {
|
|
|
|
+ // disable async-scheduling for simulating complex scenario
|
|
|
|
+ Configuration disableAsyncConf = new Configuration(conf);
|
|
|
|
+ disableAsyncConf.setBoolean(
|
|
|
|
+ CapacitySchedulerConfiguration.SCHEDULE_ASYNCHRONOUSLY_ENABLE, false);
|
|
|
|
+
|
|
|
|
+ // init RM & NMs
|
|
|
|
+ final MockRM rm = new MockRM(disableAsyncConf);
|
|
|
|
+ rm.start();
|
|
|
|
+ final MockNM nm1 = rm.registerNode("192.168.0.1:1234", 8 * GB);
|
|
|
|
+ final MockNM nm2 = rm.registerNode("192.168.0.2:2234", 8 * GB);
|
|
|
|
+ rm.drainEvents();
|
|
|
|
+ final CapacityScheduler cs =
|
|
|
|
+ (CapacityScheduler) rm.getRMContext().getScheduler();
|
|
|
|
+ SchedulerNode sn1 = cs.getSchedulerNode(nm1.getNodeId());
|
|
|
|
+ final RMNode rmNode1 = cs.getNode(nm1.getNodeId()).getRMNode();
|
|
|
|
+ SchedulerNode sn2 = cs.getSchedulerNode(nm2.getNodeId());
|
|
|
|
+
|
|
|
|
+ // launch app1-am on nm1
|
|
|
|
+ RMApp app1 = rm.submitApp(1 * GB, "app1", "user", null, false, "default",
|
|
|
|
+ YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS, null, null, true, true);
|
|
|
|
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1);
|
|
|
|
+
|
|
|
|
+ // app2 asks 1 * 1G container
|
|
|
|
+ am1.allocate(ImmutableList.of(ResourceRequest
|
|
|
|
+ .newInstance(Priority.newInstance(0), "*",
|
|
|
|
+ Resources.createResource(1 * GB), 1)), null);
|
|
|
|
+ final RMContainer amContainer = cs.getRMContainer(
|
|
|
|
+ ContainerId.newContainerId(am1.getApplicationAttemptId(), 1));
|
|
|
|
+
|
|
|
|
+ // spy CapacityScheduler
|
|
|
|
+ final CapacityScheduler spyCs = Mockito.spy(cs);
|
|
|
|
+ // hook CapacityScheduler#submitResourceCommitRequest
|
|
|
|
+ final List<CSAssignment> assignmentSnapshots = new ArrayList<>();
|
|
|
|
+ Mockito.doAnswer(new Answer<Object>() {
|
|
|
|
+ public Boolean answer(InvocationOnMock invocation) throws Exception {
|
|
|
|
+ CSAssignment assignment = (CSAssignment) invocation.getArguments()[1];
|
|
|
|
+ if (cs.getNode(nm1.getNodeId()) != null) {
|
|
|
|
+ // decommission nm1 for first allocation on nm1
|
|
|
|
+ cs.getRMContext().getDispatcher().getEventHandler().handle(
|
|
|
|
+ new RMNodeEvent(nm1.getNodeId(), RMNodeEventType.DECOMMISSION));
|
|
|
|
+ rm.drainEvents();
|
|
|
|
+ Assert.assertEquals(NodeState.DECOMMISSIONED, rmNode1.getState());
|
|
|
|
+ Assert.assertNull(cs.getNode(nm1.getNodeId()));
|
|
|
|
+ assignmentSnapshots.add(assignment);
|
|
|
|
+ } else {
|
|
|
|
+ // add am container on nm1 to containersToKill
|
|
|
|
+ // for second allocation on nm2
|
|
|
|
+ assignment.setContainersToKill(ImmutableList.of(amContainer));
|
|
|
|
+ }
|
|
|
|
+ // check no NPE in actual submit, before YARN-8233 will throw NPE
|
|
|
|
+ cs.submitResourceCommitRequest((Resource) invocation.getArguments()[0],
|
|
|
|
+ assignment);
|
|
|
|
+ return false;
|
|
|
|
+ }
|
|
|
|
+ }).when(spyCs).submitResourceCommitRequest(Mockito.any(Resource.class),
|
|
|
|
+ Mockito.any(CSAssignment.class));
|
|
|
|
+
|
|
|
|
+ // allocation on nm1, test return null when get scheduler container
|
|
|
|
+ PlacementSet<FiCaSchedulerNode> candidateNodeSet =
|
|
|
|
+ new SimplePlacementSet(sn1);
|
|
|
|
+ spyCs.allocateContainersToNode(candidateNodeSet, false);
|
|
|
|
+ // make sure unconfirmed resource is decreased correctly
|
|
|
|
+ Assert.assertTrue(spyCs.getApplicationAttempt(am1.getApplicationAttemptId())
|
|
|
|
+ .hasPendingResourceRequest(
|
|
|
|
+ rm.getResourceScheduler().getResourceCalculator(),
|
|
|
|
+ RMNodeLabelsManager.NO_LABEL,
|
|
|
|
+ rm.getResourceScheduler().getClusterResource(),
|
|
|
|
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY));
|
|
|
|
+
|
|
|
|
+ // allocation on nm2,
|
|
|
|
+ // test return null when get scheduler container to release
|
|
|
|
+ candidateNodeSet =
|
|
|
|
+ new SimplePlacementSet(sn2);
|
|
|
|
+ spyCs.allocateContainersToNode(candidateNodeSet, false);
|
|
|
|
+ // make sure unconfirmed resource is decreased correctly
|
|
|
|
+ Assert.assertTrue(spyCs.getApplicationAttempt(am1.getApplicationAttemptId())
|
|
|
|
+ .hasPendingResourceRequest(
|
|
|
|
+ rm.getResourceScheduler().getResourceCalculator(),
|
|
|
|
+ RMNodeLabelsManager.NO_LABEL,
|
|
|
|
+ rm.getResourceScheduler().getClusterResource(),
|
|
|
|
+ SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY));
|
|
|
|
+
|
|
|
|
+ rm.stop();
|
|
|
|
+ }
|
|
|
|
+
|
|
private void allocateAndLaunchContainers(MockAM am, MockNM nm, MockRM rm,
|
|
private void allocateAndLaunchContainers(MockAM am, MockNM nm, MockRM rm,
|
|
int nContainer, Resource resource, int priority, int startContainerId)
|
|
int nContainer, Resource resource, int priority, int startContainerId)
|
|
throws Exception {
|
|
throws Exception {
|