|
@@ -34,11 +34,15 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
|
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
|
|
import org.apache.hadoop.yarn.api.records.Container;
|
|
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
|
|
+import org.apache.hadoop.yarn.api.records.ContainerState;
|
|
|
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
|
|
+import org.apache.hadoop.yarn.api.records.ContainerUpdateType;
|
|
|
import org.apache.hadoop.yarn.api.records.ExecutionTypeRequest;
|
|
|
import org.apache.hadoop.yarn.api.records.ExecutionType;
|
|
|
import org.apache.hadoop.yarn.api.records.Priority;
|
|
|
import org.apache.hadoop.yarn.api.records.Resource;
|
|
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
|
|
+import org.apache.hadoop.yarn.api.records.UpdateContainerRequest;
|
|
|
import org.apache.hadoop.yarn.server.api.DistributedSchedulingAMProtocolPB;
|
|
|
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
|
|
|
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
|
@@ -64,8 +68,11 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RegisterDistrib
|
|
|
import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
|
|
|
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
|
|
@@ -75,13 +82,17 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateS
|
|
|
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
|
|
|
import org.apache.hadoop.yarn.server.scheduler.OpportunisticContainerContext;
|
|
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
|
|
+import org.junit.After;
|
|
|
import org.junit.Assert;
|
|
|
+import org.junit.Before;
|
|
|
import org.junit.Test;
|
|
|
import org.mockito.Mockito;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
import java.net.InetSocketAddress;
|
|
|
+import java.util.ArrayList;
|
|
|
import java.util.Arrays;
|
|
|
+import java.util.HashMap;
|
|
|
import java.util.List;
|
|
|
|
|
|
/**
|
|
@@ -91,8 +102,10 @@ public class TestOpportunisticContainerAllocatorAMService {
|
|
|
|
|
|
private static final int GB = 1024;
|
|
|
|
|
|
- @Test(timeout = 60000)
|
|
|
- public void testNodeRemovalDuringAllocate() throws Exception {
|
|
|
+ private MockRM rm;
|
|
|
+
|
|
|
+ @Before
|
|
|
+ public void createAndStartRM() {
|
|
|
CapacitySchedulerConfiguration csConf =
|
|
|
new CapacitySchedulerConfiguration();
|
|
|
YarnConfiguration conf = new YarnConfiguration(csConf);
|
|
@@ -102,8 +115,445 @@ public class TestOpportunisticContainerAllocatorAMService {
|
|
|
YarnConfiguration.OPPORTUNISTIC_CONTAINER_ALLOCATION_ENABLED, true);
|
|
|
conf.setInt(
|
|
|
YarnConfiguration.NM_CONTAINER_QUEUING_SORTING_NODES_INTERVAL_MS, 100);
|
|
|
- MockRM rm = new MockRM(conf);
|
|
|
+ rm = new MockRM(conf);
|
|
|
rm.start();
|
|
|
+ }
|
|
|
+
|
|
|
+ @After
|
|
|
+ public void stopRM() {
|
|
|
+ if (rm != null) {
|
|
|
+ rm.stop();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test(timeout = 600000)
|
|
|
+ public void testContainerPromoteAndDemoteBeforeContainerStart() throws Exception {
|
|
|
+ HashMap<NodeId, MockNM> nodes = new HashMap<>();
|
|
|
+ MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
|
|
|
+ nodes.put(nm1.getNodeId(), nm1);
|
|
|
+ MockNM nm2 = new MockNM("h1:4321", 4096, rm.getResourceTrackerService());
|
|
|
+ nodes.put(nm2.getNodeId(), nm2);
|
|
|
+ MockNM nm3 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
|
|
|
+ nodes.put(nm3.getNodeId(), nm3);
|
|
|
+ MockNM nm4 = new MockNM("h2:4321", 4096, rm.getResourceTrackerService());
|
|
|
+ nodes.put(nm4.getNodeId(), nm4);
|
|
|
+ nm1.registerNode();
|
|
|
+ nm2.registerNode();
|
|
|
+ nm3.registerNode();
|
|
|
+ nm4.registerNode();
|
|
|
+
|
|
|
+ OpportunisticContainerAllocatorAMService amservice =
|
|
|
+ (OpportunisticContainerAllocatorAMService) rm
|
|
|
+ .getApplicationMasterService();
|
|
|
+ RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
|
|
|
+ ApplicationAttemptId attemptId =
|
|
|
+ app1.getCurrentAppAttempt().getAppAttemptId();
|
|
|
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
|
|
|
+ ResourceScheduler scheduler = rm.getResourceScheduler();
|
|
|
+ RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
|
|
|
+ RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
|
|
|
+ RMNode rmNode3 = rm.getRMContext().getRMNodes().get(nm3.getNodeId());
|
|
|
+ RMNode rmNode4 = rm.getRMContext().getRMNodes().get(nm4.getNodeId());
|
|
|
+
|
|
|
+ nm1.nodeHeartbeat(true);
|
|
|
+ nm2.nodeHeartbeat(true);
|
|
|
+ nm3.nodeHeartbeat(true);
|
|
|
+ nm4.nodeHeartbeat(true);
|
|
|
+
|
|
|
+ ((RMNodeImpl) rmNode1)
|
|
|
+ .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
|
|
|
+ ((RMNodeImpl) rmNode2)
|
|
|
+ .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
|
|
|
+ ((RMNodeImpl) rmNode3)
|
|
|
+ .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
|
|
|
+ ((RMNodeImpl) rmNode4)
|
|
|
+ .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
|
|
|
+
|
|
|
+ OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler)
|
|
|
+ .getApplicationAttempt(attemptId).getOpportunisticContainerContext();
|
|
|
+ // Send add and update node events to AM Service.
|
|
|
+ amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
|
|
|
+ amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
|
|
|
+ amservice.handle(new NodeAddedSchedulerEvent(rmNode3));
|
|
|
+ amservice.handle(new NodeAddedSchedulerEvent(rmNode4));
|
|
|
+ amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
|
|
|
+ amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
|
|
|
+ amservice.handle(new NodeUpdateSchedulerEvent(rmNode3));
|
|
|
+ amservice.handle(new NodeUpdateSchedulerEvent(rmNode4));
|
|
|
+ // All nodes 1 - 4 will be applicable for scheduling.
|
|
|
+ nm1.nodeHeartbeat(true);
|
|
|
+ nm2.nodeHeartbeat(true);
|
|
|
+ nm3.nodeHeartbeat(true);
|
|
|
+ nm4.nodeHeartbeat(true);
|
|
|
+
|
|
|
+ Thread.sleep(1000);
|
|
|
+
|
|
|
+ QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue()
|
|
|
+ .getMetrics();
|
|
|
+
|
|
|
+ // Verify Metrics
|
|
|
+ verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
|
|
|
+
|
|
|
+ AllocateResponse allocateResponse = am1.allocate(
|
|
|
+ Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1),
|
|
|
+ "*", Resources.createResource(1 * GB), 2, true, null,
|
|
|
+ ExecutionTypeRequest.newInstance(
|
|
|
+ ExecutionType.OPPORTUNISTIC, true))),
|
|
|
+ null);
|
|
|
+ List<Container> allocatedContainers = allocateResponse
|
|
|
+ .getAllocatedContainers();
|
|
|
+ Assert.assertEquals(2, allocatedContainers.size());
|
|
|
+ Container container = allocatedContainers.get(0);
|
|
|
+ MockNM allocNode = nodes.get(container.getNodeId());
|
|
|
+ MockNM sameHostDiffNode = null;
|
|
|
+ for (NodeId n : nodes.keySet()) {
|
|
|
+ if (n.getHost().equals(allocNode.getNodeId().getHost()) &&
|
|
|
+ n.getPort() != allocNode.getNodeId().getPort()) {
|
|
|
+ sameHostDiffNode = nodes.get(n);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // Verify Metrics After OPP allocation (Nothing should change)
|
|
|
+ verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
|
|
|
+
|
|
|
+ am1.sendContainerUpdateRequest(
|
|
|
+ Arrays.asList(UpdateContainerRequest.newInstance(0,
|
|
|
+ container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE,
|
|
|
+ null, ExecutionType.GUARANTEED)));
|
|
|
+ // Node on same host should not result in allocation
|
|
|
+ sameHostDiffNode.nodeHeartbeat(true);
|
|
|
+ Thread.sleep(200);
|
|
|
+ allocateResponse = am1.allocate(null, null);
|
|
|
+ Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
|
|
|
+
|
|
|
+ // Verify Metrics After OPP allocation (Nothing should change again)
|
|
|
+ verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
|
|
|
+
|
|
|
+ // Send Promotion req again... this should result in update error
|
|
|
+ allocateResponse = am1.sendContainerUpdateRequest(
|
|
|
+ Arrays.asList(UpdateContainerRequest.newInstance(0,
|
|
|
+ container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE,
|
|
|
+ null, ExecutionType.GUARANTEED)));
|
|
|
+ Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
|
|
|
+ Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
|
|
|
+ Assert.assertEquals("UPDATE_OUTSTANDING_ERROR",
|
|
|
+ allocateResponse.getUpdateErrors().get(0).getReason());
|
|
|
+ Assert.assertEquals(container.getId(),
|
|
|
+ allocateResponse.getUpdateErrors().get(0)
|
|
|
+ .getUpdateContainerRequest().getContainerId());
|
|
|
+
|
|
|
+ // Send Promotion req again with incorrect version...
|
|
|
+ // this should also result in update error
|
|
|
+ allocateResponse = am1.sendContainerUpdateRequest(
|
|
|
+ Arrays.asList(UpdateContainerRequest.newInstance(1,
|
|
|
+ container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE,
|
|
|
+ null, ExecutionType.GUARANTEED)));
|
|
|
+
|
|
|
+ Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
|
|
|
+ Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
|
|
|
+ Assert.assertEquals("INCORRECT_CONTAINER_VERSION_ERROR|1|0",
|
|
|
+ allocateResponse.getUpdateErrors().get(0).getReason());
|
|
|
+ Assert.assertEquals(container.getId(),
|
|
|
+ allocateResponse.getUpdateErrors().get(0)
|
|
|
+ .getUpdateContainerRequest().getContainerId());
|
|
|
+
|
|
|
+ // Ensure after correct node heartbeats, we should get the allocation
|
|
|
+ allocNode.nodeHeartbeat(true);
|
|
|
+ Thread.sleep(200);
|
|
|
+ allocateResponse = am1.allocate(null, null);
|
|
|
+ Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
|
|
|
+ Container uc =
|
|
|
+ allocateResponse.getUpdatedContainers().get(0).getContainer();
|
|
|
+ Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
|
|
|
+ Assert.assertEquals(uc.getId(), container.getId());
|
|
|
+ Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
|
|
|
+
|
|
|
+ // Verify Metrics After OPP allocation :
|
|
|
+ // Allocated cores+mem should have increased, available should decrease
|
|
|
+ verifyMetrics(metrics, 14336, 14, 2048, 2, 2);
|
|
|
+
|
|
|
+ nm1.nodeHeartbeat(true);
|
|
|
+ nm2.nodeHeartbeat(true);
|
|
|
+ nm3.nodeHeartbeat(true);
|
|
|
+ nm4.nodeHeartbeat(true);
|
|
|
+ Thread.sleep(200);
|
|
|
+
|
|
|
+ // Verify that the container is still in ACQUIRED state wrt the RM.
|
|
|
+ RMContainer rmContainer = ((CapacityScheduler) scheduler)
|
|
|
+ .getApplicationAttempt(
|
|
|
+ uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
|
|
|
+ Assert.assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());
|
|
|
+
|
|
|
+ // Now demote the container back..
|
|
|
+ allocateResponse = am1.sendContainerUpdateRequest(
|
|
|
+ Arrays.asList(UpdateContainerRequest.newInstance(uc.getVersion(),
|
|
|
+ uc.getId(), ContainerUpdateType.DEMOTE_EXECUTION_TYPE,
|
|
|
+ null, ExecutionType.OPPORTUNISTIC)));
|
|
|
+ // This should happen in the same heartbeat..
|
|
|
+ Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
|
|
|
+ uc = allocateResponse.getUpdatedContainers().get(0).getContainer();
|
|
|
+ Assert.assertEquals(ExecutionType.OPPORTUNISTIC, uc.getExecutionType());
|
|
|
+ Assert.assertEquals(uc.getId(), container.getId());
|
|
|
+ Assert.assertEquals(uc.getVersion(), container.getVersion() + 2);
|
|
|
+
|
|
|
+ // Verify Metrics After OPP allocation :
|
|
|
+ // Everything should have reverted to what it was
|
|
|
+ verifyMetrics(metrics, 15360, 15, 1024, 1, 1);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test(timeout = 60000)
|
|
|
+ public void testContainerPromoteAfterContainerStart() throws Exception {
|
|
|
+ HashMap<NodeId, MockNM> nodes = new HashMap<>();
|
|
|
+ MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
|
|
|
+ nodes.put(nm1.getNodeId(), nm1);
|
|
|
+ MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
|
|
|
+ nodes.put(nm2.getNodeId(), nm2);
|
|
|
+ nm1.registerNode();
|
|
|
+ nm2.registerNode();
|
|
|
+
|
|
|
+ OpportunisticContainerAllocatorAMService amservice =
|
|
|
+ (OpportunisticContainerAllocatorAMService) rm
|
|
|
+ .getApplicationMasterService();
|
|
|
+ RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
|
|
|
+ ApplicationAttemptId attemptId =
|
|
|
+ app1.getCurrentAppAttempt().getAppAttemptId();
|
|
|
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
|
|
|
+ ResourceScheduler scheduler = rm.getResourceScheduler();
|
|
|
+ RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
|
|
|
+ RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
|
|
|
+
|
|
|
+ nm1.nodeHeartbeat(true);
|
|
|
+ nm2.nodeHeartbeat(true);
|
|
|
+
|
|
|
+ ((RMNodeImpl) rmNode1)
|
|
|
+ .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
|
|
|
+ ((RMNodeImpl) rmNode2)
|
|
|
+ .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
|
|
|
+
|
|
|
+ OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler)
|
|
|
+ .getApplicationAttempt(attemptId).getOpportunisticContainerContext();
|
|
|
+ // Send add and update node events to AM Service.
|
|
|
+ amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
|
|
|
+ amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
|
|
|
+ amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
|
|
|
+ amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
|
|
|
+
|
|
|
+ // All nodes 1 to 2 will be applicable for scheduling.
|
|
|
+ nm1.nodeHeartbeat(true);
|
|
|
+ nm2.nodeHeartbeat(true);
|
|
|
+
|
|
|
+ Thread.sleep(1000);
|
|
|
+
|
|
|
+ QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue()
|
|
|
+ .getMetrics();
|
|
|
+
|
|
|
+ // Verify Metrics
|
|
|
+ verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
|
|
|
+
|
|
|
+ AllocateResponse allocateResponse = am1.allocate(
|
|
|
+ Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1),
|
|
|
+ "*", Resources.createResource(1 * GB), 2, true, null,
|
|
|
+ ExecutionTypeRequest.newInstance(
|
|
|
+ ExecutionType.OPPORTUNISTIC, true))),
|
|
|
+ null);
|
|
|
+ List<Container> allocatedContainers = allocateResponse
|
|
|
+ .getAllocatedContainers();
|
|
|
+ Assert.assertEquals(2, allocatedContainers.size());
|
|
|
+ Container container = allocatedContainers.get(0);
|
|
|
+ MockNM allocNode = nodes.get(container.getNodeId());
|
|
|
+
|
|
|
+ // Start Container in NM
|
|
|
+ allocNode.nodeHeartbeat(Arrays.asList(
|
|
|
+ ContainerStatus.newInstance(container.getId(),
|
|
|
+ ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)),
|
|
|
+ true);
|
|
|
+ Thread.sleep(200);
|
|
|
+
|
|
|
+ // Verify that container is actually running wrt the RM..
|
|
|
+ RMContainer rmContainer = ((CapacityScheduler) scheduler)
|
|
|
+ .getApplicationAttempt(
|
|
|
+ container.getId().getApplicationAttemptId()).getRMContainer(
|
|
|
+ container.getId());
|
|
|
+ Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
|
|
|
+
|
|
|
+ // Verify Metrics After OPP allocation (Nothing should change)
|
|
|
+ verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
|
|
|
+
|
|
|
+ am1.sendContainerUpdateRequest(
|
|
|
+ Arrays.asList(UpdateContainerRequest.newInstance(0,
|
|
|
+ container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE,
|
|
|
+ null, ExecutionType.GUARANTEED)));
|
|
|
+
|
|
|
+ // Verify Metrics After OPP allocation (Nothing should change again)
|
|
|
+ verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
|
|
|
+
|
|
|
+ // Send Promotion req again... this should result in update error
|
|
|
+ allocateResponse = am1.sendContainerUpdateRequest(
|
|
|
+ Arrays.asList(UpdateContainerRequest.newInstance(0,
|
|
|
+ container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE,
|
|
|
+ null, ExecutionType.GUARANTEED)));
|
|
|
+ Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
|
|
|
+ Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
|
|
|
+ Assert.assertEquals("UPDATE_OUTSTANDING_ERROR",
|
|
|
+ allocateResponse.getUpdateErrors().get(0).getReason());
|
|
|
+ Assert.assertEquals(container.getId(),
|
|
|
+ allocateResponse.getUpdateErrors().get(0)
|
|
|
+ .getUpdateContainerRequest().getContainerId());
|
|
|
+
|
|
|
+ // Start Container in NM
|
|
|
+ allocNode.nodeHeartbeat(Arrays.asList(
|
|
|
+ ContainerStatus.newInstance(container.getId(),
|
|
|
+ ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)),
|
|
|
+ true);
|
|
|
+ Thread.sleep(200);
|
|
|
+
|
|
|
+ allocateResponse = am1.allocate(null, null);
|
|
|
+ Assert.assertEquals(1, allocateResponse.getUpdatedContainers().size());
|
|
|
+ Container uc =
|
|
|
+ allocateResponse.getUpdatedContainers().get(0).getContainer();
|
|
|
+ Assert.assertEquals(ExecutionType.GUARANTEED, uc.getExecutionType());
|
|
|
+ Assert.assertEquals(uc.getId(), container.getId());
|
|
|
+ Assert.assertEquals(uc.getVersion(), container.getVersion() + 1);
|
|
|
+
|
|
|
+ // Verify that the Container is still in RUNNING state wrt RM..
|
|
|
+ rmContainer = ((CapacityScheduler) scheduler)
|
|
|
+ .getApplicationAttempt(
|
|
|
+ uc.getId().getApplicationAttemptId()).getRMContainer(uc.getId());
|
|
|
+ Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
|
|
|
+
|
|
|
+ // Verify Metrics After OPP allocation :
|
|
|
+ // Allocated cores+mem should have increased, available should decrease
|
|
|
+ verifyMetrics(metrics, 6144, 6, 2048, 2, 2);
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test(timeout = 600000)
|
|
|
+ public void testContainerPromoteAfterContainerComplete() throws Exception {
|
|
|
+ HashMap<NodeId, MockNM> nodes = new HashMap<>();
|
|
|
+ MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
|
|
|
+ nodes.put(nm1.getNodeId(), nm1);
|
|
|
+ MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
|
|
|
+ nodes.put(nm2.getNodeId(), nm2);
|
|
|
+ nm1.registerNode();
|
|
|
+ nm2.registerNode();
|
|
|
+
|
|
|
+ OpportunisticContainerAllocatorAMService amservice =
|
|
|
+ (OpportunisticContainerAllocatorAMService) rm
|
|
|
+ .getApplicationMasterService();
|
|
|
+ RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "default");
|
|
|
+ ApplicationAttemptId attemptId =
|
|
|
+ app1.getCurrentAppAttempt().getAppAttemptId();
|
|
|
+ MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm2);
|
|
|
+ ResourceScheduler scheduler = rm.getResourceScheduler();
|
|
|
+ RMNode rmNode1 = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
|
|
|
+ RMNode rmNode2 = rm.getRMContext().getRMNodes().get(nm2.getNodeId());
|
|
|
+
|
|
|
+ nm1.nodeHeartbeat(true);
|
|
|
+ nm2.nodeHeartbeat(true);
|
|
|
+
|
|
|
+ ((RMNodeImpl) rmNode1)
|
|
|
+ .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
|
|
|
+ ((RMNodeImpl) rmNode2)
|
|
|
+ .setOpportunisticContainersStatus(getOppurtunisticStatus(-1, 100));
|
|
|
+
|
|
|
+ OpportunisticContainerContext ctxt = ((CapacityScheduler) scheduler)
|
|
|
+ .getApplicationAttempt(attemptId).getOpportunisticContainerContext();
|
|
|
+ // Send add and update node events to AM Service.
|
|
|
+ amservice.handle(new NodeAddedSchedulerEvent(rmNode1));
|
|
|
+ amservice.handle(new NodeAddedSchedulerEvent(rmNode2));
|
|
|
+ amservice.handle(new NodeUpdateSchedulerEvent(rmNode1));
|
|
|
+ amservice.handle(new NodeUpdateSchedulerEvent(rmNode2));
|
|
|
+
|
|
|
+ // All nodes 1 to 2 will be applicable for scheduling.
|
|
|
+ nm1.nodeHeartbeat(true);
|
|
|
+ nm2.nodeHeartbeat(true);
|
|
|
+
|
|
|
+ Thread.sleep(1000);
|
|
|
+
|
|
|
+ QueueMetrics metrics = ((CapacityScheduler) scheduler).getRootQueue()
|
|
|
+ .getMetrics();
|
|
|
+
|
|
|
+ // Verify Metrics
|
|
|
+ verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
|
|
|
+
|
|
|
+ AllocateResponse allocateResponse = am1.allocate(
|
|
|
+ Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1),
|
|
|
+ "*", Resources.createResource(1 * GB), 2, true, null,
|
|
|
+ ExecutionTypeRequest.newInstance(
|
|
|
+ ExecutionType.OPPORTUNISTIC, true))),
|
|
|
+ null);
|
|
|
+ List<Container> allocatedContainers = allocateResponse
|
|
|
+ .getAllocatedContainers();
|
|
|
+ Assert.assertEquals(2, allocatedContainers.size());
|
|
|
+ Container container = allocatedContainers.get(0);
|
|
|
+ MockNM allocNode = nodes.get(container.getNodeId());
|
|
|
+
|
|
|
+ // Start Container in NM
|
|
|
+ allocNode.nodeHeartbeat(Arrays.asList(
|
|
|
+ ContainerStatus.newInstance(container.getId(),
|
|
|
+ ExecutionType.OPPORTUNISTIC, ContainerState.RUNNING, "", 0)),
|
|
|
+ true);
|
|
|
+ Thread.sleep(200);
|
|
|
+
|
|
|
+ // Verify that container is actually running wrt the RM..
|
|
|
+ RMContainer rmContainer = ((CapacityScheduler) scheduler)
|
|
|
+ .getApplicationAttempt(
|
|
|
+ container.getId().getApplicationAttemptId()).getRMContainer(
|
|
|
+ container.getId());
|
|
|
+ Assert.assertEquals(RMContainerState.RUNNING, rmContainer.getState());
|
|
|
+
|
|
|
+ // Container Completed in the NM
|
|
|
+ allocNode.nodeHeartbeat(Arrays.asList(
|
|
|
+ ContainerStatus.newInstance(container.getId(),
|
|
|
+ ExecutionType.OPPORTUNISTIC, ContainerState.COMPLETE, "", 0)),
|
|
|
+ true);
|
|
|
+ Thread.sleep(200);
|
|
|
+
|
|
|
+ // Verify that container has been removed..
|
|
|
+ rmContainer = ((CapacityScheduler) scheduler)
|
|
|
+ .getApplicationAttempt(
|
|
|
+ container.getId().getApplicationAttemptId()).getRMContainer(
|
|
|
+ container.getId());
|
|
|
+ Assert.assertNull(rmContainer);
|
|
|
+
|
|
|
+ // Verify Metrics After OPP allocation (Nothing should change)
|
|
|
+ verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
|
|
|
+
|
|
|
+ // Send Promotion req... this should result in update error
|
|
|
+ // Since the container doesn't exist anymore..
|
|
|
+ allocateResponse = am1.sendContainerUpdateRequest(
|
|
|
+ Arrays.asList(UpdateContainerRequest.newInstance(0,
|
|
|
+ container.getId(), ContainerUpdateType.PROMOTE_EXECUTION_TYPE,
|
|
|
+ null, ExecutionType.GUARANTEED)));
|
|
|
+
|
|
|
+ Assert.assertEquals(1,
|
|
|
+ allocateResponse.getCompletedContainersStatuses().size());
|
|
|
+ Assert.assertEquals(container.getId(),
|
|
|
+ allocateResponse.getCompletedContainersStatuses().get(0)
|
|
|
+ .getContainerId());
|
|
|
+ Assert.assertEquals(0, allocateResponse.getUpdatedContainers().size());
|
|
|
+ Assert.assertEquals(1, allocateResponse.getUpdateErrors().size());
|
|
|
+ Assert.assertEquals("INVALID_CONTAINER_ID",
|
|
|
+ allocateResponse.getUpdateErrors().get(0).getReason());
|
|
|
+ Assert.assertEquals(container.getId(),
|
|
|
+ allocateResponse.getUpdateErrors().get(0)
|
|
|
+ .getUpdateContainerRequest().getContainerId());
|
|
|
+
|
|
|
+ // Verify Metrics After OPP allocation (Nothing should change again)
|
|
|
+ verifyMetrics(metrics, 7168, 7, 1024, 1, 1);
|
|
|
+ }
|
|
|
+
|
|
|
+ private void verifyMetrics(QueueMetrics metrics, long availableMB,
|
|
|
+ int availableVirtualCores, long allocatedMB,
|
|
|
+ int allocatedVirtualCores, int allocatedContainers) {
|
|
|
+ Assert.assertEquals(availableMB, metrics.getAvailableMB());
|
|
|
+ Assert.assertEquals(availableVirtualCores, metrics.getAvailableVirtualCores());
|
|
|
+ Assert.assertEquals(allocatedMB, metrics.getAllocatedMB());
|
|
|
+ Assert.assertEquals(allocatedVirtualCores, metrics.getAllocatedVirtualCores());
|
|
|
+ Assert.assertEquals(allocatedContainers, metrics.getAllocatedContainers());
|
|
|
+ }
|
|
|
+
|
|
|
+ @Test(timeout = 60000)
|
|
|
+ public void testNodeRemovalDuringAllocate() throws Exception {
|
|
|
MockNM nm1 = new MockNM("h1:1234", 4096, rm.getResourceTrackerService());
|
|
|
MockNM nm2 = new MockNM("h2:1234", 4096, rm.getResourceTrackerService());
|
|
|
nm1.registerNode();
|