|
@@ -294,6 +294,90 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
|
|
|
cm.stop();
|
|
|
}
|
|
|
|
|
|
+ @Test
|
|
|
+ public void testNMRecoveryForAppFinishedWithLogAggregationFailure()
|
|
|
+ throws Exception {
|
|
|
+ conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
|
|
|
+ conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
|
|
|
+
|
|
|
+ NMStateStoreService stateStore = new NMMemoryStateStoreService();
|
|
|
+ stateStore.init(conf);
|
|
|
+ stateStore.start();
|
|
|
+ Context context = createContext(conf, stateStore);
|
|
|
+ ContainerManagerImpl cm = createContainerManager(context);
|
|
|
+ cm.init(conf);
|
|
|
+ cm.start();
|
|
|
+
|
|
|
+ // add an application by starting a container
|
|
|
+ ApplicationId appId = ApplicationId.newInstance(0, 1);
|
|
|
+ ApplicationAttemptId attemptId =
|
|
|
+ ApplicationAttemptId.newInstance(appId, 1);
|
|
|
+ ContainerId cid = ContainerId.newContainerId(attemptId, 1);
|
|
|
+ Map<String, LocalResource> localResources = Collections.emptyMap();
|
|
|
+ Map<String, String> containerEnv = Collections.emptyMap();
|
|
|
+ List<String> containerCmds = Collections.emptyList();
|
|
|
+ Map<String, ByteBuffer> serviceData = Collections.emptyMap();
|
|
|
+
|
|
|
+ ContainerLaunchContext clc = ContainerLaunchContext.newInstance(
|
|
|
+ localResources, containerEnv, containerCmds, serviceData,
|
|
|
+ null, null);
|
|
|
+
|
|
|
+ StartContainersResponse startResponse = startContainer(context, cm, cid,
|
|
|
+ clc, null);
|
|
|
+ assertTrue(startResponse.getFailedRequests().isEmpty());
|
|
|
+ assertEquals(1, context.getApplications().size());
|
|
|
+ Application app = context.getApplications().get(appId);
|
|
|
+ assertNotNull(app);
|
|
|
+ waitForAppState(app, ApplicationState.INITING);
|
|
|
+
|
|
|
+ // simulate application completion
|
|
|
+ List<ApplicationId> finishedApps = new ArrayList<ApplicationId>();
|
|
|
+ finishedApps.add(appId);
|
|
|
+ cm.handle(new CMgrCompletedAppsEvent(finishedApps,
|
|
|
+ CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER));
|
|
|
+ waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
|
|
|
+
|
|
|
+ app.handle(new ApplicationEvent(app.getAppId(),
|
|
|
+ ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
|
|
|
+ assertEquals(app.getApplicationState(), ApplicationState.FINISHED);
|
|
|
+ // application is still in NM context.
|
|
|
+ assertEquals(1, context.getApplications().size());
|
|
|
+
|
|
|
+ // restart and verify app is still there and marked as finished.
|
|
|
+ cm.stop();
|
|
|
+ context = createContext(conf, stateStore);
|
|
|
+ cm = createContainerManager(context);
|
|
|
+ cm.init(conf);
|
|
|
+ cm.start();
|
|
|
+ assertEquals(1, context.getApplications().size());
|
|
|
+ app = context.getApplications().get(appId);
|
|
|
+ assertNotNull(app);
|
|
|
+
|
|
|
+ // no longer saving FINISH_APP event in NM stateStore,
|
|
|
+ // simulate by resending FINISH_APP event
|
|
|
+ cm.handle(new CMgrCompletedAppsEvent(finishedApps,
|
|
|
+ CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER));
|
|
|
+ waitForAppState(app, ApplicationState.APPLICATION_RESOURCES_CLEANINGUP);
|
|
|
+ // TODO need to figure out why additional APPLICATION_RESOURCES_CLEANEDUP
|
|
|
+ // is needed.
|
|
|
+ app.handle(new ApplicationEvent(app.getAppId(),
|
|
|
+ ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
|
|
|
+ assertEquals(app.getApplicationState(), ApplicationState.FINISHED);
|
|
|
+
|
|
|
+ // simulate log aggregation failed.
|
|
|
+ app.handle(new ApplicationEvent(app.getAppId(),
|
|
|
+ ApplicationEventType.APPLICATION_LOG_HANDLING_FAILED));
|
|
|
+
|
|
|
+ // restart and verify app is no longer present after recovery
|
|
|
+ cm.stop();
|
|
|
+ context = createContext(conf, stateStore);
|
|
|
+ cm = createContainerManager(context);
|
|
|
+ cm.init(conf);
|
|
|
+ cm.start();
|
|
|
+ assertTrue(context.getApplications().isEmpty());
|
|
|
+ cm.stop();
|
|
|
+ }
|
|
|
+
|
|
|
@Test
|
|
|
public void testContainerResizeRecovery() throws Exception {
|
|
|
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
|