|
@@ -45,6 +45,7 @@ import org.apache.hadoop.fs.FileContext;
|
|
|
import org.apache.hadoop.fs.Path;
|
|
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
|
|
import org.apache.hadoop.io.DataOutputBuffer;
|
|
|
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
|
|
import org.apache.hadoop.net.ServerSocketUtil;
|
|
|
import org.apache.hadoop.security.Credentials;
|
|
|
import org.apache.hadoop.security.UserGroupInformation;
|
|
@@ -100,6 +101,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.Contai
|
|
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler;
|
|
|
|
|
|
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
|
|
|
+import org.apache.hadoop.yarn.server.nodemanager.metrics.TestNodeManagerMetrics;
|
|
|
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMMemoryStateStoreService;
|
|
|
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMNullStateStoreService;
|
|
|
import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
|
|
@@ -393,6 +395,61 @@ public class TestContainerManagerRecovery extends BaseContainerManagerTest {
|
|
|
cm.stop();
|
|
|
}
|
|
|
|
|
|
+ @Test
|
|
|
+ public void testNodeManagerMetricsRecovery() throws Exception {
|
|
|
+ conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
|
|
|
+ conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
|
|
|
+
|
|
|
+ NMStateStoreService stateStore = new NMMemoryStateStoreService();
|
|
|
+ stateStore.init(conf);
|
|
|
+ stateStore.start();
|
|
|
+ Context context = createContext(conf, stateStore);
|
|
|
+ ContainerManagerImpl cm = createContainerManager(context, delSrvc);
|
|
|
+ cm.init(conf);
|
|
|
+ cm.start();
|
|
|
+ metrics.addResource(Resource.newInstance(10240, 8));
|
|
|
+
|
|
|
+ // add an application by starting a container
|
|
|
+ ApplicationId appId = ApplicationId.newInstance(0, 1);
|
|
|
+ ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
|
|
|
+ ContainerId cid = ContainerId.newContainerId(attemptId, 1);
|
|
|
+ Map<String, String> containerEnv = Collections.emptyMap();
|
|
|
+ Map<String, ByteBuffer> serviceData = Collections.emptyMap();
|
|
|
+ Map<String, LocalResource> localResources = Collections.emptyMap();
|
|
|
+ List<String> commands = Arrays.asList("sleep 60s".split(" "));
|
|
|
+ ContainerLaunchContext clc = ContainerLaunchContext.newInstance(
|
|
|
+ localResources, containerEnv, commands, serviceData,
|
|
|
+ null, null);
|
|
|
+ StartContainersResponse startResponse = startContainer(context, cm, cid,
|
|
|
+ clc, null);
|
|
|
+ assertTrue(startResponse.getFailedRequests().isEmpty());
|
|
|
+ assertEquals(1, context.getApplications().size());
|
|
|
+ Application app = context.getApplications().get(appId);
|
|
|
+ assertNotNull(app);
|
|
|
+
|
|
|
+ // make sure the container reaches RUNNING state
|
|
|
+ waitForNMContainerState(cm, cid,
|
|
|
+ org.apache.hadoop.yarn.server.nodemanager
|
|
|
+ .containermanager.container.ContainerState.RUNNING);
|
|
|
+ TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0, 1, 1, 1, 9, 1, 7);
|
|
|
+
|
|
|
+ // restart and verify metrics could be recovered
|
|
|
+ cm.stop();
|
|
|
+ DefaultMetricsSystem.shutdown();
|
|
|
+ metrics = NodeManagerMetrics.create();
|
|
|
+ metrics.addResource(Resource.newInstance(10240, 8));
|
|
|
+ TestNodeManagerMetrics.checkMetrics(0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 8);
|
|
|
+ context = createContext(conf, stateStore);
|
|
|
+ cm = createContainerManager(context, delSrvc);
|
|
|
+ cm.init(conf);
|
|
|
+ cm.start();
|
|
|
+ assertEquals(1, context.getApplications().size());
|
|
|
+ app = context.getApplications().get(appId);
|
|
|
+ assertNotNull(app);
|
|
|
+ TestNodeManagerMetrics.checkMetrics(1, 0, 0, 0, 0, 1, 1, 1, 9, 1, 7);
|
|
|
+ cm.stop();
|
|
|
+ }
|
|
|
+
|
|
|
@Test
|
|
|
public void testContainerResizeRecovery() throws Exception {
|
|
|
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
|