Pārlūkot izejas kodu

YARN-4649. Add additional logging to some NM state store operations. Contributed by Sidharta Seethana

Varun Vasudev 9 gadi atpakaļ
vecāks
revīzija
1cd55e0c17

+ 3 - 0
hadoop-yarn-project/CHANGES.txt

@@ -115,6 +115,9 @@ Release 2.9.0 - UNRELEASED
 
     YARN-4647. Make RegisterNodeManagerRequestPBImpl thread-safe. (kasha)
 
+    YARN-4649. Add additional logging to some NM state store operations.
+    (Sidharta Seethana via vvasudev)
+
   OPTIMIZATIONS
 
   BUG FIXES

+ 14 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java

@@ -286,18 +286,32 @@ public class ContainerManagerImpl extends CompositeService implements
       RecoveredApplicationsState appsState = stateStore.loadApplicationsState();
       for (ContainerManagerApplicationProto proto :
            appsState.getApplications()) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Recovering application with state: " + proto.toString());
+        }
         recoverApplication(proto);
       }
 
       for (RecoveredContainerState rcs : stateStore.loadContainersState()) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Recovering container with state: " + rcs);
+        }
+
         recoverContainer(rcs);
       }
 
       String diagnostic = "Application marked finished during recovery";
       for (ApplicationId appId : appsState.getFinishedApplications()) {
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Application marked finished during recovery: " + appId);
+        }
+
         dispatcher.getEventHandler().handle(
             new ApplicationFinishEvent(appId, diagnostic));
       }
+    } else {
+      LOG.info("Not a recoverable state store. Nothing to recover.");
     }
   }
 

+ 44 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java

@@ -248,6 +248,11 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void storeContainer(ContainerId containerId,
       StartContainerRequest startRequest) throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("storeContainer: containerId= " + containerId
+          + ", startRequest= " + startRequest);
+    }
+
     String key = CONTAINERS_KEY_PREFIX + containerId.toString()
         + CONTAINER_REQUEST_KEY_SUFFIX;
     try {
@@ -261,6 +266,11 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void storeContainerDiagnostics(ContainerId containerId,
       StringBuilder diagnostics) throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("storeContainerDiagnostics: containerId=" + containerId
+          + ", diagnostics=" + diagnostics);
+    }
+
     String key = CONTAINERS_KEY_PREFIX + containerId.toString()
         + CONTAINER_DIAGS_KEY_SUFFIX;
     try {
@@ -273,6 +283,10 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void storeContainerLaunched(ContainerId containerId)
       throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("storeContainerLaunched: containerId=" + containerId);
+    }
+
     String key = CONTAINERS_KEY_PREFIX + containerId.toString()
         + CONTAINER_LAUNCHED_KEY_SUFFIX;
     try {
@@ -285,6 +299,11 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void storeContainerResourceChanged(ContainerId containerId,
       Resource capability) throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("storeContainerResourceChanged: containerId=" + containerId
+          + ", capability=" + capability);
+    }
+
     String key = CONTAINERS_KEY_PREFIX + containerId.toString()
         + CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX;
     try {
@@ -299,6 +318,10 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void storeContainerKilled(ContainerId containerId)
       throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("storeContainerKilled: containerId=" + containerId);
+    }
+
     String key = CONTAINERS_KEY_PREFIX + containerId.toString()
         + CONTAINER_KILLED_KEY_SUFFIX;
     try {
@@ -311,6 +334,10 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void storeContainerCompleted(ContainerId containerId,
       int exitCode) throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("storeContainerCompleted: containerId=" + containerId);
+    }
+
     String key = CONTAINERS_KEY_PREFIX + containerId.toString()
         + CONTAINER_EXIT_CODE_KEY_SUFFIX;
     try {
@@ -323,6 +350,10 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void removeContainer(ContainerId containerId)
       throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("removeContainer: containerId=" + containerId);
+    }
+
     String keyPrefix = CONTAINERS_KEY_PREFIX + containerId.toString();
     try {
       WriteBatch batch = db.createWriteBatch();
@@ -389,6 +420,11 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void storeApplication(ApplicationId appId,
       ContainerManagerApplicationProto p) throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("storeApplication: appId=" + appId
+          + ", proto=" + p);
+    }
+
     String key = APPLICATIONS_KEY_PREFIX + appId;
     try {
       db.put(bytes(key), p.toByteArray());
@@ -400,6 +436,10 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void storeFinishedApplication(ApplicationId appId)
       throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("storeFinishedApplication.appId: " + appId);
+    }
+
     String key = FINISHED_APPS_KEY_PREFIX + appId;
     try {
       db.put(bytes(key), new byte[0]);
@@ -411,6 +451,10 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
   @Override
   public void removeApplication(ApplicationId appId)
       throws IOException {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("removeApplication: appId=" + appId);
+    }
+
     try {
       WriteBatch batch = db.createWriteBatch();
       try {

+ 11 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java

@@ -100,6 +100,17 @@ public abstract class NMStateStoreService extends AbstractService {
     public Resource getCapability() {
       return capability;
     }
+
+    /** Returns a one-line, human-readable summary of this state's fields. */
+    @Override
+    public String toString() {
+      return new StringBuilder("Status: ").append(getStatus())
+          .append(", Exit code: ").append(exitCode)
+          .append(", Killed: ").append(getKilled())
+          .append(", Diagnostics: ").append(getDiagnostics())
+          .append(", Capability: ").append(getCapability())
+          .append(", StartRequest: ").append(getStartRequest()).toString();
+    }
   }
 
   public static class LocalResourceTrackerState {