Browse Source

YARN-5112. Excessive log warnings for directory permission issue on NM recovery. Contributed by Jian He.
(cherry picked from commit 867cd2f5a2e5966d6a7c5f5accb2fce78f9e7778)

Junping Du 9 years ago
parent
commit
6161d9ba52

+ 2 - 43
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java

@@ -95,11 +95,6 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
   // NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS.
   // NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS.
   private static final String NM_LOG_AGGREGATION_DEBUG_ENABLED
   private static final String NM_LOG_AGGREGATION_DEBUG_ENABLED
       = YarnConfiguration.NM_PREFIX + "log-aggregation.debug-enabled";
       = YarnConfiguration.NM_PREFIX + "log-aggregation.debug-enabled";
-  private static final boolean
-      DEFAULT_NM_LOG_AGGREGATION_DEBUG_ENABLED = false;
-
-  private static final long
-      NM_LOG_AGGREGATION_MIN_ROLL_MONITORING_INTERVAL_SECONDS = 3600;
 
 
   private final LocalDirsHandlerService dirsHandler;
   private final LocalDirsHandlerService dirsHandler;
   private final Dispatcher dispatcher;
   private final Dispatcher dispatcher;
@@ -142,7 +137,7 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
       LocalDirsHandlerService dirsHandler, Path remoteNodeLogFileForApp,
       LocalDirsHandlerService dirsHandler, Path remoteNodeLogFileForApp,
       Map<ApplicationAccessType, String> appAcls,
       Map<ApplicationAccessType, String> appAcls,
       LogAggregationContext logAggregationContext, Context context,
       LogAggregationContext logAggregationContext, Context context,
-      FileContext lfs) {
+      FileContext lfs, long rollingMonitorInterval) {
     this.dispatcher = dispatcher;
     this.dispatcher = dispatcher;
     this.conf = conf;
     this.conf = conf;
     this.delService = deletionService;
     this.delService = deletionService;
@@ -167,43 +162,7 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
     } else {
     } else {
       this.retentionSize = configuredRentionSize;
       this.retentionSize = configuredRentionSize;
     }
     }
-    long configuredRollingMonitorInterval = conf.getLong(
-      YarnConfiguration
-        .NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS,
-      YarnConfiguration
-        .DEFAULT_NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS);
-    boolean debug_mode =
-        conf.getBoolean(NM_LOG_AGGREGATION_DEBUG_ENABLED,
-          DEFAULT_NM_LOG_AGGREGATION_DEBUG_ENABLED);
-    if (configuredRollingMonitorInterval > 0
-        && configuredRollingMonitorInterval <
-          NM_LOG_AGGREGATION_MIN_ROLL_MONITORING_INTERVAL_SECONDS) {
-      if (debug_mode) {
-        this.rollingMonitorInterval = configuredRollingMonitorInterval;
-      } else {
-        LOG.warn(
-            "rollingMonitorIntervall should be more than or equal to "
-            + NM_LOG_AGGREGATION_MIN_ROLL_MONITORING_INTERVAL_SECONDS
-            + " seconds. Using "
-            + NM_LOG_AGGREGATION_MIN_ROLL_MONITORING_INTERVAL_SECONDS
-            + " seconds instead.");
-        this.rollingMonitorInterval =
-            NM_LOG_AGGREGATION_MIN_ROLL_MONITORING_INTERVAL_SECONDS;
-      }
-    } else {
-      if (configuredRollingMonitorInterval <= 0) {
-        LOG.info("rollingMonitorInterval is set as "
-            + configuredRollingMonitorInterval + ". "
-            + "The log rolling monitoring interval is disabled. "
-            + "The logs will be aggregated after this application is finished.");
-      } else {
-        LOG.info("rollingMonitorInterval is set as "
-            + configuredRollingMonitorInterval + ". "
-            + "The logs will be aggregated every "
-            + configuredRollingMonitorInterval + " seconds");
-      }
-      this.rollingMonitorInterval = configuredRollingMonitorInterval;
-    }
+    this.rollingMonitorInterval = rollingMonitorInterval;
     this.logAggregationInRolling =
     this.logAggregationInRolling =
         this.rollingMonitorInterval <= 0 || this.logAggregationContext == null
         this.rollingMonitorInterval <= 0 || this.logAggregationContext == null
             || this.logAggregationContext.getRolledLogsIncludePattern() == null
             || this.logAggregationContext.getRolledLogsIncludePattern() == null

+ 43 - 4
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java

@@ -69,8 +69,14 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
 public class LogAggregationService extends AbstractService implements
 public class LogAggregationService extends AbstractService implements
     LogHandler {
     LogHandler {
 
 
-  private static final Log LOG = LogFactory
-      .getLog(LogAggregationService.class);
+  private static final Log LOG = LogFactory.getLog(LogAggregationService.class);
+  private static final long MIN_LOG_ROLLING_INTERVAL = 3600;
+  // This configuration is for debug and test purpose. By setting
+  // this configuration as true. We can break the lower bound of
+  // NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS.
+  private static final String NM_LOG_AGGREGATION_DEBUG_ENABLED
+      = YarnConfiguration.NM_PREFIX + "log-aggregation.debug-enabled";
+  private long rollingMonitorInterval;
 
 
   /*
   /*
    * Expected deployment TLD will be 1777, owner=<NMOwner>, group=<NMGroup -
    * Expected deployment TLD will be 1777, owner=<NMOwner>, group=<NMGroup -
@@ -101,6 +107,7 @@ public class LogAggregationService extends AbstractService implements
   private NodeId nodeId;
   private NodeId nodeId;
 
 
   private final ConcurrentMap<ApplicationId, AppLogAggregator> appLogAggregators;
   private final ConcurrentMap<ApplicationId, AppLogAggregator> appLogAggregators;
+  private boolean logPermError = true;
 
 
   @VisibleForTesting
   @VisibleForTesting
   ExecutorService threadPool;
   ExecutorService threadPool;
@@ -128,6 +135,35 @@ public class LogAggregationService extends AbstractService implements
         new ThreadFactoryBuilder()
         new ThreadFactoryBuilder()
             .setNameFormat("LogAggregationService #%d")
             .setNameFormat("LogAggregationService #%d")
             .build());
             .build());
+
+    rollingMonitorInterval = conf.getLong(
+        YarnConfiguration.NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS,
+        YarnConfiguration.DEFAULT_NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS);
+
+    boolean logAggregationDebugMode =
+        conf.getBoolean(NM_LOG_AGGREGATION_DEBUG_ENABLED, false);
+
+    if (rollingMonitorInterval > 0
+        && rollingMonitorInterval < MIN_LOG_ROLLING_INTERVAL) {
+      if (logAggregationDebugMode) {
+        LOG.info("Log aggregation debug mode enabled. rollingMonitorInterval = "
+            + rollingMonitorInterval);
+      } else {
+        LOG.warn("rollingMonitorIntervall should be more than or equal to "
+            + MIN_LOG_ROLLING_INTERVAL + " seconds. Using "
+            + MIN_LOG_ROLLING_INTERVAL + " seconds instead.");
+        this.rollingMonitorInterval = MIN_LOG_ROLLING_INTERVAL;
+      }
+    } else if (rollingMonitorInterval <= 0) {
+      LOG.info("rollingMonitorInterval is set as " + rollingMonitorInterval
+          + ". The log rolling monitoring interval is disabled. "
+          + "The logs will be aggregated after this application is finished.");
+    } else {
+      LOG.info("rollingMonitorInterval is set as " + rollingMonitorInterval
+          + ". The logs will be aggregated every " + rollingMonitorInterval
+          + " seconds");
+    }
+
     super.serviceInit(conf);
     super.serviceInit(conf);
   }
   }
 
 
@@ -197,11 +233,14 @@ public class LogAggregationService extends AbstractService implements
     try {
     try {
       FsPermission perms =
       FsPermission perms =
           remoteFS.getFileStatus(this.remoteRootLogDir).getPermission();
           remoteFS.getFileStatus(this.remoteRootLogDir).getPermission();
-      if (!perms.equals(TLDIR_PERMISSIONS)) {
+      if (!perms.equals(TLDIR_PERMISSIONS) && logPermError) {
         LOG.warn("Remote Root Log Dir [" + this.remoteRootLogDir
         LOG.warn("Remote Root Log Dir [" + this.remoteRootLogDir
             + "] already exist, but with incorrect permissions. "
             + "] already exist, but with incorrect permissions. "
             + "Expected: [" + TLDIR_PERMISSIONS + "], Found: [" + perms
             + "Expected: [" + TLDIR_PERMISSIONS + "], Found: [" + perms
             + "]." + " The cluster may have problems with multiple users.");
             + "]." + " The cluster may have problems with multiple users.");
+        logPermError = false;
+      } else {
+        logPermError = true;
       }
       }
     } catch (FileNotFoundException e) {
     } catch (FileNotFoundException e) {
       remoteExists = false;
       remoteExists = false;
@@ -360,7 +399,7 @@ public class LogAggregationService extends AbstractService implements
             getConfig(), appId, userUgi, this.nodeId, dirsHandler,
             getConfig(), appId, userUgi, this.nodeId, dirsHandler,
             getRemoteNodeLogFileForApp(appId, user),
             getRemoteNodeLogFileForApp(appId, user),
             appAcls, logAggregationContext, this.context,
             appAcls, logAggregationContext, this.context,
-            getLocalFileContext(getConfig()));
+            getLocalFileContext(getConfig()), this.rollingMonitorInterval);
     if (this.appLogAggregators.putIfAbsent(appId, appLogAggregator) != null) {
     if (this.appLogAggregators.putIfAbsent(appId, appLogAggregator) != null) {
       throw new YarnRuntimeException("Duplicate initApp for " + appId);
       throw new YarnRuntimeException("Duplicate initApp for " + appId);
     }
     }