Browse Source

HADOOP-5118. Reduced the default retry frequency to once every 30 minutes, and the default number of retries to 48.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/trunk@748360 13f79535-47bb-0310-9956-ffa450edef68
Eric Yang 16 years ago
parent
commit
b3b6561700

+ 9 - 5
src/contrib/chukwa/src/java/org/apache/hadoop/chukwa/inputtools/log4j/ChukwaDailyRollingFileAppender.java

@@ -527,13 +527,17 @@ public class ChukwaDailyRollingFileAppender extends FileAppender {
 
 					  chukwaClientIsNull = false;
 					  
-					  //if they haven't specified, default to retrying every minute for 2 hours
+					  // Watchdog is watching for ChukwaAgent only once every 5 minutes, so there's no point in retrying more than once every 5 mins.
+					  // In practice, if the watchdog is not able to automatically restart the agent, it will take more than 20 minutes to get Ops to restart it.
+					  // Also, it's a good idea to limit the number of communications between Hadoop and Chukwa; that's why the default retry interval is 30 minutes.
 					  long retryInterval = chukwaClientConnectRetryInterval;
-					  if (retryInterval == 0)
-						  retryInterval = 1000 * 60;
+					  if (retryInterval == 0) {
+					    retryInterval = 1000 * 60 * 30;
+					  }
 					  long numRetries = chukwaClientConnectNumRetry;
-					  if (numRetries == 0)
-						  numRetries = 120;
+					  if (numRetries == 0) {
+					    numRetries = 48;
+					  }
 					  String log4jFileName = getFile();
 					  String recordType = getRecordType();
 					  long adaptorID = chukwaClient.addFile(recordType, log4jFileName, numRetries, retryInterval);