소스 검색

YARN-8032. Added ability to configure failure validity interval for YARN service.
Contributed by Chandni Singh

Eric Yang 7 년 전
부모
커밋
647058efc0

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java

@@ -31,6 +31,8 @@ public class YarnServiceConf {
   // Retry settings for container failures
   public static final String CONTAINER_RETRY_MAX = "yarn.service.container-failure.retry.max";
   public static final String CONTAINER_RETRY_INTERVAL = "yarn.service.container-failure.retry-interval-ms";
+  public static final String CONTAINER_FAILURES_VALIDITY_INTERVAL =
+      "yarn.service.container-failure.validity-interval-ms";
 
   public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts";
   public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory";

+ 4 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java

@@ -169,10 +169,11 @@ public class AbstractLauncher {
     return containerLaunchContext;
   }
 
-  public void setRetryContext(int maxRetries, int retryInterval) {
+  public void setRetryContext(int maxRetries, int retryInterval,
+      long failuresValidityInterval) {
     ContainerRetryContext retryContext = ContainerRetryContext
-        .newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, maxRetries,
-            retryInterval);
+        .newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null,
+            maxRetries, retryInterval, failuresValidityInterval);
     containerLaunchContext.setContainerRetryContext(retryContext);
   }
 

+ 4 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java

@@ -39,6 +39,7 @@ import java.io.IOException;
 import java.util.Map;
 import java.util.Map.Entry;
 
+import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_FAILURES_VALIDITY_INTERVAL;
 import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_INTERVAL;
 import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_MAX;
 import static org.apache.hadoop.yarn.service.utils.ServiceApiUtil.$;
@@ -109,6 +110,8 @@ public abstract class AbstractProviderService implements ProviderService,
         .getInt(CONTAINER_RETRY_MAX, -1, service.getConfiguration(),
             yarnConf), YarnServiceConf
         .getInt(CONTAINER_RETRY_INTERVAL, 30000, service.getConfiguration(),
-            yarnConf));
+            yarnConf),
+        YarnServiceConf.getLong(CONTAINER_FAILURES_VALIDITY_INTERVAL, -1,
+            service.getConfiguration(), yarnConf));
   }
 }

+ 1 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Configurations.md

@@ -113,6 +113,7 @@ Above config make the service AM to be retried at max 10 times.
 |yarn.service.client-am.retry-interval-ms | the retry interval in milliseconds for the service client to talk to the service AM. By default, it is 2000, i.e. 2 seconds |
 |yarn.service.container-failure.retry.max | the max number of retries for the container to be auto restarted if it fails. By default, it is set to -1, which means forever.
 |yarn.service.container-failure.retry-interval-ms| the retry interval in milliseconds for the container to be restarted. By default, it is 30000, i.e. 30 seconds |
+|yarn.service.container-failure.validity-interval-ms | the failure validity interval in milliseconds. When set to a value greater than 0, failures that happened outside of this interval are not counted towards the failure count. By default, it is set to -1, which means that all failures so far are included in the failure count. |
 |yarn.service.am-restart.max-attempts| the max number of attempts for the framework AM
 |yarn.service.am-resource.memory | the memory size in MB for the framework AM. By default, it is set to 1024
 |yarn.service.queue | the default queue to which the service will be submitted. By default, it is submitted to `default` queue