Bläddra i källkod

YARN-8360. Improve YARN service restart policy and node manager auto restart policy.
Contributed by Suma Shivaprasad

(cherry picked from commit 84d7bf1eeff6b9418361afa4aa713e5e6f771365)

Eric Yang 6 år sedan
förälder
incheckning
e665c0a9dd

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/AlwaysRestartPolicy.java

@@ -79,4 +79,9 @@ public final class AlwaysRestartPolicy implements ComponentRestartPolicy {
   @Override public boolean shouldTerminate(Component component) {
     return false;
   }
+
+  @Override public boolean allowContainerRetriesForInstance(
+      ComponentInstance componentInstance) {
+    return true;
+  }
 }

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/ComponentRestartPolicy.java

@@ -42,4 +42,6 @@ public interface ComponentRestartPolicy {
 
   boolean shouldTerminate(Component component);
 
+  boolean allowContainerRetriesForInstance(ComponentInstance componentInstance);
+
 }

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/NeverRestartPolicy.java

@@ -79,4 +79,9 @@ public final class NeverRestartPolicy implements ComponentRestartPolicy {
     }
     return true;
   }
+
+  @Override public boolean allowContainerRetriesForInstance(
+      ComponentInstance componentInstance) {
+    return false;
+  }
 }

+ 5 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/OnFailureRestartPolicy.java

@@ -84,4 +84,9 @@ public final class OnFailureRestartPolicy implements ComponentRestartPolicy {
     }
     return true;
   }
+
+  @Override public boolean allowContainerRetriesForInstance(
+      ComponentInstance componentInstance) {
+    return true;
+  }
 }

+ 17 - 12
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java

@@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.yarn.api.ApplicationConstants;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.service.api.records.Service;
+import org.apache.hadoop.yarn.service.component.ComponentRestartPolicy;
 import org.apache.hadoop.yarn.service.conf.YarnServiceConf;
 import org.apache.hadoop.yarn.service.conf.YarnServiceConstants;
 import org.apache.hadoop.yarn.service.containerlaunch.ContainerLaunchService;
@@ -116,18 +117,22 @@ public abstract class AbstractProviderService implements ProviderService,
 
   public void buildContainerRetry(AbstractLauncher launcher,
       Configuration yarnConf,
-      ContainerLaunchService.ComponentLaunchContext compLaunchContext) {
+      ContainerLaunchService.ComponentLaunchContext compLaunchContext,
+      ComponentInstance instance) {
     // By default retry forever every 30 seconds
-    launcher.setRetryContext(
-        YarnServiceConf.getInt(CONTAINER_RETRY_MAX,
-            DEFAULT_CONTAINER_RETRY_MAX,
-            compLaunchContext.getConfiguration(), yarnConf),
-        YarnServiceConf.getInt(CONTAINER_RETRY_INTERVAL,
-            DEFAULT_CONTAINER_RETRY_INTERVAL,
-            compLaunchContext.getConfiguration(), yarnConf),
-        YarnServiceConf.getLong(CONTAINER_FAILURES_VALIDITY_INTERVAL,
-            DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL,
-            compLaunchContext.getConfiguration(), yarnConf));
+
+    ComponentRestartPolicy restartPolicy = instance.getComponent()
+        .getRestartPolicyHandler();
+    if (restartPolicy.allowContainerRetriesForInstance(instance)) {
+      launcher.setRetryContext(YarnServiceConf
+          .getInt(CONTAINER_RETRY_MAX, DEFAULT_CONTAINER_RETRY_MAX,
+              compLaunchContext.getConfiguration(), yarnConf), YarnServiceConf
+          .getInt(CONTAINER_RETRY_INTERVAL, DEFAULT_CONTAINER_RETRY_INTERVAL,
+              compLaunchContext.getConfiguration(), yarnConf), YarnServiceConf
+          .getLong(CONTAINER_FAILURES_VALIDITY_INTERVAL,
+              DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL,
+              compLaunchContext.getConfiguration(), yarnConf));
+    }
   }
 
   public void buildContainerLaunchContext(AbstractLauncher launcher,
@@ -161,6 +166,6 @@ public abstract class AbstractProviderService implements ProviderService,
         yarnConf, container, compLaunchContext, tokensForSubstitution);
 
     // Setup container retry settings
-    buildContainerRetry(launcher, yarnConf, compLaunchContext);
+    buildContainerRetry(launcher, yarnConf, compLaunchContext, instance);
   }
 }

+ 1 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/ServiceTestUtils.java

@@ -115,7 +115,7 @@ public class ServiceTestUtils {
     exampleApp.setName(serviceName);
     exampleApp.setVersion("v1");
     exampleApp.addComponent(
-        createComponent("terminating-comp1", 2, "sleep " + "1000",
+        createComponent("terminating-comp1", 2, "sleep 1000",
             Component.RestartPolicyEnum.NEVER, null));
     exampleApp.addComponent(
         createComponent("terminating-comp2", 2, "sleep 1000",

+ 66 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/containerlaunch/TestAbstractLauncher.java

@@ -19,13 +19,33 @@
 package org.apache.hadoop.yarn.service.containerlaunch;
 
 import org.apache.hadoop.yarn.service.ServiceContext;
+import org.apache.hadoop.yarn.service.api.records.Configuration;
+import org.apache.hadoop.yarn.service.component.AlwaysRestartPolicy;
+import org.apache.hadoop.yarn.service.component.Component;
+import org.apache.hadoop.yarn.service.component.NeverRestartPolicy;
+import org.apache.hadoop.yarn.service.component.OnFailureRestartPolicy;
+import org.apache.hadoop.yarn.service.component.instance.ComponentInstance;
+import org.apache.hadoop.yarn.service.provider.defaultImpl
+    .DefaultProviderService;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
 import java.io.IOException;
 
+import static org.apache.hadoop.fi.FiConfig.getConfig;
+import static org.apache.hadoop.yarn.service.conf.YarnServiceConf
+    .DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL;
+import static org.apache.hadoop.yarn.service.conf.YarnServiceConf
+    .DEFAULT_CONTAINER_RETRY_INTERVAL;
+import static org.apache.hadoop.yarn.service.conf.YarnServiceConf
+    .DEFAULT_CONTAINER_RETRY_MAX;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.reset;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyZeroInteractions;
+import static org.mockito.Mockito.when;
 
 /**
  * Tests for {@link AbstractLauncher}.
@@ -51,4 +71,50 @@ public class TestAbstractLauncher {
 
     Assert.assertEquals("s1:t1:ro,s2:t2:ro", dockerContainerMounts);
   }
+
+  @Test
+  public void testContainerRetries() throws Exception {
+
+    DefaultProviderService providerService = new DefaultProviderService();
+    AbstractLauncher mockLauncher = mock(AbstractLauncher.class);
+    ContainerLaunchService.ComponentLaunchContext componentLaunchContext =
+        mock(ContainerLaunchService.ComponentLaunchContext.class);
+
+    ComponentInstance componentInstance = mock(ComponentInstance.class);
+
+    //Never Restart Policy
+    Component component = mock(Component.class);
+    when(componentInstance.getComponent()).thenReturn(component);
+
+    when(component.getRestartPolicyHandler()).thenReturn(NeverRestartPolicy
+        .getInstance());
+
+    providerService.buildContainerRetry(mockLauncher, getConfig(),
+        componentLaunchContext, componentInstance);
+    verifyZeroInteractions(mockLauncher);
+
+
+    //OnFailure restart policy
+    when(component.getRestartPolicyHandler()).thenReturn(OnFailureRestartPolicy
+        .getInstance());
+    when(componentLaunchContext.getConfiguration()).thenReturn(new
+        Configuration());
+    providerService.buildContainerRetry(mockLauncher, getConfig(),
+        componentLaunchContext, componentInstance);
+    verify(mockLauncher).setRetryContext(DEFAULT_CONTAINER_RETRY_MAX,
+        DEFAULT_CONTAINER_RETRY_INTERVAL,
+        DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL);
+
+    reset(mockLauncher);
+
+    //Always restart policy
+    when(component.getRestartPolicyHandler()).thenReturn(AlwaysRestartPolicy
+        .getInstance());
+    providerService.buildContainerRetry(mockLauncher, getConfig(),
+        componentLaunchContext, componentInstance);
+
+    verify(mockLauncher).setRetryContext(DEFAULT_CONTAINER_RETRY_MAX,
+        DEFAULT_CONTAINER_RETRY_INTERVAL,
+        DEFAULT_CONTAINER_FAILURES_VALIDITY_INTERVAL);
+  }
 }