Browse Source

YARN-5067 Support specifying resources for AM containers in SLS. (Yufei Gu via Haibo Chen)

Haibo Chen 7 years ago
parent
commit
147df300bf

+ 31 - 7
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java

@@ -406,7 +406,7 @@ public class SLSRunner extends Configured implements Tool {
     }
     }
 
 
     runNewAM(amType, user, queue, oldAppId, jobStartTime, jobFinishTime,
     runNewAM(amType, user, queue, oldAppId, jobStartTime, jobFinishTime,
-        getTaskContainers(jsonJob), null);
+        getTaskContainers(jsonJob), null, getAMContainerResource(jsonJob));
   }
   }
 
 
   private List<ContainerSimulator> getTaskContainers(Map jsonJob)
   private List<ContainerSimulator> getTaskContainers(Map jsonJob)
@@ -558,7 +558,8 @@ public class SLSRunner extends Configured implements Tool {
 
 
     // Only supports the default job type currently
     // Only supports the default job type currently
     runNewAM(SLSUtils.DEFAULT_JOB_TYPE, user, jobQueue, oldJobId,
     runNewAM(SLSUtils.DEFAULT_JOB_TYPE, user, jobQueue, oldJobId,
-        jobStartTimeMS, jobFinishTimeMS, containerList, null);
+        jobStartTimeMS, jobFinishTimeMS, containerList, null,
+        getAMContainerResource(null));
   }
   }
 
 
   private Resource getDefaultContainerResource() {
   private Resource getDefaultContainerResource() {
@@ -676,7 +677,8 @@ public class SLSRunner extends Configured implements Tool {
         }
         }
 
 
         runNewAM(SLSUtils.DEFAULT_JOB_TYPE, user, jobQueue, oldJobId,
         runNewAM(SLSUtils.DEFAULT_JOB_TYPE, user, jobQueue, oldJobId,
-            jobStartTimeMS, jobFinishTimeMS, containerList, rr);
+            jobStartTimeMS, jobFinishTimeMS, containerList, rr,
+            getAMContainerResource(null));
       }
       }
     } finally {
     } finally {
       stjp.close();
       stjp.close();
@@ -684,6 +686,26 @@ public class SLSRunner extends Configured implements Tool {
 
 
   }
   }
 
 
+  private Resource getAMContainerResource(Map jsonJob) {
+    Resource amContainerResource =
+        SLSConfiguration.getAMContainerResource(getConf());
+
+    if (jsonJob == null) {
+      return amContainerResource;
+    }
+
+    if (jsonJob.containsKey("am.memory")) {
+      amContainerResource.setMemorySize(
+          Long.parseLong(jsonJob.get("am.memory").toString()));
+    }
+
+    if (jsonJob.containsKey("am.vcores")) {
+      amContainerResource.setVirtualCores(
+          Integer.parseInt(jsonJob.get("am.vcores").toString()));
+    }
+    return amContainerResource;
+  }
+
   private void increaseQueueAppNum(String queue) throws YarnException {
   private void increaseQueueAppNum(String queue) throws YarnException {
     SchedulerWrapper wrapper = (SchedulerWrapper)rm.getResourceScheduler();
     SchedulerWrapper wrapper = (SchedulerWrapper)rm.getResourceScheduler();
     String queueName = wrapper.getRealQueueName(queue);
     String queueName = wrapper.getRealQueueName(queue);
@@ -700,7 +722,7 @@ public class SLSRunner extends Configured implements Tool {
   private void runNewAM(String jobType, String user,
   private void runNewAM(String jobType, String user,
       String jobQueue, String oldJobId, long jobStartTimeMS,
       String jobQueue, String oldJobId, long jobStartTimeMS,
       long jobFinishTimeMS, List<ContainerSimulator> containerList,
       long jobFinishTimeMS, List<ContainerSimulator> containerList,
-      ReservationSubmissionRequest rr) {
+      ReservationSubmissionRequest rr, Resource amContainerResource) {
 
 
     AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
     AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
         amClassMap.get(jobType), new Configuration());
         amClassMap.get(jobType), new Configuration());
@@ -710,9 +732,11 @@ public class SLSRunner extends Configured implements Tool {
           SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS,
           SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS,
           SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
           SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
       boolean isTracked = trackedApps.contains(oldJobId);
       boolean isTracked = trackedApps.contains(oldJobId);
-      amSim.init(AM_ID++, heartbeatInterval, containerList,
-          rm, this, jobStartTimeMS, jobFinishTimeMS, user, jobQueue,
-          isTracked, oldJobId, rr, runner.getStartTimeMS());
+      AM_ID++;
+
+      amSim.init(heartbeatInterval, containerList, rm, this, jobStartTimeMS,
+          jobFinishTimeMS, user, jobQueue, isTracked, oldJobId, rr,
+          runner.getStartTimeMS(), amContainerResource);
       runner.schedule(amSim);
       runner.schedule(amSim);
       maxRuntime = Math.max(maxRuntime, jobFinishTimeMS);
       maxRuntime = Math.max(maxRuntime, jobFinishTimeMS);
       numTasks += containerList.size();
       numTasks += containerList.size();

+ 14 - 25
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java

@@ -20,7 +20,6 @@ package org.apache.hadoop.yarn.sls.appmaster;
 
 
 import java.io.IOException;
 import java.io.IOException;
 import java.lang.reflect.UndeclaredThrowableException;
 import java.lang.reflect.UndeclaredThrowableException;
-import java.nio.ByteBuffer;
 import java.security.PrivilegedExceptionAction;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashMap;
@@ -35,18 +34,13 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
-import org.apache.hadoop.yarn.api.protocolrecords
-        .FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
-
-import org.apache.hadoop.yarn.api.protocolrecords
-        .RegisterApplicationMasterRequest;
-import org.apache.hadoop.yarn.api.protocolrecords
-        .RegisterApplicationMasterResponse;
-import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
 import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
@@ -54,7 +48,6 @@ import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
-import org.apache.hadoop.yarn.api.records.LocalResource;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.ReservationId;
 import org.apache.hadoop.yarn.api.records.ReservationId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.Resource;
@@ -66,7 +59,6 @@ import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.sls.scheduler.SchedulerMetrics;
 import org.apache.hadoop.yarn.sls.scheduler.SchedulerMetrics;
 import org.apache.hadoop.yarn.util.Records;
 import org.apache.hadoop.yarn.util.Records;
-import org.apache.hadoop.yarn.util.resource.Resources;
 import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
 import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
 import org.apache.hadoop.yarn.sls.scheduler.SchedulerWrapper;
 import org.apache.hadoop.yarn.sls.scheduler.SchedulerWrapper;
 import org.apache.hadoop.yarn.sls.SLSRunner;
 import org.apache.hadoop.yarn.sls.SLSRunner;
@@ -116,9 +108,7 @@ public abstract class AMSimulator extends TaskRunner.Task {
   
   
   private static final Logger LOG = LoggerFactory.getLogger(AMSimulator.class);
   private static final Logger LOG = LoggerFactory.getLogger(AMSimulator.class);
 
 
-  // resource for AM container
-  private final static int MR_AM_CONTAINER_RESOURCE_MEMORY_MB = 1024;
-  private final static int MR_AM_CONTAINER_RESOURCE_VCORES = 1;
+  private Resource amContainerResource;
 
 
   private ReservationSubmissionRequest reservationRequest;
   private ReservationSubmissionRequest reservationRequest;
 
 
@@ -127,11 +117,12 @@ public abstract class AMSimulator extends TaskRunner.Task {
   }
   }
 
 
   @SuppressWarnings("checkstyle:parameternumber")
   @SuppressWarnings("checkstyle:parameternumber")
-  public void init(int id, int heartbeatInterval,
+  public void init(int heartbeatInterval,
       List<ContainerSimulator> containerList, ResourceManager resourceManager,
       List<ContainerSimulator> containerList, ResourceManager resourceManager,
       SLSRunner slsRunnner, long startTime, long finishTime, String simUser,
       SLSRunner slsRunnner, long startTime, long finishTime, String simUser,
       String simQueue, boolean tracked, String oldApp,
       String simQueue, boolean tracked, String oldApp,
-      ReservationSubmissionRequest rr, long baseTimeMS) {
+      ReservationSubmissionRequest rr, long baseTimeMS,
+      Resource amContainerResource) {
     super.init(startTime, startTime + 1000000L * heartbeatInterval,
     super.init(startTime, startTime + 1000000L * heartbeatInterval,
         heartbeatInterval);
         heartbeatInterval);
     this.user = simUser;
     this.user = simUser;
@@ -144,6 +135,7 @@ public abstract class AMSimulator extends TaskRunner.Task {
     this.traceStartTimeMS = startTime;
     this.traceStartTimeMS = startTime;
     this.traceFinishTimeMS = finishTime;
     this.traceFinishTimeMS = finishTime;
     this.reservationRequest = rr;
     this.reservationRequest = rr;
+    this.amContainerResource = amContainerResource;
   }
   }
 
 
   /**
   /**
@@ -318,16 +310,13 @@ public abstract class AMSimulator extends TaskRunner.Task {
     appSubContext.setPriority(Priority.newInstance(0));
     appSubContext.setPriority(Priority.newInstance(0));
     ContainerLaunchContext conLauContext = 
     ContainerLaunchContext conLauContext = 
         Records.newRecord(ContainerLaunchContext.class);
         Records.newRecord(ContainerLaunchContext.class);
-    conLauContext.setApplicationACLs(
-        new HashMap<ApplicationAccessType, String>());
-    conLauContext.setCommands(new ArrayList<String>());
-    conLauContext.setEnvironment(new HashMap<String, String>());
-    conLauContext.setLocalResources(new HashMap<String, LocalResource>());
-    conLauContext.setServiceData(new HashMap<String, ByteBuffer>());
+    conLauContext.setApplicationACLs(new HashMap<>());
+    conLauContext.setCommands(new ArrayList<>());
+    conLauContext.setEnvironment(new HashMap<>());
+    conLauContext.setLocalResources(new HashMap<>());
+    conLauContext.setServiceData(new HashMap<>());
     appSubContext.setAMContainerSpec(conLauContext);
     appSubContext.setAMContainerSpec(conLauContext);
-    appSubContext.setResource(Resources
-        .createResource(MR_AM_CONTAINER_RESOURCE_MEMORY_MB,
-            MR_AM_CONTAINER_RESOURCE_VCORES));
+    appSubContext.setResource(amContainerResource);
 
 
     if(reservationId != null) {
     if(reservationId != null) {
       appSubContext.setReservationID(reservationId);
       appSubContext.setReservationID(reservationId);

+ 6 - 5
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/MRAMSimulator.java

@@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
 import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
 import org.apache.hadoop.yarn.api.protocolrecords.ReservationSubmissionRequest;
 import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
 import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerStatus;
 import org.apache.hadoop.yarn.api.records.ContainerStatus;
@@ -114,14 +115,14 @@ public class MRAMSimulator extends AMSimulator {
       LoggerFactory.getLogger(MRAMSimulator.class);
       LoggerFactory.getLogger(MRAMSimulator.class);
 
 
   @SuppressWarnings("checkstyle:parameternumber")
   @SuppressWarnings("checkstyle:parameternumber")
-  public void init(int id, int heartbeatInterval,
+  public void init(int heartbeatInterval,
       List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
       List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
       long traceStartTime, long traceFinishTime, String user, String queue, 
       long traceStartTime, long traceFinishTime, String user, String queue, 
       boolean isTracked, String oldAppId, ReservationSubmissionRequest rr,
       boolean isTracked, String oldAppId, ReservationSubmissionRequest rr,
-      long baselineStartTimeMS) {
-    super.init(id, heartbeatInterval, containerList, rm, se, 
-              traceStartTime, traceFinishTime, user, queue,
-              isTracked, oldAppId, rr, baselineStartTimeMS);
+      long baselineStartTimeMS, Resource amContainerResource) {
+    super.init(heartbeatInterval, containerList, rm, se,
+        traceStartTime, traceFinishTime, user, queue, isTracked, oldAppId,
+        rr, baselineStartTimeMS, amContainerResource);
     amtype = "mapreduce";
     amtype = "mapreduce";
     
     
     // get map/reduce tasks
     // get map/reduce tasks

+ 15 - 0
hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/conf/SLSConfiguration.java

@@ -20,6 +20,8 @@ package org.apache.hadoop.yarn.sls.conf;
 
 
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.Resource;
 
 
 @Private
 @Private
 @Unstable
 @Unstable
@@ -62,6 +64,14 @@ public class SLSConfiguration {
   public static final int AM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
   public static final int AM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
   public static final String AM_TYPE = AM_PREFIX + "type.";
   public static final String AM_TYPE = AM_PREFIX + "type.";
 
 
+  public static final String AM_CONTAINER_MEMORY = AM_PREFIX +
+      "container.memory";
+  public static final int AM_CONTAINER_MEMORY_DEFAULT = 1024;
+
+  public static final String AM_CONTAINER_VCORES = AM_PREFIX +
+      "container.vcores";
+  public static final int AM_CONTAINER_VCORES_DEFAULT = 1;
+
   // container
   // container
   public static final String CONTAINER_PREFIX = PREFIX + "container.";
   public static final String CONTAINER_PREFIX = PREFIX + "container.";
   public static final String CONTAINER_MEMORY_MB = CONTAINER_PREFIX
   public static final String CONTAINER_MEMORY_MB = CONTAINER_PREFIX
@@ -70,4 +80,9 @@ public class SLSConfiguration {
   public static final String CONTAINER_VCORES = CONTAINER_PREFIX + "vcores";
   public static final String CONTAINER_VCORES = CONTAINER_PREFIX + "vcores";
   public static final int CONTAINER_VCORES_DEFAULT = 1;
   public static final int CONTAINER_VCORES_DEFAULT = 1;
 
 
+  public static Resource getAMContainerResource(Configuration conf) {
+    return Resource.newInstance(
+        conf.getLong(AM_CONTAINER_MEMORY, AM_CONTAINER_MEMORY_DEFAULT),
+        conf.getInt(AM_CONTAINER_VCORES, AM_CONTAINER_VCORES_DEFAULT));
+  }
 }
 }

+ 2 - 2
hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/appmaster/TestAMSimulator.java

@@ -133,8 +133,8 @@ public class TestAMSimulator {
     String appId = "app1";
     String appId = "app1";
     String queue = "default";
     String queue = "default";
     List<ContainerSimulator> containers = new ArrayList<>();
     List<ContainerSimulator> containers = new ArrayList<>();
-    app.init(1, 1000, containers, rm, null, 0, 1000000L, "user1", queue,
-        true, appId, null, 0);
+    app.init(1000, containers, rm, null, 0, 1000000L, "user1", queue, true,
+        appId, null, 0, SLSConfiguration.getAMContainerResource(conf));
     app.firstStep();
     app.firstStep();
 
 
     verifySchedulerMetrics(appId);
     verifySchedulerMetrics(appId);