|
@@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentHashMap;
|
|
|
import java.util.concurrent.ConcurrentMap;
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
|
|
|
-import com.google.common.annotations.VisibleForTesting;
|
|
|
import org.apache.commons.cli.CommandLine;
|
|
|
import org.apache.commons.cli.GnuParser;
|
|
|
import org.apache.commons.cli.HelpFormatter;
|
|
@@ -89,6 +88,8 @@ import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
|
|
import org.apache.hadoop.yarn.util.ConverterUtils;
|
|
|
import org.apache.hadoop.yarn.util.Records;
|
|
|
|
|
|
+import com.google.common.annotations.VisibleForTesting;
|
|
|
+
|
|
|
/**
|
|
|
* An ApplicationMaster for executing shell commands on a set of launched
|
|
|
* containers using the YARN framework.
|
|
@@ -169,7 +170,8 @@ public class ApplicationMaster {
|
|
|
private NMCallbackHandler containerListener;
|
|
|
|
|
|
// Application Attempt Id ( combination of attemptId and fail count )
|
|
|
- private ApplicationAttemptId appAttemptID;
|
|
|
+ @VisibleForTesting
|
|
|
+ protected ApplicationAttemptId appAttemptID;
|
|
|
|
|
|
// TODO
|
|
|
// For status update for clients - yet to be implemented
|
|
@@ -194,13 +196,15 @@ public class ApplicationMaster {
|
|
|
private AtomicInteger numCompletedContainers = new AtomicInteger();
|
|
|
// Allocated container count so that we know how many containers has the RM
|
|
|
// allocated to us
|
|
|
- private AtomicInteger numAllocatedContainers = new AtomicInteger();
|
|
|
+ @VisibleForTesting
|
|
|
+ protected AtomicInteger numAllocatedContainers = new AtomicInteger();
|
|
|
// Count of failed containers
|
|
|
private AtomicInteger numFailedContainers = new AtomicInteger();
|
|
|
// Count of containers already requested from the RM
|
|
|
// Needed as once requested, we should not request for containers again.
|
|
|
// Only request for more if the original requirement changes.
|
|
|
- private AtomicInteger numRequestedContainers = new AtomicInteger();
|
|
|
+ @VisibleForTesting
|
|
|
+ protected AtomicInteger numRequestedContainers = new AtomicInteger();
|
|
|
|
|
|
// Shell command to be executed
|
|
|
private String shellCommand = "";
|
|
@@ -251,6 +255,7 @@ public class ApplicationMaster {
|
|
|
System.exit(0);
|
|
|
}
|
|
|
result = appMaster.run();
|
|
|
+ appMaster.finish();
|
|
|
} catch (Throwable t) {
|
|
|
LOG.fatal("Error running ApplicationMaster", t);
|
|
|
System.exit(1);
|
|
@@ -537,26 +542,25 @@ public class ApplicationMaster {
|
|
|
containerVirtualCores = maxVCores;
|
|
|
}
|
|
|
|
|
|
+ List<Container> previousAMRunningContainers =
|
|
|
+ response.getContainersFromPreviousAttempt();
|
|
|
+ LOG.info("Received " + previousAMRunningContainers.size()
|
|
|
+ + " previous AM's running containers on AM registration.");
|
|
|
+ numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
|
|
|
+
|
|
|
+ int numTotalContainersToRequest =
|
|
|
+ numTotalContainers - previousAMRunningContainers.size();
|
|
|
// Setup ask for containers from RM
|
|
|
// Send request for containers to RM
|
|
|
// Until we get our fully allocated quota, we keep on polling RM for
|
|
|
// containers
|
|
|
// Keep looping until all the containers are launched and shell script
|
|
|
// executed on them ( regardless of success/failure).
|
|
|
- for (int i = 0; i < numTotalContainers; ++i) {
|
|
|
+ for (int i = 0; i < numTotalContainersToRequest; ++i) {
|
|
|
ContainerRequest containerAsk = setupContainerAskForRM();
|
|
|
amRMClient.addContainerRequest(containerAsk);
|
|
|
}
|
|
|
- numRequestedContainers.set(numTotalContainers);
|
|
|
-
|
|
|
- while (!done
|
|
|
- && (numCompletedContainers.get() != numTotalContainers)) {
|
|
|
- try {
|
|
|
- Thread.sleep(200);
|
|
|
- } catch (InterruptedException ex) {}
|
|
|
- }
|
|
|
- finish();
|
|
|
-
|
|
|
+ numRequestedContainers.set(numTotalContainersToRequest);
|
|
|
return success;
|
|
|
}
|
|
|
|
|
@@ -565,7 +569,15 @@ public class ApplicationMaster {
|
|
|
return new NMCallbackHandler(this);
|
|
|
}
|
|
|
|
|
|
- private void finish() {
|
|
|
+ protected void finish() {
|
|
|
+ // wait for completion.
|
|
|
+ while (!done
|
|
|
+ && (numCompletedContainers.get() != numTotalContainers)) {
|
|
|
+ try {
|
|
|
+ Thread.sleep(200);
|
|
|
+ } catch (InterruptedException ex) {}
|
|
|
+ }
|
|
|
+
|
|
|
// Join all launched threads
|
|
|
// needed for when we time out
|
|
|
// and we need to release containers
|