|
@@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest;
|
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
|
import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
|
|
import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
|
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
|
|
|
+import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException;
|
|
import org.apache.hadoop.yarn.exceptions.InvalidApplicationMasterRequestException;
|
|
import org.apache.hadoop.yarn.exceptions.InvalidApplicationMasterRequestException;
|
|
import org.apache.hadoop.yarn.exceptions.InvalidContainerReleaseException;
|
|
import org.apache.hadoop.yarn.exceptions.InvalidContainerReleaseException;
|
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException;
|
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException;
|
|
@@ -107,12 +108,15 @@ public class ApplicationMasterService extends AbstractService implements
|
|
new ConcurrentHashMap<ApplicationAttemptId, AllocateResponseLock>();
|
|
new ConcurrentHashMap<ApplicationAttemptId, AllocateResponseLock>();
|
|
private final AllocateResponse resync =
|
|
private final AllocateResponse resync =
|
|
recordFactory.newRecordInstance(AllocateResponse.class);
|
|
recordFactory.newRecordInstance(AllocateResponse.class);
|
|
|
|
+ private final AllocateResponse shutdown =
|
|
|
|
+ recordFactory.newRecordInstance(AllocateResponse.class);
|
|
private final RMContext rmContext;
|
|
private final RMContext rmContext;
|
|
|
|
|
|
public ApplicationMasterService(RMContext rmContext, YarnScheduler scheduler) {
|
|
public ApplicationMasterService(RMContext rmContext, YarnScheduler scheduler) {
|
|
super(ApplicationMasterService.class.getName());
|
|
super(ApplicationMasterService.class.getName());
|
|
this.amLivelinessMonitor = rmContext.getAMLivelinessMonitor();
|
|
this.amLivelinessMonitor = rmContext.getAMLivelinessMonitor();
|
|
this.rScheduler = scheduler;
|
|
this.rScheduler = scheduler;
|
|
|
|
+ this.shutdown.setAMCommand(AMCommand.AM_SHUTDOWN);
|
|
this.resync.setAMCommand(AMCommand.AM_RESYNC);
|
|
this.resync.setAMCommand(AMCommand.AM_RESYNC);
|
|
this.rmContext = rmContext;
|
|
this.rmContext = rmContext;
|
|
}
|
|
}
|
|
@@ -346,9 +350,9 @@ public class ApplicationMasterService extends AbstractService implements
|
|
AuditConstants.UNREGISTER_AM, "", "ApplicationMasterService",
|
|
AuditConstants.UNREGISTER_AM, "", "ApplicationMasterService",
|
|
message, applicationAttemptId.getApplicationId(),
|
|
message, applicationAttemptId.getApplicationId(),
|
|
applicationAttemptId);
|
|
applicationAttemptId);
|
|
- throw new InvalidApplicationMasterRequestException(message);
|
|
|
|
|
|
+ throw new ApplicationMasterNotRegisteredException(message);
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
this.amLivelinessMonitor.receivedPing(applicationAttemptId);
|
|
this.amLivelinessMonitor.receivedPing(applicationAttemptId);
|
|
|
|
|
|
RMApp rmApp =
|
|
RMApp rmApp =
|
|
@@ -409,22 +413,23 @@ public class ApplicationMasterService extends AbstractService implements
|
|
AllocateResponseLock lock = responseMap.get(appAttemptId);
|
|
AllocateResponseLock lock = responseMap.get(appAttemptId);
|
|
if (lock == null) {
|
|
if (lock == null) {
|
|
LOG.error("AppAttemptId doesnt exist in cache " + appAttemptId);
|
|
LOG.error("AppAttemptId doesnt exist in cache " + appAttemptId);
|
|
- return resync;
|
|
|
|
|
|
+ return shutdown;
|
|
}
|
|
}
|
|
synchronized (lock) {
|
|
synchronized (lock) {
|
|
AllocateResponse lastResponse = lock.getAllocateResponse();
|
|
AllocateResponse lastResponse = lock.getAllocateResponse();
|
|
if (!hasApplicationMasterRegistered(appAttemptId)) {
|
|
if (!hasApplicationMasterRegistered(appAttemptId)) {
|
|
String message =
|
|
String message =
|
|
- "Application Master is trying to allocate before registering for: "
|
|
|
|
- + appAttemptId.getApplicationId();
|
|
|
|
- LOG.error(message);
|
|
|
|
|
|
+ "Application Master is not registered for known application: "
|
|
|
|
+ + appAttemptId.getApplicationId()
|
|
|
|
+ + ". Let AM resync.";
|
|
|
|
+ LOG.info(message);
|
|
RMAuditLogger.logFailure(
|
|
RMAuditLogger.logFailure(
|
|
this.rmContext.getRMApps().get(appAttemptId.getApplicationId())
|
|
this.rmContext.getRMApps().get(appAttemptId.getApplicationId())
|
|
.getUser(), AuditConstants.REGISTER_AM, "",
|
|
.getUser(), AuditConstants.REGISTER_AM, "",
|
|
"ApplicationMasterService", message,
|
|
"ApplicationMasterService", message,
|
|
appAttemptId.getApplicationId(),
|
|
appAttemptId.getApplicationId(),
|
|
appAttemptId);
|
|
appAttemptId);
|
|
- throw new InvalidApplicationMasterRequestException(message);
|
|
|
|
|
|
+ return resync;
|
|
}
|
|
}
|
|
|
|
|
|
if ((request.getResponseId() + 1) == lastResponse.getResponseId()) {
|
|
if ((request.getResponseId() + 1) == lastResponse.getResponseId()) {
|