|
@@ -79,18 +79,12 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
|
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
|
-import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils;
|
|
|
|
import org.apache.hadoop.yarn.state.InvalidStateTransitonException;
|
|
import org.apache.hadoop.yarn.state.InvalidStateTransitonException;
|
|
import org.apache.hadoop.yarn.state.MultipleArcTransition;
|
|
import org.apache.hadoop.yarn.state.MultipleArcTransition;
|
|
import org.apache.hadoop.yarn.state.SingleArcTransition;
|
|
import org.apache.hadoop.yarn.state.SingleArcTransition;
|
|
import org.apache.hadoop.yarn.state.StateMachine;
|
|
import org.apache.hadoop.yarn.state.StateMachine;
|
|
import org.apache.hadoop.yarn.state.StateMachineFactory;
|
|
import org.apache.hadoop.yarn.state.StateMachineFactory;
|
|
-import org.apache.hadoop.yarn.util.Clock;
|
|
|
|
-import org.apache.hadoop.yarn.util.SystemClock;
|
|
|
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
|
-import org.apache.hadoop.yarn.webapp.util.WebAppUtils;
|
|
|
|
-
|
|
|
|
-import com.google.common.annotations.VisibleForTesting;
|
|
|
|
|
|
|
|
@SuppressWarnings({ "rawtypes", "unchecked" })
|
|
@SuppressWarnings({ "rawtypes", "unchecked" })
|
|
public class RMAppImpl implements RMApp, Recoverable {
|
|
public class RMAppImpl implements RMApp, Recoverable {
|
|
@@ -119,12 +113,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
private final String applicationType;
|
|
private final String applicationType;
|
|
private final Set<String> applicationTags;
|
|
private final Set<String> applicationTags;
|
|
|
|
|
|
- private final long attemptFailuresValidityInterval;
|
|
|
|
-
|
|
|
|
- private Clock systemClock;
|
|
|
|
-
|
|
|
|
- private boolean isNumAttemptsBeyondThreshold = false;
|
|
|
|
-
|
|
|
|
// Mutable fields
|
|
// Mutable fields
|
|
private long startTime;
|
|
private long startTime;
|
|
private long finishTime = 0;
|
|
private long finishTime = 0;
|
|
@@ -343,8 +331,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
ApplicationMasterService masterService, long submitTime,
|
|
ApplicationMasterService masterService, long submitTime,
|
|
String applicationType, Set<String> applicationTags) {
|
|
String applicationType, Set<String> applicationTags) {
|
|
|
|
|
|
- this.systemClock = new SystemClock();
|
|
|
|
-
|
|
|
|
this.applicationId = applicationId;
|
|
this.applicationId = applicationId;
|
|
this.name = name;
|
|
this.name = name;
|
|
this.rmContext = rmContext;
|
|
this.rmContext = rmContext;
|
|
@@ -357,7 +343,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
this.scheduler = scheduler;
|
|
this.scheduler = scheduler;
|
|
this.masterService = masterService;
|
|
this.masterService = masterService;
|
|
this.submitTime = submitTime;
|
|
this.submitTime = submitTime;
|
|
- this.startTime = this.systemClock.getTime();
|
|
|
|
|
|
+ this.startTime = System.currentTimeMillis();
|
|
this.applicationType = applicationType;
|
|
this.applicationType = applicationType;
|
|
this.applicationTags = applicationTags;
|
|
this.applicationTags = applicationTags;
|
|
|
|
|
|
@@ -375,9 +361,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
this.maxAppAttempts = individualMaxAppAttempts;
|
|
this.maxAppAttempts = individualMaxAppAttempts;
|
|
}
|
|
}
|
|
|
|
|
|
- this.attemptFailuresValidityInterval =
|
|
|
|
- submissionContext.getAttemptFailuresValidityInterval();
|
|
|
|
-
|
|
|
|
ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
|
|
ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
|
|
this.readLock = lock.readLock();
|
|
this.readLock = lock.readLock();
|
|
this.writeLock = lock.writeLock();
|
|
this.writeLock = lock.writeLock();
|
|
@@ -385,7 +368,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
this.stateMachine = stateMachineFactory.make(this);
|
|
this.stateMachine = stateMachineFactory.make(this);
|
|
|
|
|
|
rmContext.getRMApplicationHistoryWriter().applicationStarted(this);
|
|
rmContext.getRMApplicationHistoryWriter().applicationStarted(this);
|
|
- rmContext.getSystemMetricsPublisher().appCreated(this, startTime);
|
|
|
|
}
|
|
}
|
|
|
|
|
|
@Override
|
|
@Override
|
|
@@ -547,7 +529,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
float progress = 0.0f;
|
|
float progress = 0.0f;
|
|
org.apache.hadoop.yarn.api.records.Token amrmToken = null;
|
|
org.apache.hadoop.yarn.api.records.Token amrmToken = null;
|
|
if (allowAccess) {
|
|
if (allowAccess) {
|
|
- trackingUrl = getDefaultProxyTrackingUrl();
|
|
|
|
if (this.currentAttempt != null) {
|
|
if (this.currentAttempt != null) {
|
|
currentApplicationAttemptId = this.currentAttempt.getAppAttemptId();
|
|
currentApplicationAttemptId = this.currentAttempt.getAppAttemptId();
|
|
trackingUrl = this.currentAttempt.getTrackingUrl();
|
|
trackingUrl = this.currentAttempt.getTrackingUrl();
|
|
@@ -608,20 +589,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- private String getDefaultProxyTrackingUrl() {
|
|
|
|
- try {
|
|
|
|
- final String scheme = WebAppUtils.getHttpSchemePrefix(conf);
|
|
|
|
- String proxy = WebAppUtils.getProxyHostAndPort(conf);
|
|
|
|
- URI proxyUri = ProxyUriUtils.getUriFromAMUrl(scheme, proxy);
|
|
|
|
- URI result = ProxyUriUtils.getProxyUri(null, proxyUri, applicationId);
|
|
|
|
- return result.toASCIIString();
|
|
|
|
- } catch (URISyntaxException e) {
|
|
|
|
- LOG.warn("Could not generate default proxy tracking URL for "
|
|
|
|
- + applicationId);
|
|
|
|
- return UNAVAILABLE;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
@Override
|
|
@Override
|
|
public long getFinishTime() {
|
|
public long getFinishTime() {
|
|
this.readLock.lock();
|
|
this.readLock.lock();
|
|
@@ -663,20 +630,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- @Override
|
|
|
|
- public String getOriginalTrackingUrl() {
|
|
|
|
- this.readLock.lock();
|
|
|
|
-
|
|
|
|
- try {
|
|
|
|
- if (this.currentAttempt != null) {
|
|
|
|
- return this.currentAttempt.getOriginalTrackingUrl();
|
|
|
|
- }
|
|
|
|
- return null;
|
|
|
|
- } finally {
|
|
|
|
- this.readLock.unlock();
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
@Override
|
|
@Override
|
|
public StringBuilder getDiagnostics() {
|
|
public StringBuilder getDiagnostics() {
|
|
this.readLock.lock();
|
|
this.readLock.lock();
|
|
@@ -935,7 +888,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
msg = "Unmanaged application " + this.getApplicationId()
|
|
msg = "Unmanaged application " + this.getApplicationId()
|
|
+ " failed due to " + failedEvent.getDiagnostics()
|
|
+ " failed due to " + failedEvent.getDiagnostics()
|
|
+ ". Failing the application.";
|
|
+ ". Failing the application.";
|
|
- } else if (this.isNumAttemptsBeyondThreshold) {
|
|
|
|
|
|
+ } else if (getNumFailedAppAttempts() >= this.maxAppAttempts) {
|
|
msg = "Application " + this.getApplicationId() + " failed "
|
|
msg = "Application " + this.getApplicationId() + " failed "
|
|
+ this.maxAppAttempts + " times due to "
|
|
+ this.maxAppAttempts + " times due to "
|
|
+ failedEvent.getDiagnostics() + ". Failing the application.";
|
|
+ failedEvent.getDiagnostics() + ". Failing the application.";
|
|
@@ -968,7 +921,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
RMAppState stateToBeStored) {
|
|
RMAppState stateToBeStored) {
|
|
rememberTargetTransitions(event, transitionToDo, targetFinalState);
|
|
rememberTargetTransitions(event, transitionToDo, targetFinalState);
|
|
this.stateBeforeFinalSaving = getState();
|
|
this.stateBeforeFinalSaving = getState();
|
|
- this.storedFinishTime = this.systemClock.getTime();
|
|
|
|
|
|
+ this.storedFinishTime = System.currentTimeMillis();
|
|
|
|
|
|
LOG.info("Updating application " + this.applicationId
|
|
LOG.info("Updating application " + this.applicationId
|
|
+ " with final state: " + this.targetedFinalState);
|
|
+ " with final state: " + this.targetedFinalState);
|
|
@@ -1135,7 +1088,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
}
|
|
}
|
|
app.finishTime = app.storedFinishTime;
|
|
app.finishTime = app.storedFinishTime;
|
|
if (app.finishTime == 0 ) {
|
|
if (app.finishTime == 0 ) {
|
|
- app.finishTime = app.systemClock.getTime();
|
|
|
|
|
|
+ app.finishTime = System.currentTimeMillis();
|
|
}
|
|
}
|
|
// Recovered apps that are completed were not added to scheduler, so no
|
|
// Recovered apps that are completed were not added to scheduler, so no
|
|
// need to remove them from scheduler.
|
|
// need to remove them from scheduler.
|
|
@@ -1149,23 +1102,16 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
|
|
|
|
app.rmContext.getRMApplicationHistoryWriter()
|
|
app.rmContext.getRMApplicationHistoryWriter()
|
|
.applicationFinished(app, finalState);
|
|
.applicationFinished(app, finalState);
|
|
- app.rmContext.getSystemMetricsPublisher()
|
|
|
|
- .appFinished(app, finalState, app.finishTime);
|
|
|
|
};
|
|
};
|
|
}
|
|
}
|
|
|
|
|
|
private int getNumFailedAppAttempts() {
|
|
private int getNumFailedAppAttempts() {
|
|
int completedAttempts = 0;
|
|
int completedAttempts = 0;
|
|
- long endTime = this.systemClock.getTime();
|
|
|
|
// Do not count AM preemption, hardware failures or NM resync
|
|
// Do not count AM preemption, hardware failures or NM resync
|
|
// as attempt failure.
|
|
// as attempt failure.
|
|
for (RMAppAttempt attempt : attempts.values()) {
|
|
for (RMAppAttempt attempt : attempts.values()) {
|
|
if (attempt.shouldCountTowardsMaxAttemptRetry()) {
|
|
if (attempt.shouldCountTowardsMaxAttemptRetry()) {
|
|
- if (this.attemptFailuresValidityInterval <= 0
|
|
|
|
- || (attempt.getFinishTime() > endTime
|
|
|
|
- - this.attemptFailuresValidityInterval)) {
|
|
|
|
- completedAttempts++;
|
|
|
|
- }
|
|
|
|
|
|
+ completedAttempts++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return completedAttempts;
|
|
return completedAttempts;
|
|
@@ -1182,10 +1128,9 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
|
|
|
|
@Override
|
|
@Override
|
|
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
|
|
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
|
|
- int numberOfFailure = app.getNumFailedAppAttempts();
|
|
|
|
if (!app.submissionContext.getUnmanagedAM()
|
|
if (!app.submissionContext.getUnmanagedAM()
|
|
- && numberOfFailure < app.maxAppAttempts) {
|
|
|
|
- boolean transferStateFromPreviousAttempt;
|
|
|
|
|
|
+ && app.getNumFailedAppAttempts() < app.maxAppAttempts) {
|
|
|
|
+ boolean transferStateFromPreviousAttempt = false;
|
|
RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event;
|
|
RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event;
|
|
transferStateFromPreviousAttempt =
|
|
transferStateFromPreviousAttempt =
|
|
failedEvent.getTransferStateFromPreviousAttempt();
|
|
failedEvent.getTransferStateFromPreviousAttempt();
|
|
@@ -1195,16 +1140,13 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|
// Transfer the state from the previous attempt to the current attempt.
|
|
// Transfer the state from the previous attempt to the current attempt.
|
|
// Note that the previous failed attempt may still be collecting the
|
|
// Note that the previous failed attempt may still be collecting the
|
|
// container events from the scheduler and update its data structures
|
|
// container events from the scheduler and update its data structures
|
|
- // before the new attempt is created. We always transferState for
|
|
|
|
- // finished containers so that they can be acked to NM,
|
|
|
|
- // but when pulling finished container we will check this flag again.
|
|
|
|
- ((RMAppAttemptImpl) app.currentAttempt)
|
|
|
|
- .transferStateFromPreviousAttempt(oldAttempt);
|
|
|
|
|
|
+ // before the new attempt is created.
|
|
|
|
+ if (transferStateFromPreviousAttempt) {
|
|
|
|
+ ((RMAppAttemptImpl) app.currentAttempt)
|
|
|
|
+ .transferStateFromPreviousAttempt(oldAttempt);
|
|
|
|
+ }
|
|
return initialState;
|
|
return initialState;
|
|
} else {
|
|
} else {
|
|
- if (numberOfFailure >= app.maxAppAttempts) {
|
|
|
|
- app.isNumAttemptsBeyondThreshold = true;
|
|
|
|
- }
|
|
|
|
app.rememberTargetTransitionsAndStoreState(event,
|
|
app.rememberTargetTransitionsAndStoreState(event,
|
|
new AttemptFailedFinalStateSavedTransition(), RMAppState.FAILED,
|
|
new AttemptFailedFinalStateSavedTransition(), RMAppState.FAILED,
|
|
RMAppState.FAILED);
|
|
RMAppState.FAILED);
|