|
@@ -44,10 +44,12 @@ import javax.ws.rs.core.Response;
|
|
|
import javax.ws.rs.core.Response.Status;
|
|
|
|
|
|
import org.apache.commons.lang3.NotImplementedException;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
import org.apache.hadoop.conf.Configuration;
|
|
|
import org.apache.hadoop.security.authorize.AuthorizationException;
|
|
|
import org.apache.hadoop.util.ReflectionUtils;
|
|
|
import org.apache.hadoop.util.Sets;
|
|
|
+import org.apache.hadoop.util.Time;
|
|
|
import org.apache.hadoop.util.concurrent.HadoopExecutors;
|
|
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
|
|
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
|
@@ -1000,11 +1002,11 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
nodes.addAll(nodesInfo.getNodes());
|
|
|
});
|
|
|
} catch (NotFoundException e) {
|
|
|
- LOG.error("Get all active sub cluster(s) error.", e);
|
|
|
+ LOG.error("get all active sub cluster(s) error.", e);
|
|
|
} catch (YarnException e) {
|
|
|
- LOG.error("getNodes error.", e);
|
|
|
+ LOG.error("getNodes by states = {} error.", states, e);
|
|
|
} catch (IOException e) {
|
|
|
- LOG.error("getNodes error with io error.", e);
|
|
|
+ LOG.error("getNodes by states = {} error with io error.", states, e);
|
|
|
}
|
|
|
|
|
|
// Delete duplicate from all the node reports got from all the available
|
|
@@ -1170,32 +1172,45 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
// Only verify the app_id,
|
|
|
// because the specific subCluster needs to be found according to the app_id,
|
|
|
// and other verifications are directly handed over to the corresponding subCluster RM
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetAppActivitiesFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
-
|
|
|
final HttpServletRequest hsrCopy = clone(hsr);
|
|
|
- return interceptor.getAppActivities(hsrCopy, appId, time, requestPriorities,
|
|
|
- allocationRequestIds, groupBy, limit, actions, summarize);
|
|
|
+ AppActivitiesInfo appActivitiesInfo = interceptor.getAppActivities(hsrCopy, appId, time,
|
|
|
+ requestPriorities, allocationRequestIds, groupBy, limit, actions, summarize);
|
|
|
+ if (appActivitiesInfo != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetAppActivitiesRetrieved(stopTime - startTime);
|
|
|
+ return appActivitiesInfo;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException(e, "Unable to get subCluster by appId: %s.",
|
|
|
- appId);
|
|
|
+ routerMetrics.incrGetAppActivitiesFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
+ "Unable to get subCluster by appId: %s.", appId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("getAppActivities Failed.", e);
|
|
|
+ routerMetrics.incrGetAppActivitiesFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
+ "getAppActivities by appId = %s error .", appId);
|
|
|
}
|
|
|
-
|
|
|
- return null;
|
|
|
+ routerMetrics.incrGetAppActivitiesFailedRetrieved();
|
|
|
+ throw new RuntimeException("getAppActivities Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public ApplicationStatisticsInfo getAppStatistics(HttpServletRequest hsr,
|
|
|
Set<String> stateQueries, Set<String> typeQueries) {
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
|
|
final HttpServletRequest hsrCopy = clone(hsr);
|
|
|
Class[] argsClasses = new Class[]{HttpServletRequest.class, Set.class, Set.class};
|
|
@@ -1203,19 +1218,38 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
ClientMethod remoteMethod = new ClientMethod("getAppStatistics", argsClasses, args);
|
|
|
Map<SubClusterInfo, ApplicationStatisticsInfo> appStatisticsMap = invokeConcurrent(
|
|
|
subClustersActive.values(), remoteMethod, ApplicationStatisticsInfo.class);
|
|
|
- return RouterWebServiceUtil.mergeApplicationStatisticsInfo(appStatisticsMap.values());
|
|
|
+ ApplicationStatisticsInfo applicationStatisticsInfo =
|
|
|
+ RouterWebServiceUtil.mergeApplicationStatisticsInfo(appStatisticsMap.values());
|
|
|
+ if (applicationStatisticsInfo != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetAppStatisticsRetrieved(stopTime - startTime);
|
|
|
+ return applicationStatisticsInfo;
|
|
|
+ }
|
|
|
+ } catch (NotFoundException e) {
|
|
|
+ routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("get all active sub cluster(s) error.", e);
|
|
|
} catch (IOException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException(e, "Get all active sub cluster(s) error.");
|
|
|
+ routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
+ "getAppStatistics error by stateQueries = %s, typeQueries = %s with io error.",
|
|
|
+ StringUtils.join(stateQueries, ","), StringUtils.join(typeQueries, ","));
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException(e, "getAppStatistics error.");
|
|
|
+ routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
+ "getAppStatistics by stateQueries = %s, typeQueries = %s with yarn error.",
|
|
|
+ StringUtils.join(stateQueries, ","), StringUtils.join(typeQueries, ","));
|
|
|
}
|
|
|
- return null;
|
|
|
+ routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
|
|
+ throw RouterServerUtil.logAndReturnRunTimeException(
|
|
|
+ "getAppStatistics by stateQueries = %s, typeQueries = %s Failed.",
|
|
|
+ StringUtils.join(stateQueries, ","), StringUtils.join(typeQueries, ","));
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public NodeToLabelsInfo getNodeToLabels(HttpServletRequest hsr)
|
|
|
throws IOException {
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
|
|
final HttpServletRequest hsrCopy = clone(hsr);
|
|
|
Class[] argsClasses = new Class[]{HttpServletRequest.class};
|
|
@@ -1223,27 +1257,35 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
ClientMethod remoteMethod = new ClientMethod("getNodeToLabels", argsClasses, args);
|
|
|
Map<SubClusterInfo, NodeToLabelsInfo> nodeToLabelsInfoMap =
|
|
|
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeToLabelsInfo.class);
|
|
|
- return RouterWebServiceUtil.mergeNodeToLabels(nodeToLabelsInfoMap);
|
|
|
+ NodeToLabelsInfo nodeToLabelsInfo =
|
|
|
+ RouterWebServiceUtil.mergeNodeToLabels(nodeToLabelsInfoMap);
|
|
|
+ if (nodeToLabelsInfo != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetNodeToLabelsRetrieved(stopTime - startTime);
|
|
|
+ return nodeToLabelsInfo;
|
|
|
+ }
|
|
|
} catch (NotFoundException e) {
|
|
|
- LOG.error("Get all active sub cluster(s) error.", e);
|
|
|
- throw new IOException("Get all active sub cluster(s) error.", e);
|
|
|
+ routerMetrics.incrNodeToLabelsFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowIOException("get all active sub cluster(s) error.", e);
|
|
|
} catch (YarnException e) {
|
|
|
- LOG.error("getNodeToLabels error.", e);
|
|
|
- throw new IOException("getNodeToLabels error.", e);
|
|
|
+ routerMetrics.incrNodeToLabelsFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowIOException("getNodeToLabels error.", e);
|
|
|
}
|
|
|
+ routerMetrics.incrGetAppStatisticsFailedRetrieved();
|
|
|
+ throw new RuntimeException("getNodeToLabels Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public LabelsToNodesInfo getLabelsToNodes(Set<String> labels)
|
|
|
throws IOException {
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
|
|
Class[] argsClasses = new Class[]{Set.class};
|
|
|
Object[] args = new Object[]{labels};
|
|
|
ClientMethod remoteMethod = new ClientMethod("getLabelsToNodes", argsClasses, args);
|
|
|
Map<SubClusterInfo, LabelsToNodesInfo> labelsToNodesInfoMap =
|
|
|
invokeConcurrent(subClustersActive.values(), remoteMethod, LabelsToNodesInfo.class);
|
|
|
-
|
|
|
Map<NodeLabelInfo, NodeIDsInfo> labelToNodesMap = new HashMap<>();
|
|
|
labelsToNodesInfoMap.values().forEach(labelsToNode -> {
|
|
|
Map<NodeLabelInfo, NodeIDsInfo> values = labelsToNode.getLabelsToNodes();
|
|
@@ -1255,13 +1297,23 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
labelToNodesMap.put(key, newValue);
|
|
|
}
|
|
|
});
|
|
|
- return new LabelsToNodesInfo(labelToNodesMap);
|
|
|
+ LabelsToNodesInfo labelsToNodesInfo = new LabelsToNodesInfo(labelToNodesMap);
|
|
|
+ if (labelsToNodesInfo != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetLabelsToNodesRetrieved(stopTime - startTime);
|
|
|
+ return labelsToNodesInfo;
|
|
|
+ }
|
|
|
} catch (NotFoundException e) {
|
|
|
- RouterServerUtil.logAndThrowIOException("Get all active sub cluster(s) error.", e);
|
|
|
+ routerMetrics.incrLabelsToNodesFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowIOException("get all active sub cluster(s) error.", e);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowIOException("getLabelsToNodes error.", e);
|
|
|
+ routerMetrics.incrLabelsToNodesFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowIOException(
|
|
|
+ e, "getLabelsToNodes by labels = %s with yarn error.", StringUtils.join(labels, ","));
|
|
|
}
|
|
|
- return null;
|
|
|
+ routerMetrics.incrLabelsToNodesFailedRetrieved();
|
|
|
+ throw RouterServerUtil.logAndReturnRunTimeException(
|
|
|
+ "getLabelsToNodes by labels = %s Failed.", StringUtils.join(labels, ","));
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1280,6 +1332,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
public NodeLabelsInfo getClusterNodeLabels(HttpServletRequest hsr)
|
|
|
throws IOException {
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
|
|
final HttpServletRequest hsrCopy = clone(hsr);
|
|
|
Class[] argsClasses = new Class[]{HttpServletRequest.class};
|
|
@@ -1289,13 +1342,21 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
|
|
|
Set<NodeLabel> hashSets = Sets.newHashSet();
|
|
|
nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
|
|
|
- return new NodeLabelsInfo(hashSets);
|
|
|
+ NodeLabelsInfo nodeLabelsInfo = new NodeLabelsInfo(hashSets);
|
|
|
+ if (nodeLabelsInfo != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetClusterNodeLabelsRetrieved(stopTime - startTime);
|
|
|
+ return nodeLabelsInfo;
|
|
|
+ }
|
|
|
} catch (NotFoundException e) {
|
|
|
- RouterServerUtil.logAndThrowIOException("Get all active sub cluster(s) error.", e);
|
|
|
+ routerMetrics.incrClusterNodeLabelsFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowIOException("get all active sub cluster(s) error.", e);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowIOException("getClusterNodeLabels error.", e);
|
|
|
+ routerMetrics.incrClusterNodeLabelsFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowIOException("getClusterNodeLabels with yarn error.", e);
|
|
|
}
|
|
|
- return null;
|
|
|
+ routerMetrics.incrClusterNodeLabelsFailedRetrieved();
|
|
|
+ throw new RuntimeException("getClusterNodeLabels Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1314,45 +1375,68 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
public NodeLabelsInfo getLabelsOnNode(HttpServletRequest hsr, String nodeId)
|
|
|
throws IOException {
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
|
|
final HttpServletRequest hsrCopy = clone(hsr);
|
|
|
Class[] argsClasses = new Class[]{HttpServletRequest.class, String.class};
|
|
|
Object[] args = new Object[]{hsrCopy, nodeId};
|
|
|
ClientMethod remoteMethod = new ClientMethod("getLabelsOnNode", argsClasses, args);
|
|
|
Map<SubClusterInfo, NodeLabelsInfo> nodeToLabelsInfoMap =
|
|
|
- invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
|
|
|
+ invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
|
|
|
Set<NodeLabel> hashSets = Sets.newHashSet();
|
|
|
nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
|
|
|
- return new NodeLabelsInfo(hashSets);
|
|
|
+ NodeLabelsInfo nodeLabelsInfo = new NodeLabelsInfo(hashSets);
|
|
|
+ if (nodeLabelsInfo != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetLabelsToNodesRetrieved(stopTime - startTime);
|
|
|
+ return nodeLabelsInfo;
|
|
|
+ }
|
|
|
} catch (NotFoundException e) {
|
|
|
- RouterServerUtil.logAndThrowIOException("Get all active sub cluster(s) error.", e);
|
|
|
+ routerMetrics.incrLabelsToNodesFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowIOException("get all active sub cluster(s) error.", e);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowIOException("getClusterNodeLabels error.", e);
|
|
|
+ routerMetrics.incrLabelsToNodesFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowIOException(
|
|
|
+ e, "getLabelsOnNode nodeId = %s with yarn error.", nodeId);
|
|
|
}
|
|
|
- return null;
|
|
|
+ routerMetrics.incrLabelsToNodesFailedRetrieved();
|
|
|
+ throw RouterServerUtil.logAndReturnRunTimeException(
|
|
|
+ "getLabelsOnNode by nodeId = %s Failed.", nodeId);
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public AppPriority getAppPriority(HttpServletRequest hsr, String appId)
|
|
|
throws AuthorizationException {
|
|
|
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetAppPriorityFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.getAppPriority(hsr, appId);
|
|
|
+ AppPriority appPriority = interceptor.getAppPriority(hsr, appId);
|
|
|
+ if (appPriority != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetAppPriorityRetrieved(stopTime - startTime);
|
|
|
+ return appPriority;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetAppPriorityFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
"Unable to get the getAppPriority appId: %s.", appId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("getAppPriority Failed.", e);
|
|
|
+ routerMetrics.incrGetAppPriorityFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("getAppPriority error.", e);
|
|
|
}
|
|
|
-
|
|
|
- return null;
|
|
|
+ routerMetrics.incrGetAppPriorityFailedRetrieved();
|
|
|
+ throw new RuntimeException("getAppPriority Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1360,50 +1444,74 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
HttpServletRequest hsr, String appId) throws AuthorizationException,
|
|
|
YarnException, InterruptedException, IOException {
|
|
|
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
if (targetPriority == null) {
|
|
|
+ routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
|
|
throw new IllegalArgumentException("Parameter error, the targetPriority is empty or null.");
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.updateApplicationPriority(targetPriority, hsr, appId);
|
|
|
+ Response response = interceptor.updateApplicationPriority(targetPriority, hsr, appId);
|
|
|
+ if (response != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededUpdateAppPriorityRetrieved(stopTime - startTime);
|
|
|
+ return response;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
"Unable to get the updateApplicationPriority appId: %s.", appId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("updateApplicationPriority Failed.", e);
|
|
|
+ routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("updateApplicationPriority error.", e);
|
|
|
}
|
|
|
-
|
|
|
- return null;
|
|
|
+ routerMetrics.incrUpdateAppPriorityFailedRetrieved();
|
|
|
+ throw new RuntimeException("updateApplicationPriority Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public AppQueue getAppQueue(HttpServletRequest hsr, String appId)
|
|
|
throws AuthorizationException {
|
|
|
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetAppQueueFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.getAppQueue(hsr, appId);
|
|
|
+ AppQueue queue = interceptor.getAppQueue(hsr, appId);
|
|
|
+ if (queue != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetAppQueueRetrieved((stopTime - startTime));
|
|
|
+ return queue;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
- "Unable to get queue by appId: %s.", appId);
|
|
|
+ routerMetrics.incrGetAppQueueFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException(e, "Unable to get queue by appId: %s.", appId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("getAppQueue Failed.", e);
|
|
|
+ routerMetrics.incrGetAppQueueFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("getAppQueue error.", e);
|
|
|
}
|
|
|
-
|
|
|
- return null;
|
|
|
+ routerMetrics.incrGetAppQueueFailedRetrieved();
|
|
|
+ throw new RuntimeException("getAppQueue Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1411,27 +1519,40 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
String appId) throws AuthorizationException, YarnException,
|
|
|
InterruptedException, IOException {
|
|
|
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
if (targetQueue == null) {
|
|
|
+ routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
|
|
throw new IllegalArgumentException("Parameter error, the targetQueue is null.");
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.updateAppQueue(targetQueue, hsr, appId);
|
|
|
+ Response response = interceptor.updateAppQueue(targetQueue, hsr, appId);
|
|
|
+ if (response != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededUpdateAppQueueRetrieved(stopTime - startTime);
|
|
|
+ return response;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
"Unable to update app queue by appId: %s.", appId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("updateAppQueue Failed.", e);
|
|
|
+ routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("updateAppQueue error.", e);
|
|
|
}
|
|
|
-
|
|
|
- return null;
|
|
|
+ routerMetrics.incrUpdateAppQueueFailedRetrieved();
|
|
|
+ throw new RuntimeException("updateAppQueue Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1497,7 +1618,16 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
throw new IllegalArgumentException("Parameter error, the reservationId is empty or null.");
|
|
|
}
|
|
|
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ ReservationId.parseReservationId(reservationId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrListReservationFailedRetrieved();
|
|
|
+ throw e;
|
|
|
+ }
|
|
|
+
|
|
|
try {
|
|
|
+ long startTime1 = clock.getTime();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByReservationId(reservationId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
@@ -1505,11 +1635,13 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
Response response = interceptor.listReservation(queue, reservationId, startTime, endTime,
|
|
|
includeResourceAllocations, hsrCopy);
|
|
|
if (response != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededListReservationRetrieved(stopTime - startTime1);
|
|
|
return response;
|
|
|
}
|
|
|
} catch (YarnException e) {
|
|
|
routerMetrics.incrListReservationFailedRetrieved();
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("listReservation Failed.", e);
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("listReservation error.", e);
|
|
|
}
|
|
|
|
|
|
routerMetrics.incrListReservationFailedRetrieved();
|
|
@@ -1521,47 +1653,80 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
String type) throws AuthorizationException {
|
|
|
|
|
|
if (appId == null || appId.isEmpty()) {
|
|
|
+ routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
|
|
throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
}
|
|
|
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ ApplicationId.fromString(appId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
|
|
+ throw e;
|
|
|
+ }
|
|
|
+
|
|
|
if (type == null || type.isEmpty()) {
|
|
|
+ routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
|
|
throw new IllegalArgumentException("Parameter error, the type is empty or null.");
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.getAppTimeout(hsr, appId, type);
|
|
|
+ AppTimeoutInfo appTimeoutInfo = interceptor.getAppTimeout(hsr, appId, type);
|
|
|
+ if (appTimeoutInfo != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetAppTimeoutRetrieved((stopTime - startTime));
|
|
|
+ return appTimeoutInfo;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
"Unable to get the getAppTimeout appId: %s.", appId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("getAppTimeout Failed.", e);
|
|
|
+ routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("getAppTimeout error.", e);
|
|
|
}
|
|
|
- return null;
|
|
|
+ routerMetrics.incrGetAppTimeoutFailedRetrieved();
|
|
|
+ throw new RuntimeException("getAppTimeout Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public AppTimeoutsInfo getAppTimeouts(HttpServletRequest hsr, String appId)
|
|
|
throws AuthorizationException {
|
|
|
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetAppTimeoutsFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.getAppTimeouts(hsr, appId);
|
|
|
+ AppTimeoutsInfo appTimeoutsInfo = interceptor.getAppTimeouts(hsr, appId);
|
|
|
+ if (appTimeoutsInfo != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetAppTimeoutsRetrieved((stopTime - startTime));
|
|
|
+ return appTimeoutsInfo;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetAppTimeoutsFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
"Unable to get the getAppTimeouts appId: %s.", appId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("getAppTimeouts Failed.", e);
|
|
|
+ routerMetrics.incrGetAppTimeoutsFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("getAppTimeouts error.", e);
|
|
|
}
|
|
|
- return null;
|
|
|
+
|
|
|
+ routerMetrics.incrGetAppTimeoutsFailedRetrieved();
|
|
|
+ throw new RuntimeException("getAppTimeouts Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1569,47 +1734,76 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
HttpServletRequest hsr, String appId) throws AuthorizationException,
|
|
|
YarnException, InterruptedException, IOException {
|
|
|
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
if (appTimeout == null) {
|
|
|
+ routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
|
|
throw new IllegalArgumentException("Parameter error, the appTimeout is null.");
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = Time.now();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.updateApplicationTimeout(appTimeout, hsr, appId);
|
|
|
+ Response response = interceptor.updateApplicationTimeout(appTimeout, hsr, appId);
|
|
|
+ if (response != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededUpdateAppTimeoutsRetrieved((stopTime - startTime));
|
|
|
+ return response;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
"Unable to get the updateApplicationTimeout appId: %s.", appId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("updateApplicationTimeout Failed.", e);
|
|
|
+ routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("updateApplicationTimeout error.", e);
|
|
|
}
|
|
|
- return null;
|
|
|
+
|
|
|
+ routerMetrics.incrUpdateApplicationTimeoutsRetrieved();
|
|
|
+ throw new RuntimeException("updateApplicationTimeout Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public AppAttemptsInfo getAppAttempts(HttpServletRequest hsr, String appId) {
|
|
|
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
+ // Check that the appId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrAppAttemptsFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = Time.now();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.getAppAttempts(hsr, appId);
|
|
|
+ AppAttemptsInfo appAttemptsInfo = interceptor.getAppAttempts(hsr, appId);
|
|
|
+ if (appAttemptsInfo != null) {
|
|
|
+ long stopTime = Time.now();
|
|
|
+ routerMetrics.succeededAppAttemptsRetrieved(stopTime - startTime);
|
|
|
+ return appAttemptsInfo;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrAppAttemptsFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
"Unable to get the AppAttempt appId: %s.", appId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("getAppAttempts Failed.", e);
|
|
|
+ routerMetrics.incrAppAttemptsFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException("getAppAttempts error.", e);
|
|
|
}
|
|
|
- return null;
|
|
|
+
|
|
|
+ routerMetrics.incrAppAttemptsFailedRetrieved();
|
|
|
+ throw new RuntimeException("getAppAttempts Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1622,59 +1816,87 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
public AppAttemptInfo getAppAttempt(HttpServletRequest req,
|
|
|
HttpServletResponse res, String appId, String appAttemptId) {
|
|
|
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
- }
|
|
|
- if (appAttemptId == null || appAttemptId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appAttemptId is empty or null.");
|
|
|
+ // Check that the appId/appAttemptId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ RouterServerUtil.validateApplicationAttemptId(appAttemptId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrAppAttemptReportFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
+ // Call the getAppAttempt method
|
|
|
try {
|
|
|
+ long startTime = Time.now();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
-
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.getAppAttempt(req, res, appId, appAttemptId);
|
|
|
+ AppAttemptInfo appAttemptInfo = interceptor.getAppAttempt(req, res, appId, appAttemptId);
|
|
|
+ if (appAttemptInfo != null) {
|
|
|
+ long stopTime = Time.now();
|
|
|
+ routerMetrics.succeededAppAttemptReportRetrieved(stopTime - startTime);
|
|
|
+ return appAttemptInfo;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrAppAttemptReportFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
- "Unable to get the AppAttempt appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
|
|
+ "Unable to getAppAttempt by appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
|
|
} catch (YarnException e) {
|
|
|
- RouterServerUtil.logAndThrowRunTimeException("getContainer Failed.", e);
|
|
|
+ routerMetrics.incrAppAttemptReportFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException(e,
|
|
|
+ "getAppAttempt error, appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
|
|
}
|
|
|
|
|
|
- return null;
|
|
|
+ routerMetrics.incrAppAttemptReportFailedRetrieved();
|
|
|
+ throw RouterServerUtil.logAndReturnRunTimeException(
|
|
|
+ "getAppAttempt failed, appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
|
|
}
|
|
|
|
|
|
@Override
|
|
|
public ContainersInfo getContainers(HttpServletRequest req,
|
|
|
HttpServletResponse res, String appId, String appAttemptId) {
|
|
|
|
|
|
- ContainersInfo containersInfo = new ContainersInfo();
|
|
|
-
|
|
|
- Map<SubClusterId, SubClusterInfo> subClustersActive;
|
|
|
+ // Check that the appId/appAttemptId format is accurate
|
|
|
try {
|
|
|
- subClustersActive = getActiveSubclusters();
|
|
|
- } catch (NotFoundException e) {
|
|
|
- LOG.error("Get all active sub cluster(s) error.", e);
|
|
|
- return containersInfo;
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ RouterServerUtil.validateApplicationAttemptId(appAttemptId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetContainersFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = clock.getTime();
|
|
|
+ ContainersInfo containersInfo = new ContainersInfo();
|
|
|
+ Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
|
|
|
Class[] argsClasses = new Class[]{
|
|
|
HttpServletRequest.class, HttpServletResponse.class, String.class, String.class};
|
|
|
Object[] args = new Object[]{req, res, appId, appAttemptId};
|
|
|
ClientMethod remoteMethod = new ClientMethod("getContainers", argsClasses, args);
|
|
|
Map<SubClusterInfo, ContainersInfo> containersInfoMap =
|
|
|
invokeConcurrent(subClustersActive.values(), remoteMethod, ContainersInfo.class);
|
|
|
- if (containersInfoMap != null) {
|
|
|
+ if (containersInfoMap != null && !containersInfoMap.isEmpty()) {
|
|
|
containersInfoMap.values().forEach(containers ->
|
|
|
containersInfo.addAll(containers.getContainers()));
|
|
|
}
|
|
|
- } catch (Exception ex) {
|
|
|
- LOG.error("Failed to return GetContainers.", ex);
|
|
|
+ if (containersInfo != null) {
|
|
|
+ long stopTime = clock.getTime();
|
|
|
+ routerMetrics.succeededGetContainersRetrieved(stopTime - startTime);
|
|
|
+ return containersInfo;
|
|
|
+ }
|
|
|
+ } catch (NotFoundException e) {
|
|
|
+ routerMetrics.incrGetContainersFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException(e, "getContainers error, appId = %s, " +
|
|
|
+ " appAttemptId = %s, Probably getActiveSubclusters error.", appId, appAttemptId);
|
|
|
+ } catch (IOException | YarnException e) {
|
|
|
+ routerMetrics.incrGetContainersFailedRetrieved();
|
|
|
+ RouterServerUtil.logAndThrowRunTimeException(e, "getContainers error, appId = %s, " +
|
|
|
+ " appAttemptId = %s.", appId, appAttemptId);
|
|
|
}
|
|
|
|
|
|
- return containersInfo;
|
|
|
+ routerMetrics.incrGetContainersFailedRetrieved();
|
|
|
+ throw RouterServerUtil.logAndReturnRunTimeException(
|
|
|
+ "getContainers failed, appId: %s, appAttemptId: %s.", appId, appAttemptId);
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1682,32 +1904,45 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
HttpServletResponse res, String appId, String appAttemptId,
|
|
|
String containerId) {
|
|
|
|
|
|
- if (appId == null || appId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appId is empty or null.");
|
|
|
- }
|
|
|
- if (appAttemptId == null || appAttemptId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the appAttemptId is empty or null.");
|
|
|
- }
|
|
|
- if (containerId == null || containerId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the containerId is empty or null.");
|
|
|
+ // FederationInterceptorREST#getContainer is logically
|
|
|
+ // the same as FederationClientInterceptor#getContainerReport,
|
|
|
+ // so use the same Metric.
|
|
|
+
|
|
|
+ // Check that the appId/appAttemptId/containerId format is accurate
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateApplicationId(appId);
|
|
|
+ RouterServerUtil.validateApplicationAttemptId(appAttemptId);
|
|
|
+ RouterServerUtil.validateContainerId(containerId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrGetContainerReportFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = Time.now();
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(appId);
|
|
|
-
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.getContainer(req, res, appId, appAttemptId, containerId);
|
|
|
+ ContainerInfo containerInfo =
|
|
|
+ interceptor.getContainer(req, res, appId, appAttemptId, containerId);
|
|
|
+ if (containerInfo != null) {
|
|
|
+ long stopTime = Time.now();
|
|
|
+ routerMetrics.succeededGetContainerReportRetrieved(stopTime - startTime);
|
|
|
+ return containerInfo;
|
|
|
+ }
|
|
|
} catch (IllegalArgumentException e) {
|
|
|
String msg = String.format(
|
|
|
"Unable to get the AppAttempt appId: %s, appAttemptId: %s, containerId: %s.", appId,
|
|
|
appAttemptId, containerId);
|
|
|
+ routerMetrics.incrGetContainerReportFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException(msg, e);
|
|
|
} catch (YarnException e) {
|
|
|
+ routerMetrics.incrGetContainerReportFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException("getContainer Failed.", e);
|
|
|
}
|
|
|
|
|
|
- return null;
|
|
|
+ routerMetrics.incrGetContainerReportFailedRetrieved();
|
|
|
+ throw new RuntimeException("getContainer Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1735,31 +1970,45 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
public Response signalToContainer(String containerId, String command,
|
|
|
HttpServletRequest req) {
|
|
|
|
|
|
- if (containerId == null || containerId.isEmpty()) {
|
|
|
- throw new IllegalArgumentException("Parameter error, the containerId is empty or null.");
|
|
|
+ // Check if containerId is empty or null
|
|
|
+ try {
|
|
|
+ RouterServerUtil.validateContainerId(containerId);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ routerMetrics.incrSignalToContainerFailedRetrieved();
|
|
|
+ throw e;
|
|
|
}
|
|
|
|
|
|
+ // Check if command is empty or null
|
|
|
if (command == null || command.isEmpty()) {
|
|
|
+ routerMetrics.incrSignalToContainerFailedRetrieved();
|
|
|
throw new IllegalArgumentException("Parameter error, the command is empty or null.");
|
|
|
}
|
|
|
|
|
|
try {
|
|
|
+ long startTime = Time.now();
|
|
|
+
|
|
|
ContainerId containerIdObj = ContainerId.fromString(containerId);
|
|
|
ApplicationId applicationId = containerIdObj.getApplicationAttemptId().getApplicationId();
|
|
|
-
|
|
|
SubClusterInfo subClusterInfo = getHomeSubClusterInfoByAppId(applicationId.toString());
|
|
|
-
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
|
subClusterInfo.getSubClusterId(), subClusterInfo.getRMWebServiceAddress());
|
|
|
- return interceptor.signalToContainer(containerId, command, req);
|
|
|
|
|
|
+ Response response = interceptor.signalToContainer(containerId, command, req);
|
|
|
+ if (response != null) {
|
|
|
+ long stopTime = Time.now();
|
|
|
+ routerMetrics.succeededSignalToContainerRetrieved(stopTime - startTime);
|
|
|
+ return response;
|
|
|
+ }
|
|
|
} catch (YarnException e) {
|
|
|
+ routerMetrics.incrSignalToContainerFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException("signalToContainer Failed.", e);
|
|
|
} catch (AuthorizationException e) {
|
|
|
+ routerMetrics.incrSignalToContainerFailedRetrieved();
|
|
|
RouterServerUtil.logAndThrowRunTimeException("signalToContainer Author Failed.", e);
|
|
|
}
|
|
|
|
|
|
- return null;
|
|
|
+ routerMetrics.incrSignalToContainerFailedRetrieved();
|
|
|
+ throw new RuntimeException("signalToContainer Failed.");
|
|
|
}
|
|
|
|
|
|
@Override
|
|
@@ -1777,6 +2026,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
// Send the requests in parallel
|
|
|
CompletionService<R> compSvc = new ExecutorCompletionService<>(this.threadpool);
|
|
|
|
|
|
+ // Error Msg
|
|
|
for (final SubClusterInfo info : clusterIds) {
|
|
|
compSvc.submit(() -> {
|
|
|
DefaultRequestInterceptorREST interceptor = getOrCreateInterceptorForSubCluster(
|
|
@@ -1831,6 +2081,8 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
|
|
|
}
|
|
|
subClusterInfo = federationFacade.getSubCluster(subClusterId);
|
|
|
return subClusterInfo;
|
|
|
+ } catch (IllegalArgumentException e){
|
|
|
+ throw new IllegalArgumentException(e);
|
|
|
} catch (YarnException e) {
|
|
|
RouterServerUtil.logAndThrowException(e,
|
|
|
"Get HomeSubClusterInfo by applicationId %s failed.", appId);
|