|
@@ -137,8 +137,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|
private boolean registeredWithRM = false;
|
|
private boolean registeredWithRM = false;
|
|
Set<ContainerId> pendingContainersToRemove = new HashSet<ContainerId>();
|
|
Set<ContainerId> pendingContainersToRemove = new HashSet<ContainerId>();
|
|
|
|
|
|
- private final NodeLabelsProvider nodeLabelsProvider;
|
|
|
|
- private final boolean hasNodeLabelsProvider;
|
|
|
|
|
|
+ private NMNodeLabelsHandler nodeLabelsHandler;
|
|
|
|
|
|
public NodeStatusUpdaterImpl(Context context, Dispatcher dispatcher,
|
|
public NodeStatusUpdaterImpl(Context context, Dispatcher dispatcher,
|
|
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) {
|
|
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) {
|
|
@@ -150,8 +149,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|
NodeLabelsProvider nodeLabelsProvider) {
|
|
NodeLabelsProvider nodeLabelsProvider) {
|
|
super(NodeStatusUpdaterImpl.class.getName());
|
|
super(NodeStatusUpdaterImpl.class.getName());
|
|
this.healthChecker = healthChecker;
|
|
this.healthChecker = healthChecker;
|
|
- this.nodeLabelsProvider = nodeLabelsProvider;
|
|
|
|
- this.hasNodeLabelsProvider = (nodeLabelsProvider != null);
|
|
|
|
|
|
+ nodeLabelsHandler = createNMNodeLabelsHandler(nodeLabelsProvider);
|
|
this.context = context;
|
|
this.context = context;
|
|
this.dispatcher = dispatcher;
|
|
this.dispatcher = dispatcher;
|
|
this.metrics = metrics;
|
|
this.metrics = metrics;
|
|
@@ -313,13 +311,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|
protected void registerWithRM()
|
|
protected void registerWithRM()
|
|
throws YarnException, IOException {
|
|
throws YarnException, IOException {
|
|
List<NMContainerStatus> containerReports = getNMContainerStatuses();
|
|
List<NMContainerStatus> containerReports = getNMContainerStatuses();
|
|
- Set<NodeLabel> nodeLabels = null;
|
|
|
|
- if (hasNodeLabelsProvider) {
|
|
|
|
- nodeLabels = nodeLabelsProvider.getNodeLabels();
|
|
|
|
- nodeLabels =
|
|
|
|
- (null == nodeLabels) ? CommonNodeLabelsManager.EMPTY_NODELABEL_SET
|
|
|
|
- : nodeLabels;
|
|
|
|
- }
|
|
|
|
|
|
+ Set<NodeLabel> nodeLabels = nodeLabelsHandler.getNodeLabelsForRegistration();
|
|
RegisterNodeManagerRequest request =
|
|
RegisterNodeManagerRequest request =
|
|
RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource,
|
|
RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource,
|
|
nodeManagerVersionId, containerReports, getRunningApplications(),
|
|
nodeManagerVersionId, containerReports, getRunningApplications(),
|
|
@@ -380,14 +372,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|
.append(this.nodeId).append(" with total resource of ")
|
|
.append(this.nodeId).append(" with total resource of ")
|
|
.append(this.totalResource);
|
|
.append(this.totalResource);
|
|
|
|
|
|
- if (regNMResponse.getAreNodeLabelsAcceptedByRM()) {
|
|
|
|
- successfullRegistrationMsg
|
|
|
|
- .append(" and with following Node label(s) : {")
|
|
|
|
- .append(StringUtils.join(",", nodeLabels)).append("}");
|
|
|
|
- } else if (hasNodeLabelsProvider) {
|
|
|
|
- //case where provider is set but RM did not accept the Node Labels
|
|
|
|
- LOG.error(regNMResponse.getDiagnosticsMessage());
|
|
|
|
- }
|
|
|
|
|
|
+ successfullRegistrationMsg.append(nodeLabelsHandler
|
|
|
|
+ .verifyRMRegistrationResponseForNodeLabels(regNMResponse));
|
|
|
|
|
|
LOG.info(successfullRegistrationMsg);
|
|
LOG.info(successfullRegistrationMsg);
|
|
LOG.info("Notifying ContainerManager to unblock new container-requests");
|
|
LOG.info("Notifying ContainerManager to unblock new container-requests");
|
|
@@ -688,33 +674,14 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|
@SuppressWarnings("unchecked")
|
|
@SuppressWarnings("unchecked")
|
|
public void run() {
|
|
public void run() {
|
|
int lastHeartbeatID = 0;
|
|
int lastHeartbeatID = 0;
|
|
- Set<NodeLabel> lastUpdatedNodeLabelsToRM = null;
|
|
|
|
- if (hasNodeLabelsProvider) {
|
|
|
|
- lastUpdatedNodeLabelsToRM = nodeLabelsProvider.getNodeLabels();
|
|
|
|
- lastUpdatedNodeLabelsToRM =
|
|
|
|
- (null == lastUpdatedNodeLabelsToRM) ? CommonNodeLabelsManager.EMPTY_NODELABEL_SET
|
|
|
|
- : lastUpdatedNodeLabelsToRM;
|
|
|
|
- }
|
|
|
|
while (!isStopped) {
|
|
while (!isStopped) {
|
|
// Send heartbeat
|
|
// Send heartbeat
|
|
try {
|
|
try {
|
|
NodeHeartbeatResponse response = null;
|
|
NodeHeartbeatResponse response = null;
|
|
- Set<NodeLabel> nodeLabelsForHeartbeat = null;
|
|
|
|
|
|
+ Set<NodeLabel> nodeLabelsForHeartbeat =
|
|
|
|
+ nodeLabelsHandler.getNodeLabelsForHeartbeat();
|
|
NodeStatus nodeStatus = getNodeStatus(lastHeartbeatID);
|
|
NodeStatus nodeStatus = getNodeStatus(lastHeartbeatID);
|
|
|
|
|
|
- if (hasNodeLabelsProvider) {
|
|
|
|
- nodeLabelsForHeartbeat = nodeLabelsProvider.getNodeLabels();
|
|
|
|
- // if the provider returns null then consider empty labels are set
|
|
|
|
- nodeLabelsForHeartbeat =
|
|
|
|
- (nodeLabelsForHeartbeat == null) ? CommonNodeLabelsManager.EMPTY_NODELABEL_SET
|
|
|
|
- : nodeLabelsForHeartbeat;
|
|
|
|
- if (!areNodeLabelsUpdated(nodeLabelsForHeartbeat,
|
|
|
|
- lastUpdatedNodeLabelsToRM)) {
|
|
|
|
- // if nodelabels have not changed then no need to send
|
|
|
|
- nodeLabelsForHeartbeat = null;
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
NodeHeartbeatRequest request =
|
|
NodeHeartbeatRequest request =
|
|
NodeHeartbeatRequest.newInstance(nodeStatus,
|
|
NodeHeartbeatRequest.newInstance(nodeStatus,
|
|
NodeStatusUpdaterImpl.this.context
|
|
NodeStatusUpdaterImpl.this.context
|
|
@@ -740,9 +707,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|
updateMasterKeys(response);
|
|
updateMasterKeys(response);
|
|
|
|
|
|
if (response.getNodeAction() == NodeAction.SHUTDOWN) {
|
|
if (response.getNodeAction() == NodeAction.SHUTDOWN) {
|
|
- LOG
|
|
|
|
- .warn("Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat,"
|
|
|
|
- + " hence shutting down.");
|
|
|
|
|
|
+ LOG.warn("Recieved SHUTDOWN signal from Resourcemanager as part of"
|
|
|
|
+ + " heartbeat, hence shutting down.");
|
|
LOG.warn("Message from ResourceManager: "
|
|
LOG.warn("Message from ResourceManager: "
|
|
+ response.getDiagnosticsMessage());
|
|
+ response.getDiagnosticsMessage());
|
|
context.setDecommissioned(true);
|
|
context.setDecommissioned(true);
|
|
@@ -764,16 +730,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|
break;
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
- if (response.getAreNodeLabelsAcceptedByRM()) {
|
|
|
|
- lastUpdatedNodeLabelsToRM = nodeLabelsForHeartbeat;
|
|
|
|
- LOG.info("Node Labels {"
|
|
|
|
- + StringUtils.join(",", nodeLabelsForHeartbeat)
|
|
|
|
- + "} were Accepted by RM ");
|
|
|
|
- } else if (nodeLabelsForHeartbeat != null) {
|
|
|
|
- // case where NodeLabelsProvider is set and updated labels were
|
|
|
|
- // sent to RM and RM rejected the labels
|
|
|
|
- LOG.error(response.getDiagnosticsMessage());
|
|
|
|
- }
|
|
|
|
|
|
+ nodeLabelsHandler.verifyRMHeartbeatResponseForNodeLabels(response);
|
|
|
|
|
|
// Explicitly put this method after checking the resync response. We
|
|
// Explicitly put this method after checking the resync response. We
|
|
// don't want to remove the completed containers before resync
|
|
// don't want to remove the completed containers before resync
|
|
@@ -833,23 +790,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
- /**
|
|
|
|
- * Caller should take care of sending non null nodelabels for both
|
|
|
|
- * arguments
|
|
|
|
- *
|
|
|
|
- * @param nodeLabelsNew
|
|
|
|
- * @param nodeLabelsOld
|
|
|
|
- * @return if the New node labels are diff from the older one.
|
|
|
|
- */
|
|
|
|
- private boolean areNodeLabelsUpdated(Set<NodeLabel> nodeLabelsNew,
|
|
|
|
- Set<NodeLabel> nodeLabelsOld) {
|
|
|
|
- if (nodeLabelsNew.size() != nodeLabelsOld.size()
|
|
|
|
- || !nodeLabelsOld.containsAll(nodeLabelsNew)) {
|
|
|
|
- return true;
|
|
|
|
- }
|
|
|
|
- return false;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
private void updateMasterKeys(NodeHeartbeatResponse response) {
|
|
private void updateMasterKeys(NodeHeartbeatResponse response) {
|
|
// See if the master-key has rolled over
|
|
// See if the master-key has rolled over
|
|
MasterKey updatedMasterKey = response.getContainerTokenMasterKey();
|
|
MasterKey updatedMasterKey = response.getContainerTokenMasterKey();
|
|
@@ -879,4 +819,183 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
|
reports.addAll(logAggregationReportForAppsTempList);
|
|
reports.addAll(logAggregationReportForAppsTempList);
|
|
return reports;
|
|
return reports;
|
|
}
|
|
}
|
|
|
|
+
|
|
|
|
+ private NMNodeLabelsHandler createNMNodeLabelsHandler(
|
|
|
|
+ NodeLabelsProvider nodeLabelsProvider) {
|
|
|
|
+ if (nodeLabelsProvider == null) {
|
|
|
|
+ return new NMCentralizedNodeLabelsHandler();
|
|
|
|
+ } else {
|
|
|
|
+ return new NMDistributedNodeLabelsHandler(nodeLabelsProvider);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static interface NMNodeLabelsHandler {
|
|
|
|
+ /**
|
|
|
|
+ * Validates nodeLabels from the provider and returns them to the caller. Also
|
|
|
|
+ * ensures that if the provider returns null then an empty label set is considered.
|
|
|
|
+ */
|
|
|
|
+ Set<NodeLabel> getNodeLabelsForRegistration();
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * @return the node-labels part of the RM registration success message; on
|
|
|
|
+ * failure, logs the error independently and returns an empty string
|
|
|
|
+ */
|
|
|
|
+ String verifyRMRegistrationResponseForNodeLabels(
|
|
|
|
+ RegisterNodeManagerResponse regNMResponse);
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * If nodeLabels from the provider differ from the previous node labels,
|
|
|
|
+ * their syntax is checked and null is returned if they are invalid. If
|
|
|
|
+ * valid, returns nodeLabels from the provider. Also ensures that if the
|
|
|
|
+ * provider returns null then an empty label set is considered, and if
|
|
|
|
+ * labels are not modified it returns null.
|
|
|
|
+ */
|
|
|
|
+ Set<NodeLabel> getNodeLabelsForHeartbeat();
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Checks whether the updated labels sent to the RM were accepted or not.
|
|
|
|
+ * @param response
|
|
|
|
+ */
|
|
|
|
+ void verifyRMHeartbeatResponseForNodeLabels(NodeHeartbeatResponse response);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * In centralized configuration, NM need not send Node labels or process the
|
|
|
|
+ * response
|
|
|
|
+ */
|
|
|
|
+ private static class NMCentralizedNodeLabelsHandler
|
|
|
|
+ implements NMNodeLabelsHandler {
|
|
|
|
+ @Override
|
|
|
|
+ public Set<NodeLabel> getNodeLabelsForHeartbeat() {
|
|
|
|
+ return null;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ @Override
|
|
|
|
+ public Set<NodeLabel> getNodeLabelsForRegistration() {
|
|
|
|
+ return null;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ @Override
|
|
|
|
+ public void verifyRMHeartbeatResponseForNodeLabels(
|
|
|
|
+ NodeHeartbeatResponse response) {
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ @Override
|
|
|
|
+ public String verifyRMRegistrationResponseForNodeLabels(
|
|
|
|
+ RegisterNodeManagerResponse regNMResponse) {
|
|
|
|
+ return "";
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static class NMDistributedNodeLabelsHandler
|
|
|
|
+ implements NMNodeLabelsHandler {
|
|
|
|
+ private NMDistributedNodeLabelsHandler(
|
|
|
|
+ NodeLabelsProvider nodeLabelsProvider) {
|
|
|
|
+ this.nodeLabelsProvider = nodeLabelsProvider;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private final NodeLabelsProvider nodeLabelsProvider;
|
|
|
|
+ private Set<NodeLabel> previousNodeLabels;
|
|
|
|
+ private boolean updatedLabelsSentToRM;
|
|
|
|
+
|
|
|
|
+ @Override
|
|
|
|
+ public Set<NodeLabel> getNodeLabelsForRegistration() {
|
|
|
|
+ Set<NodeLabel> nodeLabels = nodeLabelsProvider.getNodeLabels();
|
|
|
|
+ nodeLabels = (null == nodeLabels)
|
|
|
|
+ ? CommonNodeLabelsManager.EMPTY_NODELABEL_SET : nodeLabels;
|
|
|
|
+ previousNodeLabels = nodeLabels;
|
|
|
|
+ try {
|
|
|
|
+ validateNodeLabels(nodeLabels);
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
+ nodeLabels = null;
|
|
|
|
+ }
|
|
|
|
+ return nodeLabels;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ @Override
|
|
|
|
+ public String verifyRMRegistrationResponseForNodeLabels(
|
|
|
|
+ RegisterNodeManagerResponse regNMResponse) {
|
|
|
|
+ StringBuilder successfulNodeLabelsRegistrationMsg = new StringBuilder("");
|
|
|
|
+ if (regNMResponse.getAreNodeLabelsAcceptedByRM()) {
|
|
|
|
+ successfulNodeLabelsRegistrationMsg
|
|
|
|
+ .append(" and with following Node label(s) : {")
|
|
|
|
+ .append(StringUtils.join(",", previousNodeLabels)).append("}");
|
|
|
|
+ } else {
|
|
|
|
+ // case where provider is set but RM did not accept the Node Labels
|
|
|
|
+ LOG.error(regNMResponse.getDiagnosticsMessage());
|
|
|
|
+ }
|
|
|
|
+ return successfulNodeLabelsRegistrationMsg.toString();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ @Override
|
|
|
|
+ public Set<NodeLabel> getNodeLabelsForHeartbeat() {
|
|
|
|
+ Set<NodeLabel> nodeLabelsForHeartbeat =
|
|
|
|
+ nodeLabelsProvider.getNodeLabels();
|
|
|
|
+ // if the provider returns null then consider empty labels are set
|
|
|
|
+ nodeLabelsForHeartbeat = (nodeLabelsForHeartbeat == null)
|
|
|
|
+ ? CommonNodeLabelsManager.EMPTY_NODELABEL_SET
|
|
|
|
+ : nodeLabelsForHeartbeat;
|
|
|
|
+ // take some action only on modification of labels
|
|
|
|
+ boolean areNodeLabelsUpdated =
|
|
|
|
+ nodeLabelsForHeartbeat.size() != previousNodeLabels.size()
|
|
|
|
+ || !previousNodeLabels.containsAll(nodeLabelsForHeartbeat);
|
|
|
|
+
|
|
|
|
+ updatedLabelsSentToRM = false;
|
|
|
|
+ if (areNodeLabelsUpdated) {
|
|
|
|
+ previousNodeLabels = nodeLabelsForHeartbeat;
|
|
|
|
+ try {
|
|
|
|
+ validateNodeLabels(nodeLabelsForHeartbeat);
|
|
|
|
+ updatedLabelsSentToRM = true;
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
+ // previous node labels are already set to the invalid set, so that
|
|
|
|
+ // invalid labels are not verified for every HB; send null (no
|
|
|
|
+ // labels) to RM to have same nodeLabels which were earlier set.
|
|
|
|
+ nodeLabelsForHeartbeat = null;
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ // if nodelabels have not changed then no need to send
|
|
|
|
+ nodeLabelsForHeartbeat = null;
|
|
|
|
+ }
|
|
|
|
+ return nodeLabelsForHeartbeat;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private void validateNodeLabels(Set<NodeLabel> nodeLabelsForHeartbeat)
|
|
|
|
+ throws IOException {
|
|
|
|
+ Iterator<NodeLabel> iterator = nodeLabelsForHeartbeat.iterator();
|
|
|
|
+ boolean hasInvalidLabel = false;
|
|
|
|
+ StringBuilder errorMsg = new StringBuilder("");
|
|
|
|
+ while (iterator.hasNext()) {
|
|
|
|
+ try {
|
|
|
|
+ CommonNodeLabelsManager
|
|
|
|
+ .checkAndThrowLabelName(iterator.next().getName());
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
+ errorMsg.append(e.getMessage());
|
|
|
|
+ errorMsg.append(" , ");
|
|
|
|
+ hasInvalidLabel = true;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if (hasInvalidLabel) {
|
|
|
|
+ LOG.error("Invalid Node Label(s) from Provider : " + errorMsg);
|
|
|
|
+ throw new IOException(errorMsg.toString());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ @Override
|
|
|
|
+ public void verifyRMHeartbeatResponseForNodeLabels(
|
|
|
|
+ NodeHeartbeatResponse response) {
|
|
|
|
+ if (updatedLabelsSentToRM) {
|
|
|
|
+ if (response.getAreNodeLabelsAcceptedByRM()) {
|
|
|
|
+ LOG.info("Node Labels {" + StringUtils.join(",", previousNodeLabels)
|
|
|
|
+ + "} were Accepted by RM ");
|
|
|
|
+ } else {
|
|
|
|
+ // case where updated labels from NodeLabelsProvider is sent to RM and
|
|
|
|
+ // RM rejected the labels
|
|
|
|
+ LOG.error(
|
|
|
|
+ "NM node labels {" + StringUtils.join(",", previousNodeLabels)
|
|
|
|
+ + "} were not accepted by RM and message from RM : "
|
|
|
|
+ + response.getDiagnosticsMessage());
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
}
|
|
}
|