Browse Source

AMBARI-25606. Sometimes request aborting doesn't abort IN_PROGRESS task. (dvitiuk via dgrinenko) (#3275)

dvitiiuk 5 years ago
parent
commit
ebde46f27f

+ 2 - 2
ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessor.java

@@ -57,8 +57,8 @@ public interface ActionDBAccessor {
 
   /**
    * Abort all outstanding operations associated with the given request. This
-   * method uses the {@link HostRoleStatus#SCHEDULED_STATES} to determine which
-   * {@link HostRoleCommand} instances to abort.
+   * method uses the {@link HostRoleStatus#SCHEDULED_STATES} and {@link HostRoleStatus#HOLDING_STATES}
+   * to determine which {@link HostRoleCommand} instances to abort.
    *
    * Returns the list of the aborted operations.
    */

+ 1 - 1
ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionDBAccessorImpl.java

@@ -219,7 +219,7 @@ public class ActionDBAccessorImpl implements ActionDBAccessor {
     // only request commands which actually need to be aborted; requesting all
     // commands here can cause OOM problems during large requests like upgrades
     List<HostRoleCommandEntity> commands = hostRoleCommandDAO.findByRequestIdAndStatuses(requestId,
-        HostRoleStatus.SCHEDULED_STATES);
+        HostRoleStatus.STATES_TO_ABORT);
 
     for (HostRoleCommandEntity command : commands) {
       command.setStatus(HostRoleStatus.ABORTED);

+ 3 - 9
ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java

@@ -1234,9 +1234,9 @@ class ActionScheduler implements Runnable {
           cancelHostRoleCommands(tasksToDequeue, reason);
         }
 
-        // abort any stages in progress that belong to this request; don't execute this for all stages since
-        // that could lead to OOM errors on large requests, like those for
-        // upgrades
+        // abort any stages of this request that are in progress or in a holding state;
+        // don't execute this for all stages since that could lead to OOM errors on large requests,
+        // like those for upgrades
         List<Stage> stagesInProgress = db.getStagesInProgressForRequest(requestId);
         for (Stage stageInProgress : stagesInProgress) {
           abortOperationsForStage(stageInProgress);
@@ -1270,12 +1270,6 @@ class ActionScheduler implements Runnable {
         }
       }
 
-      if (hostRoleCommand.getStatus().isHoldingState()) {
-        db.abortHostRole(hostRoleCommand.getHostName(),
-            hostRoleCommand.getRequestId(),
-            hostRoleCommand.getStageId(), hostRoleCommand.getRole().name());
-      }
-
       // If host role is an Action, we have to send an event
       if (hostRoleCommand.getRoleCommand().equals(RoleCommand.ACTIONEXECUTE)) {
         String clusterName = hostRoleCommand.getExecutionCommandWrapper().getExecutionCommand().getClusterName();

+ 4 - 0
ambari-server/src/main/java/org/apache/ambari/server/actionmanager/HostRoleStatus.java

@@ -22,6 +22,8 @@ import java.util.Collections;
 import java.util.EnumSet;
 import java.util.List;
 
+import org.apache.commons.collections.ListUtils;
+
 public enum HostRoleStatus {
   /**
    * Not queued for a host.
@@ -86,6 +88,8 @@ public enum HostRoleStatus {
 
   public static List<HostRoleStatus> SCHEDULED_STATES = Arrays.asList(PENDING, QUEUED, IN_PROGRESS);
 
+  public static List<HostRoleStatus> STATES_TO_ABORT = ListUtils.union(SCHEDULED_STATES, HOLDING_STATES);
+
   /**
    * The {@link HostRoleStatus}s that represent any commands which are
    * considered to be "Failed".

+ 10 - 2
ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionDBAccessorImpl.java

@@ -568,6 +568,7 @@ public class TestActionDBAccessorImpl {
     clusters.addHost("host2");
     clusters.addHost("host3");
     clusters.addHost("host4");
+    clusters.addHost("host5");
 
     s.addHostRoleExecutionCommand("host1", Role.HBASE_MASTER,
         RoleCommand.START,
@@ -589,13 +590,20 @@ public class TestActionDBAccessorImpl {
         RoleCommand.START,
         new ServiceComponentHostStartEvent(Role.HBASE_REGIONSERVER
             .toString(), "host4", System.currentTimeMillis()), "cluster1", "HBASE", false, false);
+    s.addHostRoleExecutionCommand(
+        "host5",
+        Role.HBASE_REGIONSERVER,
+        RoleCommand.START,
+        new ServiceComponentHostStartEvent(Role.HBASE_REGIONSERVER
+            .toString(), "host5", System.currentTimeMillis()), "cluster1", "HBASE", false, false);
     List<Stage> stages = new ArrayList<>();
     stages.add(s);
     s.getOrderedHostRoleCommands().get(0).setStatus(HostRoleStatus.PENDING);
     s.getOrderedHostRoleCommands().get(1).setStatus(HostRoleStatus.IN_PROGRESS);
-    s.getOrderedHostRoleCommands().get(2).setStatus(HostRoleStatus.QUEUED);
+    s.getOrderedHostRoleCommands().get(2).setStatus(HostRoleStatus.HOLDING_FAILED);
+    s.getOrderedHostRoleCommands().get(3).setStatus(HostRoleStatus.QUEUED);
 
-    HostRoleCommand cmd = s.getOrderedHostRoleCommands().get(3);
+    HostRoleCommand cmd = s.getOrderedHostRoleCommands().get(4);
     String hostName = cmd.getHostName();
     cmd.setStatus(HostRoleStatus.COMPLETED);
 

+ 0 - 35
ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java

@@ -2892,41 +2892,6 @@ public class TestActionScheduler {
 
   }
 
-  @Test
-  public void testAbortHolding() throws AmbariException {
-    UnitOfWork unitOfWork = EasyMock.createMock(UnitOfWork.class);
-    ActionDBAccessor db = EasyMock.createMock(ActionDBAccessor.class);
-    Clusters fsm = EasyMock.createMock(Clusters.class);
-    Configuration conf = new Configuration(new Properties());
-    HostEntity hostEntity1 = new HostEntity();
-    hostEntity1.setHostName("h1");
-    hostDAO.merge(hostEntity1);
-
-    db.abortHostRole("h1", -1L, -1L, "AMBARI_SERVER_ACTION");
-    EasyMock.expectLastCall();
-
-    EasyMock.replay(db);
-
-    ActionScheduler scheduler = new ActionScheduler(100, 50, db, fsm, 3,
-        new HostsMap((String) null),
-        unitOfWork, null, conf, entityManagerProviderMock,
-        (HostRoleCommandDAO)null, (HostRoleCommandFactory)null, null);
-
-    HostRoleCommand hrc1 = hostRoleCommandFactory.create("h1", Role.NAMENODE, null, RoleCommand.EXECUTE);
-    hrc1.setStatus(HostRoleStatus.COMPLETED);
-    HostRoleCommand hrc3 = hostRoleCommandFactory.create("h1", Role.AMBARI_SERVER_ACTION, null, RoleCommand.CUSTOM_COMMAND);
-    hrc3.setStatus(HostRoleStatus.HOLDING);
-    HostRoleCommand hrc4 = hostRoleCommandFactory.create("h1", Role.FLUME_HANDLER, null, RoleCommand.EXECUTE);
-    hrc4.setStatus(HostRoleStatus.PENDING);
-
-    List<HostRoleCommand> hostRoleCommands = Arrays.asList(hrc1, hrc3, hrc4);
-
-    scheduler.cancelHostRoleCommands(hostRoleCommands, "foo");
-
-    EasyMock.verify(db);
-
-  }
-
   @Test
   public void testAbortAmbariServerAction() throws AmbariException {
     UnitOfWork unitOfWork = EasyMock.createMock(UnitOfWork.class);