Browse Source

AMBARI-12657 - Cluster creates fail on larger deployments with SQL Azure DB (jonathanhurley)

Jonathan Hurley 10 years ago
parent
commit
65e50eee97

+ 3 - 3
ambari-server/src/main/java/org/apache/ambari/server/orm/dao/HostRoleCommandDAO.java

@@ -77,7 +77,7 @@ public class HostRoleCommandDAO {
    * SQL template to get all requests which have had all of their tasks
    * COMPLETED
    */
-  private static final String COMPLETED_REQUESTS_SQL = "SELECT DISTINCT task.requestId FROM HostRoleCommandEntity task WHERE NOT EXISTS (SELECT task.requestId FROM HostRoleCommandEntity task WHERE task.status IN :notCompletedStatuses) ORDER BY task.requestId {0}";
+  private static final String COMPLETED_REQUESTS_SQL = "SELECT DISTINCT task.requestId FROM HostRoleCommandEntity task WHERE task.requestId NOT IN (SELECT task.requestId FROM HostRoleCommandEntity task WHERE task.status IN :notCompletedStatuses) ORDER BY task.requestId {0}";
 
   @Inject
   Provider<EntityManager> entityManagerProvider;
@@ -383,9 +383,9 @@ public class HostRoleCommandDAO {
 
   @Transactional
   public void removeByHostId(Long hostId) {
-    Collection<HostRoleCommandEntity> commands = this.findByHostId(hostId);
+    Collection<HostRoleCommandEntity> commands = findByHostId(hostId);
     for (HostRoleCommandEntity cmd : commands) {
-      this.remove(cmd);
+      remove(cmd);
     }
   }
 

+ 38 - 13
ambari-server/src/main/java/org/apache/ambari/server/serveraction/ServerActionExecutor.java

@@ -18,10 +18,10 @@
 
 package org.apache.ambari.server.serveraction;
 
-import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
 
@@ -32,9 +32,10 @@ import org.apache.ambari.server.actionmanager.ActionDBAccessor;
 import org.apache.ambari.server.actionmanager.ExecutionCommandWrapper;
 import org.apache.ambari.server.actionmanager.HostRoleCommand;
 import org.apache.ambari.server.actionmanager.HostRoleStatus;
-import org.apache.ambari.server.actionmanager.RequestStatus;
+import org.apache.ambari.server.actionmanager.Request;
 import org.apache.ambari.server.agent.CommandReport;
 import org.apache.ambari.server.agent.ExecutionCommand;
+import org.apache.ambari.server.controller.internal.CalculatedStatus;
 import org.apache.ambari.server.utils.StageUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -123,7 +124,7 @@ public class ServerActionExecutor {
    * @param sleepTimeMS the time (in milliseconds) to wait between polling the database for more tasks
    */
   public ServerActionExecutor(ActionDBAccessor db, long sleepTimeMS) {
-    this.serverHostName = StageUtils.getHostName();
+    serverHostName = StageUtils.getHostName();
     this.db = db;
     this.sleepTimeMS = (sleepTimeMS < 1) ? POLLING_TIMEOUT_MS : sleepTimeMS;
   }
@@ -231,20 +232,44 @@ public class ServerActionExecutor {
   }
 
   /**
-   * Cleans up orphaned shared data Maps due to completed or failed request contexts.
+   * Cleans up orphaned shared data Maps due to completed or failed request
+   * contexts. We are unable to use {@link Request#getStatus()} since this field
+   * is not populated in the database but, instead, calculated in realtime.
    */
   private void cleanRequestShareDataContexts() {
-    // Clean out any orphaned request shared data contexts
-    for (RequestStatus status : EnumSet.of(RequestStatus.FAILED, RequestStatus.COMPLETED)) {
-      List<Long> requests = db.getRequestsByStatus(status, 100, true);
-
-      if (requests != null) {
-        synchronized (requestSharedDataMap) {
-          for (Long requestId : requests) {
-            requestSharedDataMap.remove(requestId);
+    // if the cache is empty, do nothing
+    if (requestSharedDataMap.isEmpty()) {
+      return;
+    }
+
+    try {
+      // for every item in the map, get the request and check its status
+      synchronized (requestSharedDataMap) {
+        Set<Long> requestIds = requestSharedDataMap.keySet();
+        List<Request> requests = db.getRequests(requestIds);
+        for (Request request : requests) {
+          // calculate the status from the stages and then remove from the map if
+          // necessary
+          CalculatedStatus calculatedStatus = CalculatedStatus.statusFromStages(
+              request.getStages());
+
+          // get the calculated status of the request
+          HostRoleStatus status = calculatedStatus.getStatus();
+
+          // remove the request from the map if the request is COMPLETED or
+          // FAILED
+          switch (status) {
+            case FAILED:
+            case COMPLETED:
+              requestSharedDataMap.remove(request.getRequestId());
+              break;
+            default:
+              break;
           }
         }
       }
+    } catch (Exception exception) {
+      LOG.warn("Unable to clear the server-side action request cache", exception);
     }
   }
 
@@ -551,7 +576,7 @@ public class ServerActionExecutor {
      * @param executionCommand the ExecutionCommand for the relevant task
      */
     private Worker(HostRoleCommand hostRoleCommand, ExecutionCommand executionCommand) {
-      this.taskId = hostRoleCommand.getTaskId();
+      taskId = hostRoleCommand.getTaskId();
       this.hostRoleCommand = hostRoleCommand;
       this.executionCommand = executionCommand;
     }

+ 60 - 0
ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionDBAccessorImpl.java

@@ -468,6 +468,39 @@ public class TestActionDBAccessorImpl {
     assertEquals("Request IDs not matches", requestIds, requestIdsResult);
   }
 
+  /**
+   * Tests getting requests which are fully COMPLETED out of the database. This
+   * will test for partial completions as well.
+   *
+   * @throws AmbariException
+   */
+  @Test
+  public void testGetCompletedRequests() throws AmbariException {
+    List<Long> requestIds = new ArrayList<Long>();
+    requestIds.add(requestId);
+    requestIds.add(requestId + 1);
+
+    // populate with a completed request
+    populateActionDBWithCompletedRequest(db, hostName, requestId, stageId);
+
+    // only 1 should come back
+    List<Long> requestIdsResult = db.getRequestsByStatus(RequestStatus.COMPLETED,
+        BaseRequest.DEFAULT_PAGE_SIZE, false);
+
+    assertEquals(1, requestIdsResult.size());
+    assertTrue(requestIdsResult.contains(requestId));
+
+    // populate with a partially completed request
+    populateActionDBWithPartiallyCompletedRequest(db, hostName, requestId + 1, stageId);
+
+    // the new request should not come back
+    requestIdsResult = db.getRequestsByStatus(RequestStatus.COMPLETED,
+        BaseRequest.DEFAULT_PAGE_SIZE, false);
+
+    assertEquals(1, requestIdsResult.size());
+    assertTrue(requestIdsResult.contains(requestId));
+  }
+
   @Test
   public void testGetRequestsByStatusWithParams() throws AmbariException {
     List<Long> ids = new ArrayList<Long>();
@@ -672,6 +705,33 @@ public class TestActionDBAccessorImpl {
     db.persistActions(request);
   }
 
+  private void populateActionDBWithCompletedRequest(ActionDBAccessor db, String hostname,
+      long requestId, long stageId) throws AmbariException {
+
+    Stage s = createStubStage(hostname, requestId, stageId);
+    List<Stage> stages = new ArrayList<Stage>();
+    stages.add(s);
+    Request request = new Request(stages, clusters);
+
+    s.setHostRoleStatus(hostname, Role.HBASE_REGIONSERVER.name(), HostRoleStatus.COMPLETED);
+    s.setHostRoleStatus(hostname, Role.HBASE_MASTER.name(), HostRoleStatus.COMPLETED);
+    db.persistActions(request);
+  }
+
+  private void populateActionDBWithPartiallyCompletedRequest(ActionDBAccessor db, String hostname,
+      long requestId, long stageId) throws AmbariException {
+
+    Stage s = createStubStage(hostname, requestId, stageId);
+    List<Stage> stages = new ArrayList<Stage>();
+    stages.add(s);
+
+    Request request = new Request(stages, clusters);
+
+    s.setHostRoleStatus(hostname, Role.HBASE_REGIONSERVER.name(), HostRoleStatus.PENDING);
+    s.setHostRoleStatus(hostname, Role.HBASE_MASTER.name(), HostRoleStatus.COMPLETED);
+    db.persistActions(request);
+  }
+
   private Stage createStubStage(String hostname, long requestId, long stageId) {
     Stage s = stageFactory.createNew(requestId, "/a/b", "cluster1", 1L, "action db accessor test",
       "clusterHostInfo", "commandParamsStage", "hostParamsStage");