Browse Source

YARN-9545. Create healthcheck REST endpoint for ATSv2. Contributed by Zoltan Siegl.

(cherry picked from commit 72203f7a12c943ca231fbc40c058a1a094b009cd)
Sunil G 6 years ago
parent
commit
bc028d3ebb

+ 82 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/timeline/TimelineHealth.java

@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.api.records.timeline;
+
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+import javax.xml.bind.annotation.XmlAccessType;
+import javax.xml.bind.annotation.XmlAccessorType;
+import javax.xml.bind.annotation.XmlElement;
+import javax.xml.bind.annotation.XmlRootElement;
+
+/**
+ * This class holds health information for ATS.
+ */
+@XmlRootElement(name = "health")
+@XmlAccessorType(XmlAccessType.NONE)
+@InterfaceAudience.Public
+@InterfaceStability.Unstable
+public class TimelineHealth {
+
+  /**
+   * Timeline health status.
+   *
+   * RUNNING - Service is up and running
+   * READER_CONNECTION_FAILURE - getHealthStatus() of reader implementation
+   *    reported an error
+   */
+  public enum TimelineHealthStatus {
+    RUNNING,
+    READER_CONNECTION_FAILURE
+  }
+
+  private TimelineHealthStatus healthStatus;
+  private String diagnosticsInfo;
+
+  public TimelineHealth(TimelineHealthStatus healthy, String diagnosticsInfo) {
+    this.healthStatus = healthy;
+    this.diagnosticsInfo = diagnosticsInfo;
+  }
+
+  public TimelineHealth() {
+
+  }
+
+  @XmlElement(name = "healthStatus")
+  public TimelineHealthStatus getHealthStatus() {
+    return healthStatus;
+  }
+
+  @XmlElement(name = "diagnosticsInfo")
+  public String getDiagnosticsInfo() {
+    return diagnosticsInfo;
+  }
+
+
+  public void setHealthStatus(TimelineHealthStatus healthStatus) {
+    this.healthStatus = healthStatus;
+  }
+
+  public void setDiagnosticsInfo(String diagnosticsInfo) {
+    this.diagnosticsInfo = diagnosticsInfo;
+  }
+
+
+}

+ 13 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/HBaseTimelineReaderImpl.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -158,6 +159,18 @@ public class HBaseTimelineReaderImpl
     return reader.readEntityTypes(hbaseConf, conn);
   }
 
+  @Override
+  public TimelineHealth getHealthStatus() {
+    if (!this.isHBaseDown()) {
+      return new TimelineHealth(TimelineHealth.TimelineHealthStatus.RUNNING,
+          "");
+    } else {
+      return new TimelineHealth(
+          TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
+          "HBase connection is down");
+    }
+  }
+
   protected static final TimelineEntityFilters MONITOR_FILTERS =
       new TimelineEntityFilters.Builder().entityLimit(1L).build();
   protected static final TimelineDataToRetrieve DATA_TO_RETRIEVE =

+ 10 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/reader/TimelineReaderManager.java

@@ -26,6 +26,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
 import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.FlowRunEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
@@ -219,4 +220,13 @@ public class TimelineReaderManager extends AbstractService {
     }
     return callerUGI != null && adminACLsManager.isAdmin(callerUGI);
   }
+
+  /**
+   * Get the health status reported by the underlying timeline reader.
+   *
+   * @return a {@link TimelineHealth} with the reader's status and diagnostics.
+   */
+  public TimelineHealth getHealthStatus() {
+    return reader.getHealthStatus();
+  }
 }

+ 33 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/reader/TimelineReaderWebServices.java

@@ -48,6 +48,7 @@ import org.apache.hadoop.http.JettyUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.Time;
 import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
+import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
 import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
@@ -215,6 +216,38 @@ public class TimelineReaderWebServices {
     return TimelineUtils.createTimelineAbout("Timeline Reader API");
   }
 
+  /**
+   * Health check REST end point.
+   *
+   * @param req Servlet request.
+   * @param res Servlet response.
+   *
+   * @return A {@link Response} object with HTTP status 200 OK if the service
+   *         is running.
+   *         Otherwise, a {@link Response} object with HTTP status 500 is
+   *         returned.
+   */
+  @GET
+  @Path("/health")
+  @Produces(MediaType.APPLICATION_JSON + "; " + JettyUtils.UTF_8)
+  public Response health(
+      @Context HttpServletRequest req,
+      @Context HttpServletResponse res
+  ) {
+    Response response;
+    TimelineHealth timelineHealth = this.getTimelineReaderManager().getHealthStatus();
+    if (timelineHealth.getHealthStatus()
+        .equals(TimelineHealth.TimelineHealthStatus.RUNNING)) {
+      response = Response.ok(timelineHealth).build();
+    } else {
+       LOG.info("Timeline services health check: timeline reader reported " +
+           "connection failure");
+       response = Response.serverError().entity(timelineHealth).build();
+    }
+
+    return response;
+  }
+
   /**
    * Return a single entity for a given entity type and UID which is a delimited
    * string containing clusterid, userid, flow name, flowrun id and app id.

+ 23 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/FileSystemTimelineReaderImpl.java

@@ -42,6 +42,7 @@ import org.apache.commons.csv.CSVParser;
 import org.apache.commons.csv.CSVRecord;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEvent;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineMetric;
@@ -429,4 +430,26 @@ public class FileSystemTimelineReaderImpl extends AbstractService
     }
     return result;
   }
+
+  @Override
+  public TimelineHealth getHealthStatus() {
+    try {
+      File file = new File(rootPath);
+      if (file.exists()) {
+        return new TimelineHealth(TimelineHealth.TimelineHealthStatus.RUNNING,
+            "");
+      } else {
+        return new TimelineHealth(
+          TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
+            "Root path \"" + rootPath + "\" does not exist"
+          );
+      }
+    } catch (Exception e) {
+      return new TimelineHealth(
+          TimelineHealth.TimelineHealthStatus.READER_CONNECTION_FAILURE,
+          e.getMessage()
+          );
+    }
+
+  }
 }

+ 8 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/TimelineReader.java

@@ -24,6 +24,7 @@ import java.util.Set;
 import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.service.Service;
+import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
 import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineDataToRetrieve;
 import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineEntityFilters;
@@ -192,4 +193,11 @@ public interface TimelineReader extends Service {
    * storage.
    */
   Set<String> getEntityTypes(TimelineReaderContext context) throws IOException;
+
+  /**
+   * Check if reader connection is working properly.
+   *
+   * @return True if reader connection works as expected, false otherwise.
+   */
+  TimelineHealth getHealthStatus();
 }

+ 19 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/src/test/java/org/apache/hadoop/yarn/server/timelineservice/reader/TestTimelineReaderWebServices.java

@@ -37,6 +37,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.http.JettyUtils;
 import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout;
+import org.apache.hadoop.yarn.api.records.timeline.TimelineHealth;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntity;
 import org.apache.hadoop.yarn.api.records.timelineservice.TimelineEntityType;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
@@ -777,4 +778,22 @@ public class TestTimelineReaderWebServices {
       client.destroy();
     }
   }
+
+  @Test
+  public void testHealthCheck() throws Exception {
+    Client client = createClient();
+    try {
+      URI uri = URI.create("http://localhost:" + serverPort + "/ws/v2/"
+      + "timeline/health");
+      ClientResponse resp = getResponse(client, uri);
+      TimelineHealth timelineHealth =
+          resp.getEntity(new GenericType<TimelineHealth>() {
+          });
+      assertEquals(200, resp.getStatus());
+      assertEquals(TimelineHealth.TimelineHealthStatus.RUNNING,
+          timelineHealth.getHealthStatus());
+    } finally {
+      client.destroy();
+    }
+  }
 }