Browse Source

HADOOP-18055. Async Profiler endpoint for Hadoop daemons (#3824)

Reviewed-by: Akira Ajisaka <aajisaka@apache.org>
Viraj Jasani 3 years ago
parent
commit
f64fda0f00

+ 21 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java

@@ -27,6 +27,7 @@ import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URL;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.List;
@@ -771,6 +772,26 @@ public final class HttpServer2 implements FilterContainer {
 
     addDefaultServlets();
     addPrometheusServlet(conf);
+    addAsyncProfilerServlet(contexts);
+  }
+
+  /**
+   * Registers the {@code /prof} endpoint.
+   * <p>
+   * When an async-profiler home is configured, {@link ProfileServlet} is mounted on
+   * {@code /prof} and the profiler output directory is exposed under
+   * {@code /prof-output-hadoop} through {@link ProfileOutputServlet}. Otherwise
+   * {@link ProfilerDisabledServlet} is mounted on {@code /prof}.
+   *
+   * @param contexts handler collection that receives the output-serving context
+   * @throws IOException if the profiler output directory cannot be created
+   */
+  private void addAsyncProfilerServlet(ContextHandlerCollection contexts) throws IOException {
+    final String profilerHome = ProfileServlet.getAsyncProfilerHome();
+    final boolean profilerConfigured = profilerHome != null && !profilerHome.trim().isEmpty();
+    if (!profilerConfigured) {
+      addServlet("prof", "/prof", ProfilerDisabledServlet.class);
+      LOG.info("ASYNC_PROFILER_HOME environment variable and async.profiler.home system property "
+          + "not specified. Disabling /prof endpoint.");
+      return;
+    }
+
+    addServlet("prof", "/prof", ProfileServlet.class);
+    // the profiler writes its result files here; make sure the directory is present
+    final Path outputDir = Paths.get(ProfileServlet.OUTPUT_DIR);
+    if (Files.notExists(outputDir)) {
+      Files.createDirectories(outputDir);
+    }
+    final ServletContextHandler outputContext =
+        new ServletContextHandler(contexts, "/prof-output-hadoop");
+    outputContext.addServlet(ProfileOutputServlet.class, "/*");
+    outputContext.setResourceBase(outputDir.toAbsolutePath().toString());
+    outputContext.setDisplayName("prof-output-hadoop");
   }
 
   private void addPrometheusServlet(Configuration conf) {

+ 87 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java

@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.http;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.regex.Pattern;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.eclipse.jetty.servlet.DefaultServlet;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Servlet to serve files generated by {@link ProfileServlet}.
+ * <p>
+ * While the requested file is still smaller than {@link #MIN_COMPLETE_OUTPUT_BYTES} the
+ * response only carries a {@code Refresh} header so that the client retries the same URL;
+ * once the file has grown past that size it is served by {@link DefaultServlet}.
+ */
+@InterfaceAudience.Private
+public class ProfileOutputServlet extends DefaultServlet {
+
+  private static final long serialVersionUID = 1L;
+
+  private static final Logger LOG = LoggerFactory.getLogger(ProfileOutputServlet.class);
+  // default refresh period 2 sec
+  private static final int REFRESH_PERIOD = 2;
+  // async-profiler version 1.4 writes 'Started [cpu] profiling' to output file when profiler is
+  // running which gets replaced by final output. If final output is not ready yet, the file size
+  // will be <100 bytes (in all modes).
+  private static final int MIN_COMPLETE_OUTPUT_BYTES = 100;
+  // Alphanumeric characters, plus percent (url-encoding), equals, ampersand, dot and hyphen
+  private static final Pattern ALPHA_NUMERIC = Pattern.compile("[a-zA-Z0-9%=&.\\-]*");
+
+  /**
+   * Serves a profiler output file, or asks the client to retry while it is incomplete.
+   *
+   * @param req request whose path info names the output file
+   * @param resp response to populate
+   * @throws ServletException if the delegated {@link DefaultServlet#doGet} fails
+   * @throws IOException if writing the response fails
+   */
+  @Override
+  protected void doGet(final HttpServletRequest req, final HttpServletResponse resp)
+      throws ServletException, IOException {
+    if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) {
+      resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
+      ProfileServlet.setResponseHeader(resp);
+      resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!");
+      return;
+    }
+
+    String absoluteDiskPath = getServletContext().getRealPath(req.getPathInfo());
+    // getRealPath may return null when the path cannot be translated to a file; answer with
+    // 404 rather than letting new File(null) fail with a NullPointerException.
+    if (absoluteDiskPath == null) {
+      resp.setStatus(HttpServletResponse.SC_NOT_FOUND);
+      ProfileServlet.setResponseHeader(resp);
+      resp.getWriter().write("Requested profiler output could not be resolved.");
+      return;
+    }
+    File requestedFile = new File(absoluteDiskPath);
+    // File.length() is 0 for a file that does not exist yet, so a pending output file takes
+    // the auto-refresh branch as well.
+    if (requestedFile.length() < MIN_COMPLETE_OUTPUT_BYTES) {
+      LOG.info("{} is incomplete. Sending auto-refresh header.", requestedFile);
+      String refreshUrl = req.getRequestURI();
+      // Rebuild the query string (if we have one)
+      if (req.getQueryString() != null) {
+        refreshUrl += "?" + sanitize(req.getQueryString());
+      }
+      ProfileServlet.setResponseHeader(resp);
+      resp.setHeader("Refresh", REFRESH_PERIOD + ";" + refreshUrl);
+      resp.getWriter().write("This page will be auto-refreshed every " + REFRESH_PERIOD
+          + " seconds until the output file is ready. Redirecting to " + refreshUrl);
+    } else {
+      super.doGet(req, resp);
+    }
+  }
+
+  /**
+   * Returns the input unchanged when it only contains characters allowed by
+   * {@link #ALPHA_NUMERIC}.
+   *
+   * @param input query string to validate, must not be null
+   * @return the unchanged input
+   * @throws RuntimeException if the input contains any other character
+   */
+  static String sanitize(String input) {
+    // Basic test to try to avoid any XSS attacks or HTML content showing up.
+    // Duplicates HtmlQuoting a little, but avoid destroying ampersand.
+    if (ALPHA_NUMERIC.matcher(input).matches()) {
+      return input;
+    }
+    throw new RuntimeException("Non-alphanumeric data found in input, aborting.");
+  }
+}

+ 394 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java

@@ -0,0 +1,394 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.http;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.hadoop.thirdparty.com.google.common.base.Joiner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.util.ProcessUtils;
+
+/**
+ * Servlet that runs async-profiler as web-endpoint.
+ * <p>
+ * The following options from async-profiler can be specified as query parameters.
+ * //  -e event          profiling event: cpu|alloc|lock|cache-misses etc.
+ * //  -d duration       run profiling for 'duration' seconds (integer)
+ * //  -i interval       sampling interval in nanoseconds (long)
+ * //  -j jstackdepth    maximum Java stack depth (integer)
+ * //  -b bufsize        frame buffer size (long)
+ * //  -t                profile different threads separately
+ * //  -s                simple class names instead of FQN
+ * //  -o fmt[,fmt...]   output format: summary|traces|flat|collapsed|svg|tree|jfr|html
+ * //  --width px        SVG width pixels (integer)
+ * //  --height px       SVG frame height pixels (integer)
+ * //  --minwidth px     skip frames smaller than px (double)
+ * //  --reverse         generate stack-reversed FlameGraph / Call tree
+ * <p>
+ * Example:
+ * If Namenode http address is localhost:9870, and ResourceManager http address is localhost:8088,
+ * ProfileServlet running with async-profiler setup can be accessed with
+ * http://localhost:9870/prof and http://localhost:8088/prof for Namenode and ResourceManager
+ * processes respectively.
+ * Deep dive into some params:
+ * - To collect 10 second CPU profile of current process i.e. Namenode (returns FlameGraph svg)
+ * curl "http://localhost:9870/prof"
+ * - To collect 10 second CPU profile of pid 12345 (returns FlameGraph svg)
+ * curl "http://localhost:9870/prof?pid=12345" (For instance, provide pid of Datanode)
+ * - To collect 30 second CPU profile of pid 12345 (returns FlameGraph svg)
+ * curl "http://localhost:9870/prof?pid=12345&amp;duration=30"
+ * - To collect 1 minute CPU profile of current process and output in tree format (html)
+ * curl "http://localhost:9870/prof?output=tree&amp;duration=60"
+ * - To collect 10 second heap allocation profile of current process (returns FlameGraph svg)
+ * curl "http://localhost:9870/prof?event=alloc"
+ * - To collect lock contention profile of current process (returns FlameGraph svg)
+ * curl "http://localhost:9870/prof?event=lock"
+ * <p>
+ * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events)
+ * // Perf events:
+ * //    cpu
+ * //    page-faults
+ * //    context-switches
+ * //    cycles
+ * //    instructions
+ * //    cache-references
+ * //    cache-misses
+ * //    branches
+ * //    branch-misses
+ * //    bus-cycles
+ * //    L1-dcache-load-misses
+ * //    LLC-load-misses
+ * //    dTLB-load-misses
+ * //    mem:breakpoint
+ * //    trace:tracepoint
+ * // Java events:
+ * //    alloc
+ * //    lock
+ */
+@InterfaceAudience.Private
+public class ProfileServlet extends HttpServlet {
+
+  private static final long serialVersionUID = 1L;
+  private static final Logger LOG = LoggerFactory.getLogger(ProfileServlet.class);
+
+  static final String ACCESS_CONTROL_ALLOW_METHODS = "Access-Control-Allow-Methods";
+  static final String ACCESS_CONTROL_ALLOW_ORIGIN = "Access-Control-Allow-Origin";
+  private static final String ALLOWED_METHODS = "GET";
+  private static final String CONTENT_TYPE_TEXT = "text/plain; charset=utf-8";
+  private static final String ASYNC_PROFILER_HOME_ENV = "ASYNC_PROFILER_HOME";
+  private static final String ASYNC_PROFILER_HOME_SYSTEM_PROPERTY = "async.profiler.home";
+  private static final String PROFILER_SCRIPT = "/profiler.sh";
+  private static final int DEFAULT_DURATION_SECONDS = 10;
+  private static final AtomicInteger ID_GEN = new AtomicInteger(0);
+
+  static final String OUTPUT_DIR = System.getProperty("java.io.tmpdir") + "/prof-output-hadoop";
+
+  private enum Event {
+
+    CPU("cpu"),
+    ALLOC("alloc"),
+    LOCK("lock"),
+    PAGE_FAULTS("page-faults"),
+    CONTEXT_SWITCHES("context-switches"),
+    CYCLES("cycles"),
+    INSTRUCTIONS("instructions"),
+    CACHE_REFERENCES("cache-references"),
+    CACHE_MISSES("cache-misses"),
+    BRANCHES("branches"),
+    BRANCH_MISSES("branch-misses"),
+    BUS_CYCLES("bus-cycles"),
+    L1_DCACHE_LOAD_MISSES("L1-dcache-load-misses"),
+    LLC_LOAD_MISSES("LLC-load-misses"),
+    DTLB_LOAD_MISSES("dTLB-load-misses"),
+    MEM_BREAKPOINT("mem:breakpoint"),
+    TRACE_TRACEPOINT("trace:tracepoint");
+
+    private final String internalName;
+
+    Event(final String internalName) {
+      this.internalName = internalName;
+    }
+
+    public String getInternalName() {
+      return internalName;
+    }
+
+    public static Event fromInternalName(final String name) {
+      for (Event event : values()) {
+        if (event.getInternalName().equalsIgnoreCase(name)) {
+          return event;
+        }
+      }
+
+      return null;
+    }
+  }
+
+  /**
+   * Output formats selectable through the 'output' query parameter. The lower-cased constant
+   * name is passed to the profiler script with the -o option and is also used as the extension
+   * of the generated file.
+   */
+  private enum Output {
+    SUMMARY, TRACES, FLAT, COLLAPSED,
+    // No SVG in 2.x asyncprofiler.
+    SVG,
+    TREE, JFR,
+    // In 2.x asyncprofiler, this is how you get flamegraphs.
+    HTML
+  }
+
+  private final Lock profilerLock = new ReentrantLock();
+  private transient volatile Process process;
+  private final String asyncProfilerHome;
+  private Integer pid;
+
+  /**
+   * Captures the pid of the current process and the configured async-profiler home, and logs
+   * both values.
+   */
+  public ProfileServlet() {
+    this.pid = ProcessUtils.getPid();
+    this.asyncProfilerHome = getAsyncProfilerHome();
+    LOG.info("Servlet process PID: {} asyncProfilerHome: {}", pid, asyncProfilerHome);
+  }
+
+  /**
+   * Starts an async-profiler run against the requested process and points the client at the
+   * location where the output file will be served.
+   * <p>
+   * The request is rejected when instrumentation access is denied, when the async-profiler home
+   * is not configured, when no target pid can be determined, or when a profiler process started
+   * by this servlet is still alive. Otherwise the profiler script is launched asynchronously and
+   * the response carries status 202 plus a {@code Refresh} header targeting the output file
+   * under {@code /prof-output-hadoop/}.
+   *
+   * @param req request whose query parameters select pid, duration, event, output and the
+   *     remaining profiler options
+   * @param resp response that receives the status, headers and a plain-text message
+   * @throws IOException if writing the response fails
+   */
+  @Override
+  protected void doGet(final HttpServletRequest req, final HttpServletResponse resp)
+      throws IOException {
+    if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) {
+      resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
+      setResponseHeader(resp);
+      resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!");
+      return;
+    }
+
+    // make sure async profiler home is set
+    if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) {
+      resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+      setResponseHeader(resp);
+      resp.getWriter().write("ASYNC_PROFILER_HOME env is not set.\n\n"
+          + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n"
+          + "environment is properly configured.");
+      return;
+    }
+
+    // if pid is explicitly specified, use it else default to current process. The value is kept
+    // in a local: storing it in the servlet's field would turn a one-off 'pid' query parameter
+    // into the default target of every later request.
+    final Integer targetPid = getInteger(req, "pid", pid);
+
+    // if pid is not specified in query param and if current process pid cannot be determined
+    if (targetPid == null) {
+      resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+      setResponseHeader(resp);
+      resp.getWriter().write(
+          "'pid' query parameter unspecified or unable to determine PID of current process.");
+      return;
+    }
+
+    final int duration = getInteger(req, "duration", DEFAULT_DURATION_SECONDS);
+    final Output output = getOutput(req);
+    final Event event = getEvent(req);
+    final Long interval = getLong(req, "interval");
+    final Integer jstackDepth = getInteger(req, "jstackdepth", null);
+    final Long bufsize = getLong(req, "bufsize");
+    final boolean thread = req.getParameterMap().containsKey("thread");
+    final boolean simple = req.getParameterMap().containsKey("simple");
+    final Integer width = getInteger(req, "width", null);
+    final Integer height = getInteger(req, "height", null);
+    final Double minwidth = getMinWidth(req);
+    final boolean reverse = req.getParameterMap().containsKey("reverse");
+
+    // cheap check before waiting for the lock
+    if (isProfilerRunning()) {
+      rejectProfilerAlreadyRunning(resp);
+      return;
+    }
+
+    try {
+      int lockTimeoutSecs = 3;
+      if (!profilerLock.tryLock(lockTimeoutSecs, TimeUnit.SECONDS)) {
+        setResponseHeader(resp);
+        resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+        resp.getWriter()
+            .write("Unable to acquire lock. Another instance of profiler might be running.");
+        LOG.warn("Unable to acquire lock in {} seconds. Another instance of profiler might be"
+            + " running.", lockTimeoutSecs);
+        return;
+      }
+      try {
+        // Check again now that the lock is held: a concurrent request may have started the
+        // profiler between the check above and the lock acquisition.
+        if (isProfilerRunning()) {
+          rejectProfilerAlreadyRunning(resp);
+          return;
+        }
+
+        File outputFile = new File(OUTPUT_DIR,
+            "async-prof-pid-" + targetPid + "-" + event.name().toLowerCase() + "-" + ID_GEN
+                .incrementAndGet() + "." + output.name().toLowerCase());
+        List<String> cmd = new ArrayList<>();
+        cmd.add(asyncProfilerHome + PROFILER_SCRIPT);
+        cmd.add("-e");
+        cmd.add(event.getInternalName());
+        cmd.add("-d");
+        cmd.add("" + duration);
+        cmd.add("-o");
+        cmd.add(output.name().toLowerCase());
+        cmd.add("-f");
+        cmd.add(outputFile.getAbsolutePath());
+        if (interval != null) {
+          cmd.add("-i");
+          cmd.add(interval.toString());
+        }
+        if (jstackDepth != null) {
+          cmd.add("-j");
+          cmd.add(jstackDepth.toString());
+        }
+        if (bufsize != null) {
+          cmd.add("-b");
+          cmd.add(bufsize.toString());
+        }
+        if (thread) {
+          cmd.add("-t");
+        }
+        if (simple) {
+          cmd.add("-s");
+        }
+        if (width != null) {
+          cmd.add("--width");
+          cmd.add(width.toString());
+        }
+        if (height != null) {
+          cmd.add("--height");
+          cmd.add(height.toString());
+        }
+        if (minwidth != null) {
+          cmd.add("--minwidth");
+          cmd.add(minwidth.toString());
+        }
+        if (reverse) {
+          cmd.add("--reverse");
+        }
+        cmd.add(targetPid.toString());
+        process = ProcessUtils.runCmdAsync(cmd);
+
+        // set response and set refresh header to output location
+        setResponseHeader(resp);
+        resp.setStatus(HttpServletResponse.SC_ACCEPTED);
+        String relativeUrl = "/prof-output-hadoop/" + outputFile.getName();
+        resp.getWriter().write("Started [" + event.getInternalName()
+            + "] profiling. This page will automatically redirect to " + relativeUrl + " after "
+            + duration + " seconds. "
+            + "If empty diagram and Linux 4.6+, see 'Basic Usage' section on the Async "
+            + "Profiler Home Page, https://github.com/jvm-profiling-tools/async-profiler."
+            + "\n\nCommand:\n" + Joiner.on(" ").join(cmd));
+
+        // to avoid auto-refresh by ProfileOutputServlet, refreshDelay can be specified
+        // via url param
+        int refreshDelay = getInteger(req, "refreshDelay", 0);
+
+        // instead of sending redirect, set auto-refresh so that browsers will refresh
+        // with redirected url
+        resp.setHeader("Refresh", (duration + refreshDelay) + ";" + relativeUrl);
+        resp.getWriter().flush();
+      } finally {
+        profilerLock.unlock();
+      }
+    } catch (InterruptedException e) {
+      // restore the interrupt flag so that the servlet container still sees the interruption
+      Thread.currentThread().interrupt();
+      LOG.warn("Interrupted while acquiring profile lock.", e);
+      resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+    }
+  }
+
+  /**
+   * @return true while a profiler process launched by this servlet is still alive
+   */
+  private boolean isProfilerRunning() {
+    final Process current = process;
+    return current != null && current.isAlive();
+  }
+
+  /**
+   * Answers the request with the "profiler already running" error.
+   *
+   * @param resp response to populate
+   * @throws IOException if writing the response fails
+   */
+  private void rejectProfilerAlreadyRunning(final HttpServletResponse resp) throws IOException {
+    setResponseHeader(resp);
+    resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+    resp.getWriter().write("Another instance of profiler is already running.");
+  }
+
+  /**
+   * Reads an integer query parameter.
+   *
+   * @param req request to read from
+   * @param param name of the query parameter
+   * @param defaultValue value returned when the parameter is absent or not a valid integer
+   * @return the parsed value, or {@code defaultValue}
+   */
+  private Integer getInteger(final HttpServletRequest req, final String param,
+      final Integer defaultValue) {
+    final String raw = req.getParameter(param);
+    if (raw == null) {
+      return defaultValue;
+    }
+    try {
+      return Integer.valueOf(raw);
+    } catch (NumberFormatException ignored) {
+      // unparsable input is treated like a missing parameter
+      return defaultValue;
+    }
+  }
+
+  /**
+   * Reads a long query parameter.
+   *
+   * @param req request to read from
+   * @param param name of the query parameter
+   * @return the parsed value, or null when the parameter is absent or not a valid long
+   */
+  private Long getLong(final HttpServletRequest req, final String param) {
+    final String raw = req.getParameter(param);
+    if (raw == null) {
+      return null;
+    }
+    try {
+      return Long.valueOf(raw);
+    } catch (NumberFormatException ignored) {
+      // unparsable input is treated like a missing parameter
+      return null;
+    }
+  }
+
+  /**
+   * Reads the 'minwidth' query parameter.
+   *
+   * @param req request to read from
+   * @return the parsed value, or null when the parameter is absent or not a valid double
+   */
+  private Double getMinWidth(final HttpServletRequest req) {
+    final String raw = req.getParameter("minwidth");
+    if (raw == null) {
+      return null;
+    }
+    try {
+      return Double.valueOf(raw);
+    } catch (NumberFormatException ignored) {
+      // unparsable input is treated like a missing parameter
+      return null;
+    }
+  }
+
+  /**
+   * Resolves the 'event' query parameter.
+   *
+   * @param req request to read from
+   * @return the requested event, or {@link Event#CPU} when the parameter is absent or unknown
+   */
+  private Event getEvent(final HttpServletRequest req) {
+    // fromInternalName yields null for a null or unknown name
+    final Event requested = Event.fromInternalName(req.getParameter("event"));
+    return requested != null ? requested : Event.CPU;
+  }
+
+  /**
+   * Resolves the 'output' query parameter.
+   *
+   * @param req request to read from
+   * @return the requested format, or {@link Output#HTML} when the parameter is absent or does
+   *     not name a known format
+   */
+  private Output getOutput(final HttpServletRequest req) {
+    final String requested = req.getParameter("output");
+    if (requested == null) {
+      return Output.HTML;
+    }
+    try {
+      return Output.valueOf(requested.trim().toUpperCase());
+    } catch (IllegalArgumentException ignored) {
+      // unknown format names fall back to the default
+      return Output.HTML;
+    }
+  }
+
+  /**
+   * Applies the headers shared by the profiler servlets: the allowed-methods and allow-origin
+   * CORS headers and a plain-text UTF-8 content type.
+   *
+   * @param response response to decorate
+   */
+  static void setResponseHeader(final HttpServletResponse response) {
+    // CORS headers
+    response.setHeader(ACCESS_CONTROL_ALLOW_METHODS, ALLOWED_METHODS);
+    response.setHeader(ACCESS_CONTROL_ALLOW_ORIGIN, "*");
+    // all profiler servlet messages are plain text
+    response.setContentType(CONTENT_TYPE_TEXT);
+  }
+
+  static String getAsyncProfilerHome() {
+    String asyncProfilerHome = System.getenv(ASYNC_PROFILER_HOME_ENV);
+    // if ENV is not set, see if -Dasync.profiler.home=/path/to/async/profiler/home is set
+    if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) {
+      asyncProfilerHome = System.getProperty(ASYNC_PROFILER_HOME_SYSTEM_PROPERTY);
+    }
+
+    return asyncProfilerHome;
+  }
+
+}

+ 44 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java

@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.http;
+
+import java.io.IOException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Servlet for disabled async-profiler.
+ * <p>
+ * Every GET request is answered with status 500 and a plain-text hint that the profiler
+ * prerequisites are missing.
+ */
+@InterfaceAudience.Private
+public class ProfilerDisabledServlet extends HttpServlet {
+
+  // declared explicitly, as in ProfileServlet and ProfileOutputServlet
+  private static final long serialVersionUID = 1L;
+
+  private static final String DISABLED_MESSAGE = "The profiler servlet was disabled at startup.\n\n"
+      + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n"
+      + "environment is properly configured.";
+
+  /**
+   * Reports that the profiler endpoint is disabled.
+   *
+   * @param req incoming request (not inspected)
+   * @param resp response that receives status 500, the shared profiler headers and the message
+   * @throws IOException if writing the response fails
+   */
+  @Override
+  protected void doGet(final HttpServletRequest req, final HttpServletResponse resp)
+      throws IOException {
+    resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
+    ProfileServlet.setResponseHeader(resp);
+    resp.getWriter().write(DISABLED_MESSAGE);
+  }
+
+}

+ 74 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java

@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Process related utilities.
+ */
+@InterfaceAudience.Private
+public final class ProcessUtils {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ProcessUtils.class);
+
+  private ProcessUtils() {
+    // no-op
+  }
+
+  /**
+   * Determines the pid of the current JVM.
+   * <p>
+   * The JVM_PID environment variable is consulted first. When it is unset, blank or not a
+   * valid integer, the text before the '@' in the runtime MXBean name is used instead.
+   *
+   * @return the pid, or null when neither source yields a valid integer
+   */
+  public static Integer getPid() {
+    // JVM_PID can be exported in service start script
+    final Integer pidFromEnv = parsePid(System.getenv("JVM_PID"));
+    if (pidFromEnv != null) {
+      return pidFromEnv;
+    }
+
+    // In case if it is not set correctly, fallback to mxbean which is implementation specific.
+    final String name = ManagementFactory.getRuntimeMXBean().getName();
+    if (name != null) {
+      final int idx = name.indexOf('@');
+      if (idx != -1) {
+        return parsePid(name.substring(0, idx));
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Parses a pid, tolerating surrounding whitespace.
+   *
+   * @param pidStr text to parse, may be null
+   * @return the parsed pid, or null when the text is null, blank or not a valid integer
+   */
+  private static Integer parsePid(final String pidStr) {
+    if (pidStr == null) {
+      return null;
+    }
+    final String trimmed = pidStr.trim();
+    if (trimmed.isEmpty()) {
+      return null;
+    }
+    try {
+      return Integer.valueOf(trimmed);
+    } catch (NumberFormatException ignored) {
+      // not a usable pid; the caller decides on the fallback
+      return null;
+    }
+  }
+
+  /**
+   * Starts the given command without waiting for it to finish. The child process inherits the
+   * standard input, output and error streams of the current JVM.
+   *
+   * @param cmd program and arguments
+   * @return the started process
+   * @throws IllegalStateException if the process cannot be started
+   */
+  public static Process runCmdAsync(List<String> cmd) {
+    try {
+      LOG.info("Running command async: {}", cmd);
+      return new ProcessBuilder(cmd).inheritIO().start();
+    } catch (IOException e) {
+      throw new IllegalStateException(e);
+    }
+  }
+}

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -78,7 +78,7 @@
   <value>false</value>
   <description>
     Indicates if administrator ACLs are required to access
-    instrumentation servlets (JMX, METRICS, CONF, STACKS).
+    instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF).
   </description>
 </property>
 

+ 145 - 0
hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md

@@ -0,0 +1,145 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+Async Profiler Servlet for Hadoop
+========================================
+
+<!-- MACRO{toc|fromDepth=0|toDepth=3} -->
+
+Purpose
+-------
+
+This document describes how to configure and use async profiler
+with Hadoop applications.
+Async profiler is a low overhead sampling profiler for Java that
+does not suffer from Safepoint bias problem. It features
+HotSpot-specific APIs to collect stack traces and to track memory
+allocations. The profiler works with OpenJDK, Oracle JDK and other
+Java runtimes based on the HotSpot JVM.
+
+Hadoop profiler servlet supports Async Profiler major versions
+1.x and 2.x.
+
+Prerequisites
+-------------
+
+Make sure Hadoop is installed, configured and setup correctly.
+For more information see:
+
+* [Single Node Setup](./SingleCluster.html) for first-time users.
+* [Cluster Setup](./ClusterSetup.html) for large, distributed clusters.
+
+Go to https://github.com/jvm-profiling-tools/async-profiler,
+download a release appropriate for your platform, and install
+on every cluster host.
+
+Set `ASYNC_PROFILER_HOME` in the environment (put it in hadoop-env.sh)
+to the root directory of the async-profiler install location, or pass
+it on the Hadoop daemon's command line as a system property as
+`-Dasync.profiler.home=/path/to/async-profiler`.
+
+
+Usage
+--------
+
+Once the prerequisites have been satisfied, access to the async-profiler
+is available by using Namenode or ResourceManager UI.
+
+The following options from async-profiler can be specified as query parameters.
+*  `-e event`          profiling event: cpu|alloc|lock|cache-misses etc.
+*  `-d duration`       run profiling for 'duration' seconds (integer)
+*  `-i interval`       sampling interval in nanoseconds (long)
+*  `-j jstackdepth`    maximum Java stack depth (integer)
+*  `-b bufsize`        frame buffer size (long)
+*  `-t`                profile different threads separately
+*  `-s`                simple class names instead of FQN
+*  `-o fmt[,fmt...]`   output format: summary|traces|flat|collapsed|svg|tree|jfr|html
+*  `--width px`        SVG width pixels (integer)
+*  `--height px`       SVG frame height pixels (integer)
+*  `--minwidth px`     skip frames smaller than px (double)
+*  `--reverse`         generate stack-reversed FlameGraph / Call tree
+
+
+Example:
+If Namenode http address is localhost:9870, and ResourceManager http
+address is localhost:8088, ProfileServlet running with async-profiler
+setup can be accessed with http://localhost:9870/prof and
+http://localhost:8088/prof for Namenode and ResourceManager processes
+respectively.
+
+Diving deep into some params:
+
+* To collect 10 second CPU profile of current process
+  (returns FlameGraph svg)
+  * `curl http://localhost:9870/prof` (FlameGraph svg for Namenode)
+  * `curl http://localhost:8088/prof` (FlameGraph svg for ResourceManager)
+* To collect 10 second CPU profile of pid 12345 (returns FlameGraph svg)
+  * `curl http://localhost:9870/prof?pid=12345` (For instance, provide
+   pid of Datanode here)
+* To collect 30 second CPU profile of pid 12345 (returns FlameGraph svg)
+  * `curl http://localhost:9870/prof?pid=12345&duration=30`
+* To collect 1 minute CPU profile of current process and output in tree
+  format (html)
+  * `curl http://localhost:9870/prof?output=tree&duration=60`
+* To collect 10 second heap allocation profile of current process
+  (returns FlameGraph svg)
+  * `curl http://localhost:9870/prof?event=alloc`
+* To collect lock contention profile of current process
+  (returns FlameGraph svg)
+  * `curl http://localhost:9870/prof?event=lock`
+
+
+The following event types are supported by async-profiler.
+Use the 'event' parameter to specify. Default is 'cpu'.
+Not all operating systems will support all types.
+
+Perf events:
+
+* cpu
+* page-faults
+* context-switches
+* cycles
+* instructions
+* cache-references
+* cache-misses
+* branches
+* branch-misses
+* bus-cycles
+* L1-dcache-load-misses
+* LLC-load-misses
+* dTLB-load-misses
+
+Java events:
+
+* alloc
+* lock
+
+The following output formats are supported.
+Use the 'output' parameter to specify. Default is 'html' (a FlameGraph rendered as HTML).
+
+Output formats:
+
+* summary: A dump of basic profiling statistics.
+* traces: Call traces.
+* flat: Flat profile (top N hot methods).
+* collapsed: Collapsed call traces in the format used by FlameGraph
+  script. This is a collection of call stacks, where each line is a
+  semicolon separated list of frames followed by a counter.
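+* svg: FlameGraph in SVG format (not available in async-profiler 2.x).
+* tree: Call tree in HTML format.
+* jfr: Call traces in Java Flight Recorder format.
+* html: FlameGraph in HTML format (this is how async-profiler 2.x produces flame graphs).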
+
+The 'duration' parameter specifies how long to collect trace data
+before generating output, specified in seconds. The default is 10 seconds.
+

+ 95 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java

@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.http;
+
+import java.io.IOException;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import javax.servlet.http.HttpServletResponse;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Small test to cover default disabled prof endpoint.
+ *
+ * <p>One test server is created with {@code createTestServer()} and started
+ * once for the whole class, then stopped after the last test. The tests
+ * expect {@code /prof} to answer GET with
+ * {@code HttpServletResponse.SC_INTERNAL_SERVER_ERROR} and PUT, POST and
+ * DELETE with {@code HttpServletResponse.SC_METHOD_NOT_ALLOWED}, while still
+ * returning the CORS headers named by the {@code ProfileServlet} constants.
+ *
+ * <p>NOTE(review): presumably the endpoint is in its disabled state because
+ * no async-profiler home is configured in the test environment - confirm.
+ */
+public class TestDisabledProfileServlet extends HttpServerFunctionalTest {
+
+  private static HttpServer2 server;
+  private static URL baseUrl;
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    server = createTestServer();
+    server.start();
+    baseUrl = getServerURL(server); // every request below is resolved against this URL
+  }
+
+  @AfterClass
+  public static void cleanup() throws Exception {
+    server.stop();
+  }
+
+  @Test
+  public void testQuery() throws Exception {
+    try {
+      readOutput(new URL(baseUrl, "/prof"));
+      throw new IllegalStateException("Should not reach here"); // not an IOException, so the catch below does not swallow it
+    } catch (IOException e) {
+      assertTrue(e.getMessage()
+          .contains(HttpServletResponse.SC_INTERNAL_SERVER_ERROR + " for URL: " + baseUrl));
+    }
+
+    // CORS headers: the response must list GET as the allowed method and carry an allow-origin value
+    HttpURLConnection conn =
+        (HttpURLConnection) new URL(baseUrl, "/prof").openConnection();
+    assertEquals("GET", conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_METHODS));
+    assertNotNull(conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_ORIGIN));
+    conn.disconnect();
+  }
+
+  @Test
+  public void testRequestMethods() throws IOException {
+    HttpURLConnection connection = getConnection("PUT");
+    assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED,
+        connection.getResponseCode());
+    connection.disconnect();
+    connection = getConnection("POST");
+    assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED,
+        connection.getResponseCode());
+    connection.disconnect();
+    connection = getConnection("DELETE");
+    assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED,
+        connection.getResponseCode());
+    connection.disconnect();
+    connection = getConnection("GET"); // unlike the methods above, GET is expected to yield SC_INTERNAL_SERVER_ERROR
+    assertEquals("Unexpected response code", HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
+        connection.getResponseCode());
+    connection.disconnect();
+  }
+
+  private HttpURLConnection getConnection(final String method) throws IOException {
+    URL url = new URL(baseUrl, "/prof"); // all method checks target the same /prof path
+    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+    conn.setRequestMethod(method);
+    return conn;
+  }
+
+}

+ 3 - 2
hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm

@@ -1208,9 +1208,10 @@ Name               | Description
 /logs              | Display log files
 /stacks            | Display JVM stacks
 /static/index.html | The static home page
+/prof              | Async Profiler endpoint
 
 To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`,
-and `/stacks`, configure the following properties in `kms-site.xml`:
+`/stacks` and `/prof`, configure the following properties in `kms-site.xml`:
 
 ```xml
   <property>
@@ -1224,7 +1225,7 @@ and `/stacks`, configure the following properties in `kms-site.xml`:
     <value>true</value>
     <description>
       Indicates if administrator ACLs are required to access
-      instrumentation servlets (JMX, METRICS, CONF, STACKS).
+      instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF).
     </description>
   </property>
 

+ 3 - 2
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm

@@ -162,9 +162,10 @@ Name               | Description
 /logs              | Display log files
 /stacks            | Display JVM stacks
 /static/index.html | The static home page
+/prof              | Async Profiler endpoint
 
 To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`,
-and `/stacks`, configure the following properties in `httpfs-site.xml`:
+`/stacks` and `/prof`, configure the following properties in `httpfs-site.xml`:
 
 ```xml
   <property>
@@ -178,7 +179,7 @@ and `/stacks`, configure the following properties in `httpfs-site.xml`:
     <value>true</value>
     <description>
       Indicates if administrator ACLs are required to access
-      instrumentation servlets (JMX, METRICS, CONF, STACKS).
+      instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF).
     </description>
   </property>
 

+ 1 - 0
hadoop-project/src/site/site.xml

@@ -69,6 +69,7 @@
       <item name="Tracing" href="hadoop-project-dist/hadoop-common/Tracing.html"/>
       <item name="Unix Shell Guide" href="hadoop-project-dist/hadoop-common/UnixShellGuide.html"/>
       <item name="Registry" href="hadoop-project-dist/hadoop-common/registry/index.html"/>
+      <item name="Async Profiler" href="hadoop-project-dist/hadoop-common/AsyncProfilerServlet.html" />
     </menu>
     
     <menu name="HDFS" inherit="top">