Browse Source

YARN-4086. Allow Aggregated Log readers to handle HAR files (rkanter)

(cherry picked from commit 6dd6ca442aba8612c3780399a42bb473e4483021)
Robert Kanter, cách đây 10 năm
mục cha
commit
ea64a9b277
15 tập tin đã thay đổi với 160 bổ sung và 4 xóa
  1. 2 0
      hadoop-yarn-project/CHANGES.txt
  2. 12 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml
  3. 50 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java
  4. 0 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/application_1440536969523_0001.har/_SUCCESS
  5. 3 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/application_1440536969523_0001.har/_index
  6. 2 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/application_1440536969523_0001.har/_masterindex
  7. BIN
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/application_1440536969523_0001.har/part-0
  8. 4 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml
  9. 15 1
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java
  10. 7 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/log/AggregatedLogsBlock.java
  11. 60 3
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogsBlock.java
  12. 0 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/application_1440536969523_0001.har/_SUCCESS
  13. 3 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/application_1440536969523_0001.har/_index
  14. 2 0
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/application_1440536969523_0001.har/_masterindex
  15. BIN
      hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/application_1440536969523_0001.har/part-0

+ 2 - 0
hadoop-yarn-project/CHANGES.txt

@@ -376,6 +376,8 @@ Release 2.8.0 - UNRELEASED
     YARN-4121. Fix typos in capacity scheduler documentation.
     (Kai Sasaki via vvasudev)
 
+    YARN-4086. Allow Aggregated Log readers to handle HAR files (rkanter)
+
   OPTIMIZATIONS
 
     YARN-3339. TestDockerContainerExecutor should pull a single image and not

+ 12 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml

@@ -134,6 +134,18 @@
 
   <build>
     <plugins>
+      <plugin>
+        <groupId>org.apache.rat</groupId>
+        <artifactId>apache-rat-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <exclude>src/test/resources/application_1440536969523_0001.har/_index</exclude>
+            <exclude>src/test/resources/application_1440536969523_0001.har/part-0</exclude>
+            <exclude>src/test/resources/application_1440536969523_0001.har/_masterindex</exclude>
+            <exclude>src/test/resources/application_1440536969523_0001.har/_SUCCESS</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
       <plugin>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-maven-plugins</artifactId>

+ 50 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java

@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.yarn.client.cli;
 
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.doReturn;
@@ -32,6 +33,7 @@ import java.io.IOException;
 import java.io.PrintStream;
 import java.io.PrintWriter;
 import java.io.Writer;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -318,6 +320,54 @@ public class TestLogsCLI {
     fs.delete(new Path(rootLogDir), true);
   }
 
+  @Test (timeout = 15000)
+  public void testFetchApplictionLogsHar() throws Exception {
+    String remoteLogRootDir = "target/logs/";
+    Configuration configuration = new Configuration();
+    configuration.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, true);
+    configuration
+        .set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogRootDir);
+    configuration.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
+    configuration.set(YarnConfiguration.YARN_ADMIN_ACL, "admin");
+    FileSystem fs = FileSystem.get(configuration);
+    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
+    URL harUrl = ClassLoader.getSystemClassLoader()
+        .getResource("application_1440536969523_0001.har");
+    assertNotNull(harUrl);
+    Path path =
+        new Path(remoteLogRootDir + ugi.getShortUserName()
+            + "/logs/application_1440536969523_0001");
+    if (fs.exists(path)) {
+      fs.delete(path, true);
+    }
+    assertTrue(fs.mkdirs(path));
+    Path harPath = new Path(path, "application_1440536969523_0001.har");
+    fs.copyFromLocalFile(false, new Path(harUrl.toURI()), harPath);
+    assertTrue(fs.exists(harPath));
+
+    YarnClient mockYarnClient =
+        createMockYarnClient(YarnApplicationState.FINISHED);
+    LogsCLI cli = new LogsCLIForTest(mockYarnClient);
+    cli.setConf(configuration);
+    int exitCode = cli.run(new String[]{"-applicationId",
+        "application_1440536969523_0001"});
+    assertTrue(exitCode == 0);
+    String out = sysOutStream.toString();
+    assertTrue(
+        out.contains("container_1440536969523_0001_01_000001 on host1_1111"));
+    assertTrue(out.contains("Hello stderr"));
+    assertTrue(out.contains("Hello stdout"));
+    assertTrue(out.contains("Hello syslog"));
+    assertTrue(
+        out.contains("container_1440536969523_0001_01_000002 on host2_2222"));
+    assertTrue(out.contains("Goodbye stderr"));
+    assertTrue(out.contains("Goodbye stdout"));
+    assertTrue(out.contains("Goodbye syslog"));
+    sysOutStream.reset();
+
+    fs.delete(new Path(remoteLogRootDir), true);
+  }
+
   private static void createContainerLogInLocalDir(Path appLogsDir,
       ContainerId containerId, FileSystem fs, List<String> logTypes) throws Exception {
     Path containerLogsDir = new Path(appLogsDir, containerId.toString());

+ 0 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/application_1440536969523_0001.har/_SUCCESS


+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/application_1440536969523_0001.har/_index

@@ -0,0 +1,3 @@
+%2F dir 1440540845855+504+rkanter+supergroup 0 0 host1_1111 host2_2222 
+%2Fhost1_1111 file part-0 0 394 1440540845834+420+rkanter+supergroup 
+%2Fhost2_2222 file part-0 394 400 1440540845854+420+rkanter+supergroup 

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/application_1440536969523_0001.har/_masterindex

@@ -0,0 +1,2 @@
+3 
+0 1520266628 0 214 

BIN
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/resources/application_1440536969523_0001.har/part-0


+ 4 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml

@@ -215,6 +215,10 @@
             <exclude>src/main/resources/webapps/static/dt-1.9.4/css/demo_table.css</exclude>
             <exclude>src/main/resources/webapps/static/dt-1.9.4/images/Sorting icons.psd</exclude>
             <exclude>src/main/resources/webapps/static/jquery/themes-1.9.1/base/jquery-ui.css</exclude>
+            <exclude>src/test/resources/application_1440536969523_0001.har/_index</exclude>
+            <exclude>src/test/resources/application_1440536969523_0001.har/part-0</exclude>
+            <exclude>src/test/resources/application_1440536969523_0001.har/_masterindex</exclude>
+            <exclude>src/test/resources/application_1440536969523_0001.har/_SUCCESS</exclude>
           </excludes>
         </configuration>
       </plugin>

+ 15 - 1
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/LogCLIHelpers.java

@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.HarFs;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -61,8 +62,9 @@ public class LogCLIHelpers implements Configurable {
         YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
         YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR));
     String suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(getConf());
+    ApplicationId applicationId = ConverterUtils.toApplicationId(appId);
     Path remoteAppLogDir = LogAggregationUtils.getRemoteAppLogDir(
-        remoteRootLogDir, ConverterUtils.toApplicationId(appId), jobOwner,
+        remoteRootLogDir, applicationId, jobOwner,
         suffix);
     RemoteIterator<FileStatus> nodeFiles;
     try {
@@ -80,6 +82,12 @@ public class LogCLIHelpers implements Configurable {
     while (nodeFiles.hasNext()) {
       FileStatus thisNodeFile = nodeFiles.next();
       String fileName = thisNodeFile.getPath().getName();
+      if (fileName.equals(applicationId + ".har")) {
+        Path p = new Path("har:///"
+            + thisNodeFile.getPath().toUri().getRawPath());
+        nodeFiles = HarFs.get(p.toUri(), conf).listStatusIterator(p);
+        continue;
+      }
       if (fileName.contains(LogAggregationUtils.getNodeString(nodeId))
           && !fileName.endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
         AggregatedLogFormat.LogReader reader = null;
@@ -207,6 +215,12 @@ public class LogCLIHelpers implements Configurable {
     boolean foundAnyLogs = false;
     while (nodeFiles.hasNext()) {
       FileStatus thisNodeFile = nodeFiles.next();
+      if (thisNodeFile.getPath().getName().equals(appId + ".har")) {
+        Path p = new Path("har:///"
+            + thisNodeFile.getPath().toUri().getRawPath());
+        nodeFiles = HarFs.get(p.toUri(), conf).listStatusIterator(p);
+        continue;
+      }
       if (!thisNodeFile.getPath().getName()
         .endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
         AggregatedLogFormat.LogReader reader =

+ 7 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/log/AggregatedLogsBlock.java

@@ -32,6 +32,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.HarFs;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -120,6 +121,12 @@ public class AggregatedLogsBlock extends HtmlBlock {
         AggregatedLogFormat.LogReader reader = null;
         try {
           FileStatus thisNodeFile = nodeFiles.next();
+          if (thisNodeFile.getPath().getName().equals(applicationId + ".har")) {
+            Path p = new Path("har:///"
+                + thisNodeFile.getPath().toUri().getRawPath());
+            nodeFiles = HarFs.get(p.toUri(), conf).listStatusIterator(p);
+            continue;
+          }
           if (!thisNodeFile.getPath().getName()
             .contains(LogAggregationUtils.getNodeString(nodeId))
               || thisNodeFile.getPath().getName()

+ 60 - 3
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogsBlock.java

@@ -23,6 +23,7 @@ import java.io.File;
 import java.io.FileWriter;
 import java.io.PrintWriter;
 import java.io.Writer;
+import java.net.URL;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
@@ -30,6 +31,7 @@ import java.util.Map;
 
 import javax.servlet.http.HttpServletRequest;
 
+import org.apache.commons.io.FileUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
@@ -117,7 +119,8 @@ public class TestAggregatedLogsBlock {
   }
 
   /**
-   * All ok and the AggregatedLogsBlockFor should aggregate logs and show it.
+   * Reading from logs should succeed and they should be shown in the
+   * AggregatedLogsBlock html.
    * 
    * @throws Exception
    */
@@ -144,8 +147,56 @@ public class TestAggregatedLogsBlock {
     assertTrue(out.contains("test log1"));
     assertTrue(out.contains("test log2"));
     assertTrue(out.contains("test log3"));
+  }
+
+  /**
+   * Reading from logs should succeed (from a HAR archive) and they should be
+   * shown in the AggregatedLogsBlock html.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testAggregatedLogsBlockHar() throws Exception {
+    FileUtil.fullyDelete(new File("target/logs"));
+    Configuration configuration = getConfiguration();
+
+    URL harUrl = ClassLoader.getSystemClassLoader()
+        .getResource("application_1440536969523_0001.har");
+    assertNotNull(harUrl);
+    String path = "target/logs/admin/logs/application_1440536969523_0001" +
+        "/application_1440536969523_0001.har";
+    FileUtils.copyDirectory(new File(harUrl.getPath()), new File(path));
+
+    AggregatedLogsBlockForTest aggregatedBlock = getAggregatedLogsBlockForTest(
+        configuration, "admin",
+        "container_1440536969523_0001_01_000001", "host1:1111");
+    ByteArrayOutputStream data = new ByteArrayOutputStream();
+    PrintWriter printWriter = new PrintWriter(data);
+    HtmlBlock html = new HtmlBlockForTest();
+    HtmlBlock.Block block = new BlockForTest(html, printWriter, 10, false);
+    aggregatedBlock.render(block);
 
+    block.getWriter().flush();
+    String out = data.toString();
+    assertTrue(out.contains("Hello stderr"));
+    assertTrue(out.contains("Hello stdout"));
+    assertTrue(out.contains("Hello syslog"));
+
+    aggregatedBlock = getAggregatedLogsBlockForTest(
+        configuration, "admin",
+        "container_1440536969523_0001_01_000002", "host2:2222");
+    data = new ByteArrayOutputStream();
+    printWriter = new PrintWriter(data);
+    html = new HtmlBlockForTest();
+    block = new BlockForTest(html, printWriter, 10, false);
+    aggregatedBlock.render(block);
+    block.getWriter().flush();
+    out = data.toString();
+    assertTrue(out.contains("Goodbye stderr"));
+    assertTrue(out.contains("Goodbye stdout"));
+    assertTrue(out.contains("Goodbye syslog"));
   }
+
   /**
    * Log files was deleted.
    * @throws Exception
@@ -188,14 +239,20 @@ public class TestAggregatedLogsBlock {
 
   private AggregatedLogsBlockForTest getAggregatedLogsBlockForTest(
       Configuration configuration, String user, String containerId) {
+    return getAggregatedLogsBlockForTest(configuration, user, containerId,
+        "localhost:1234");
+  }
+
+  private AggregatedLogsBlockForTest getAggregatedLogsBlockForTest(
+      Configuration configuration, String user, String containerId,
+      String nodeName) {
     HttpServletRequest request = mock(HttpServletRequest.class);
     when(request.getRemoteUser()).thenReturn(user);
     AggregatedLogsBlockForTest aggregatedBlock = new AggregatedLogsBlockForTest(
         configuration);
     aggregatedBlock.setRequest(request);
     aggregatedBlock.moreParams().put(YarnWebParams.CONTAINER_ID, containerId);
-    aggregatedBlock.moreParams().put(YarnWebParams.NM_NODENAME,
-        "localhost:1234");
+    aggregatedBlock.moreParams().put(YarnWebParams.NM_NODENAME, nodeName);
     aggregatedBlock.moreParams().put(YarnWebParams.APP_OWNER, user);
     aggregatedBlock.moreParams().put("start", "");
     aggregatedBlock.moreParams().put("end", "");

+ 0 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/application_1440536969523_0001.har/_SUCCESS


+ 3 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/application_1440536969523_0001.har/_index

@@ -0,0 +1,3 @@
+%2F dir 1440540845855+504+rkanter+supergroup 0 0 host1_1111 host2_2222 
+%2Fhost1_1111 file part-0 0 394 1440540845834+420+rkanter+supergroup 
+%2Fhost2_2222 file part-0 394 400 1440540845854+420+rkanter+supergroup 

+ 2 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/application_1440536969523_0001.har/_masterindex

@@ -0,0 +1,2 @@
+3 
+0 1520266628 0 214 

BIN
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/resources/application_1440536969523_0001.har/part-0