9 år sedan · 0c4af0f998
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -600,6 +600,8 @@ Release 2.8.0 - UNRELEASED
 
															    MAPREDUCE-6302. Preempt reducers after a configurable timeout irrespective 
														
 
															    of headroom. (kasha)
														
 
															+   MAPREDUCE-6495. Docs for archive-logs tool (rkanter)
														
 
															+
														
 
															 Release 2.7.2 - UNRELEASED
														
 
															   INCOMPATIBLE CHANGES
														
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredCommands.md
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredCommands.md
@@ -18,6 +18,7 @@ MapReduce Commands Guide
 
															 * [Overview](#Overview)
														
 
															 * [User Commands](#User_Commands)
														
 
															     * [archive](#archive)
														
 
															+    * [archive-logs](#archive-logs)
														
 
															     * [classpath](#classpath)
														
 
															     * [distcp](#distcp)
														
 
															     * [job](#job)
														
@@ -53,6 +54,12 @@ Commands useful for users of a hadoop cluster.
 
															 Creates a hadoop archive. More information can be found at
														
 
															 [Hadoop Archives Guide](../../hadoop-archives/HadoopArchives.html).
														
 
															+### `archive-logs`
														
 
															+
														
 
															+A tool to combine YARN aggregated logs into Hadoop archives to reduce the number
														
 
															+of files in HDFS. More information can be found at
														
 
															+[Hadoop Archive Logs Guide](../../hadoop-archive-logs/HadoopArchiveLogs.html).
														
 
															+
														
 
															 ### `classpath`
														
 
															 Usage: `yarn classpath [--glob |--jar <path> |-h |--help]`
														
--- a/hadoop-project/src/site/site.xml
+++ b/hadoop-project/src/site/site.xml
@@ -156,6 +156,7 @@
 
															     <menu name="Tools" inherit="top">
														
 
															       <item name="Hadoop Streaming" href="hadoop-streaming/HadoopStreaming.html"/>
														
 
															       <item name="Hadoop Archives" href="hadoop-archives/HadoopArchives.html"/>
														
 
															+      <item name="Hadoop Archive Logs" href="hadoop-archive-logs/HadoopArchiveLogs.html"/>
														
 
															       <item name="DistCp" href="hadoop-distcp/DistCp.html"/>
														
 
															       <item name="GridMix" href="hadoop-gridmix/GridMix.html"/>
														
 
															       <item name="Rumen" href="hadoop-rumen/Rumen.html"/>
														
--- a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java
+++ b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java
@@ -221,7 +221,7 @@ public class HadoopArchiveLogs implements Tool {
 
															       CommandLine commandLine = parser.parse(opts, args);
														
 
															       if (commandLine.hasOption(HELP_OPTION)) {
														
 
															         HelpFormatter formatter = new HelpFormatter();
														
 
															-        formatter.printHelp("yarn archive-logs", opts);
														
 
															+        formatter.printHelp("mapred archive-logs", opts);
														
 
															         System.exit(0);
														
 
															       }
														
 
															       if (commandLine.hasOption(MAX_ELIGIBLE_APPS_OPTION)) {
														
@@ -254,7 +254,7 @@ public class HadoopArchiveLogs implements Tool {
 
															       }
														
 
															     } catch (ParseException pe) {
														
 
															       HelpFormatter formatter = new HelpFormatter();
														
 
															-      formatter.printHelp("yarn archive-logs", opts);
														
 
															+      formatter.printHelp("mapred archive-logs", opts);
														
 
															       throw pe;
														
 
															     }
														
 
															   }
														
--- a/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md
+++ b/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md
@@ -0,0 +1,85 @@
 
															+<!---
														
 
															+  Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+  you may not use this file except in compliance with the License.
														
 
															+  You may obtain a copy of the License at
														
 
															+
														
 
															+    http://www.apache.org/licenses/LICENSE-2.0
														
 
															+
														
 
															+  Unless required by applicable law or agreed to in writing, software
														
 
															+  distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+  See the License for the specific language governing permissions and
														
 
															+  limitations under the License. See accompanying LICENSE file.
														
 
															+-->
														
 
															+
														
 
															+Hadoop Archive Logs Guide
														
 
															+=========================
														
 
															+
														
 
															+ - [Overview](#Overview)
														
 
															+ - [How to Archive Logs](#How_to_Archive_Logs)
														
 
															+
														
 
															+Overview
														
 
															+--------
														
 
															+
														
 
															+For clusters with a lot of YARN aggregated logs, it can be helpful to combine
														
 
															+them into hadoop archives in order to reduce the number of small files, and
														
 
															+hence the stress on the NameNode.  This tool provides an easy way to do this.
														
 
															+Aggregated logs in hadoop archives can still be read by the Job History Server
														
 
															+and by the `yarn logs` command.
														
 
															+
														
 
															+For more on hadoop archives, see
														
 
															+[Hadoop Archives Guide](../hadoop-archives/HadoopArchives.html).
														
 
															+
														
 
															+How to Archive Logs
														
 
															+-------------------
														
 
															+
														
 
															+    usage: mapred archive-logs
														
 
															+    -force                         Force recreating the working directory if
														
 
															+                                   an existing one is found. This should
														
 
															+                                   only be used if you know that another
														
 
															+                                   instance is not currently running
														
 
															+    -help                          Prints this message
														
 
															+    -maxEligibleApps <n>           The maximum number of eligible apps to
														
 
															+                                   process (default: -1 (all))
														
 
															+    -maxTotalLogsSize <megabytes>  The maximum total logs size (in
														
 
															+                                   megabytes) required to be eligible
														
 
															+                                   (default: 1024)
														
 
															+    -memory <megabytes>            The amount of memory (in megabytes) for
														
 
															+                                   each container (default: 1024)
														
 
															+    -minNumberLogFiles <n>         The minimum number of log files required
														
 
															+                                   to be eligible (default: 20)
														
 
															+    -verbose                       Print more details.
														
 
															+
														
 
															+The tool only supports running one instance on a cluster at a time in order
														
 
															+to prevent conflicts. It does this by checking for the existence of a
														
 
															+directory named ``archive-logs-work`` under
														
 
															+``yarn.nodemanager.remote-app-log-dir`` in HDFS
														
 
															+(default: ``/tmp/logs/archive-logs-work``). If for some reason that
														
 
															+directory was not cleaned up properly, and the tool refuses to run, you can
														
 
															+force it with the ``-force`` option.
														
 
															+
														
 
															+The ``-help`` option prints out the usage information.
														
 
															+
														
 
															+The tool works by performing the following procedure:
														
 
															+
														
 
															+ 1. Determine the list of eligible applications, based on the following
														
 
															+    criteria:
														
 
															+    - is not already archived
														
 
															+    - its aggregation status has successfully completed
														
 
															+    - has at least ``-minNumberLogFiles`` log files
														
 
															+    - the sum of its log files size is less than ``-maxTotalLogsSize`` megabytes
														
 
															+ 2. If there are more than ``-maxEligibleApps`` applications found, the
														
 
															+    newest applications are dropped. They can be processed next time.
														
 
															+ 3. A shell script is generated based on the eligible applications.
														
 
															+ 4. The Distributed Shell program is run with the aforementioned script. It
														
 
															+    will run with ``-maxEligibleApps`` containers, one to process each
														
 
															+    application, and with ``-memory`` megabytes of memory. Each container runs
														
 
															+    the ``hadoop archives`` command for a single application and replaces
														
 
															+    its aggregated log files with the resulting archive.
														
 
															+
														
 
															+The ``-verbose`` option makes the tool print more details about what it's
														
 
															+doing.
														
 
															+
														
 
															+The end result of running the tool is that the original aggregated log files for
														
 
															+a processed application will be replaced by a hadoop archive containing all of
														
 
															+those logs.
														
--- a/hadoop-tools/hadoop-archive-logs/src/site/resources/css/site.css
+++ b/hadoop-tools/hadoop-archive-logs/src/site/resources/css/site.css
@@ -0,0 +1,30 @@
 
															+/*
														
 
															+* Licensed to the Apache Software Foundation (ASF) under one or more
														
 
															+* contributor license agreements.  See the NOTICE file distributed with
														
 
															+* this work for additional information regarding copyright ownership.
														
 
															+* The ASF licenses this file to You under the Apache License, Version 2.0
														
 
															+* (the "License"); you may not use this file except in compliance with
														
 
															+* the License.  You may obtain a copy of the License at
														
 
															+*
														
 
															+*     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+*
														
 
															+* Unless required by applicable law or agreed to in writing, software
														
 
															+* distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+* See the License for the specific language governing permissions and
														
 
															+* limitations under the License.
														
 
															+*/
														
 
															+#banner {
														
 
															+  height: 93px;
														
 
															+  background: none;
														
 
															+}
														
 
															+
														
 
															+#bannerLeft img {
														
 
															+  margin-left: 30px;
														
 
															+  margin-top: 10px;
														
 
															+}
														
 
															+
														
 
															+#bannerRight img {
														
 
															+  margin: 17px;
														
 
															+}
														
 
															+