10 年之前 · 0c4af0f998
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -600,6 +600,8 @@ Release 2.8.0 - UNRELEASED
 
				    MAPREDUCE-6302. Preempt reducers after a configurable timeout irrespective 
			
 
				    of headroom. (kasha)
			
 
				 
			
 
				+   MAPREDUCE-6495. Docs for archive-logs tool (rkanter)
			
 
				+
			
 
				 Release 2.7.2 - UNRELEASED
			
 
				 
			
 
				   INCOMPATIBLE CHANGES
			
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredCommands.md
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredCommands.md
@@ -18,6 +18,7 @@ MapReduce Commands Guide
 
				 * [Overview](#Overview)
			
 
				 * [User Commands](#User_Commands)
			
 
				     * [archive](#archive)
			
 
				+    * [archive-logs](#archive-logs)
			
 
				     * [classpath](#classpath)
			
 
				     * [distcp](#distcp)
			
 
				     * [job](#job)
			
@@ -53,6 +54,12 @@ Commands useful for users of a hadoop cluster.
 
				 Creates a hadoop archive. More information can be found at
			
 
				 [Hadoop Archives Guide](../../hadoop-archives/HadoopArchives.html).
			
 
				 
			
 
				+### `archive-logs`
			
 
				+
			
 
				+A tool to combine YARN aggregated logs into Hadoop archives to reduce the number
			
 
				+of files in HDFS. More information can be found at
			
 
				+[Hadoop Archive Logs Guide](../../hadoop-archive-logs/HadoopArchiveLogs.html).
			
 
				+
			
 
				 ### `classpath`
			
 
				 
			
 
				 Usage: `yarn classpath [--glob |--jar <path> |-h |--help]`
			
--- a/hadoop-project/src/site/site.xml
+++ b/hadoop-project/src/site/site.xml
@@ -156,6 +156,7 @@
 
				     <menu name="Tools" inherit="top">
			
 
				       <item name="Hadoop Streaming" href="hadoop-streaming/HadoopStreaming.html"/>
			
 
				       <item name="Hadoop Archives" href="hadoop-archives/HadoopArchives.html"/>
			
 
				+      <item name="Hadoop Archive Logs" href="hadoop-archive-logs/HadoopArchiveLogs.html"/>
			
 
				       <item name="DistCp" href="hadoop-distcp/DistCp.html"/>
			
 
				       <item name="GridMix" href="hadoop-gridmix/GridMix.html"/>
			
 
				       <item name="Rumen" href="hadoop-rumen/Rumen.html"/>
			
--- a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java
+++ b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java
@@ -221,7 +221,7 @@ public class HadoopArchiveLogs implements Tool {
 
				       CommandLine commandLine = parser.parse(opts, args);
			
 
				       if (commandLine.hasOption(HELP_OPTION)) {
			
 
				         HelpFormatter formatter = new HelpFormatter();
			
 
				-        formatter.printHelp("yarn archive-logs", opts);
			
 
				+        formatter.printHelp("mapred archive-logs", opts);
			
 
				         System.exit(0);
			
 
				       }
			
 
				       if (commandLine.hasOption(MAX_ELIGIBLE_APPS_OPTION)) {
			
@@ -254,7 +254,7 @@ public class HadoopArchiveLogs implements Tool {
 
				       }
			
 
				     } catch (ParseException pe) {
			
 
				       HelpFormatter formatter = new HelpFormatter();
			
 
				-      formatter.printHelp("yarn archive-logs", opts);
			
 
				+      formatter.printHelp("mapred archive-logs", opts);
			
 
				       throw pe;
			
 
				     }
			
 
				   }
			
--- a/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md
+++ b/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md
@@ -0,0 +1,85 @@
 
				+<!---
			
 
				+  Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+  you may not use this file except in compliance with the License.
			
 
				+  You may obtain a copy of the License at
			
 
				+
			
 
				+    http://www.apache.org/licenses/LICENSE-2.0
			
 
				+
			
 
				+  Unless required by applicable law or agreed to in writing, software
			
 
				+  distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+  See the License for the specific language governing permissions and
			
 
				+  limitations under the License. See accompanying LICENSE file.
			
 
				+-->
			
 
				+
			
 
				+Hadoop Archive Logs Guide
			
 
				+=========================
			
 
				+
			
 
				+ - [Overview](#Overview)
			
 
				+ - [How to Archive Logs](#How_to_Archive_Logs)
			
 
				+
			
 
				+Overview
			
 
				+--------
			
 
				+
			
 
				+For clusters with a lot of YARN aggregated logs, it can be helpful to combine
			
 
				+them into hadoop archives in order to reduce the number of small files, and
			
 
				+hence the stress on the NameNode.  This tool provides an easy way to do this.
			
 
				+Aggregated logs in hadoop archives can still be read by the Job History Server
			
 
				+and by the `yarn logs` command.
			
 
				+
			
 
				+For more on hadoop archives, see
			
 
				+[Hadoop Archives Guide](../hadoop-archives/HadoopArchives.html).
			
 
				+
			
 
				+How to Archive Logs
			
 
				+-------------------
			
 
				+
			
 
				+    usage: mapred archive-logs
			
 
				+    -force                         Force recreating the working directory if
			
 
				+                                   an existing one is found. This should
			
 
				+                                   only be used if you know that another
			
 
				+                                   instance is not currently running
			
 
				+    -help                          Prints this message
			
 
				+    -maxEligibleApps <n>           The maximum number of eligible apps to
			
 
				+                                   process (default: -1 (all))
			
 
				+    -maxTotalLogsSize <megabytes>  The maximum total logs size (in
			
 
				+                                   megabytes) required to be eligible
			
 
				+                                   (default: 1024)
			
 
				+    -memory <megabytes>            The amount of memory (in megabytes) for
			
 
				+                                   each container (default: 1024)
			
 
				+    -minNumberLogFiles <n>         The minimum number of log files required
			
 
				+                                   to be eligible (default: 20)
			
 
				+    -verbose                       Print more details.
			
 
				+
			
 
				+The tool only supports running one instance on a cluster at a time in order
			
 
				+to prevent conflicts. It does this by checking for the existence of a
			
 
				+directory named ``archive-logs-work`` under
			
 
				+``yarn.nodemanager.remote-app-log-dir`` in HDFS
			
 
				+(default: ``/tmp/logs/archive-logs-work``). If for some reason that
			
 
				+directory was not cleaned up properly, and the tool refuses to run, you can
			
 
				+force it with the ``-force`` option.
			
 
				+
			
 
				+The ``-help`` option prints out the usage information.
			
 
				+
			
 
				+The tool works by performing the following procedure:
			
 
				+
			
 
				+ 1. Determine the list of eligible applications, based on the following
			
 
				+    criteria:
			
 
				+    - is not already archived
			
 
				+    - its aggregation status has successfully completed
			
 
				+    - has at least ``-minNumberLogFiles`` log files
			
 
				+    - the sum of its log files size is less than ``-maxTotalLogsSize`` megabytes
			
 
				+ 2. If there are more than ``-maxEligibleApps`` applications found, the
			
 
				+    newest applications are dropped. They can be processed next time.
			
 
				+ 3. A shell script is generated based on the eligible applications
			
 
				+ 4. The Distributed Shell program is run with the aforementioned script. It
			
 
				+    will run with ``-maxEligibleApps`` containers, one to process each
			
 
				+    application, and with ``-memory`` megabytes of memory. Each container runs
			
 
				+    the ``hadoop archives`` command for a single application and replaces
			
 
				+    its aggregated log files with the resulting archive.
			
 
				+
			
 
				+The ``-verbose`` option makes the tool print more details about what it's
			
 
				+doing.
			
 
				+
			
 
				+The end result of running the tool is that the original aggregated log files for
			
 
				+a processed application will be replaced by a hadoop archive containing all of
			
 
				+those logs.
			
--- a/hadoop-tools/hadoop-archive-logs/src/site/resources/css/site.css
+++ b/hadoop-tools/hadoop-archive-logs/src/site/resources/css/site.css
@@ -0,0 +1,30 @@
 
				+/*
			
 
				+* Licensed to the Apache Software Foundation (ASF) under one or more
			
 
				+* contributor license agreements.  See the NOTICE file distributed with
			
 
				+* this work for additional information regarding copyright ownership.
			
 
				+* The ASF licenses this file to You under the Apache License, Version 2.0
			
 
				+* (the "License"); you may not use this file except in compliance with
			
 
				+* the License.  You may obtain a copy of the License at
			
 
				+*
			
 
				+*     http://www.apache.org/licenses/LICENSE-2.0
			
 
				+*
			
 
				+* Unless required by applicable law or agreed to in writing, software
			
 
				+* distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+* See the License for the specific language governing permissions and
			
 
				+* limitations under the License.
			
 
				+*/
			
 
				+#banner {
			
 
				+  height: 93px;
			
 
				+  background: none;
			
 
				+}
			
 
				+
			
 
				+#bannerLeft img {
			
 
				+  margin-left: 30px;
			
 
				+  margin-top: 10px;
			
 
				+}
			
 
				+
			
 
				+#bannerRight img {
			
 
				+  margin: 17px;
			
 
				+}
			
 
				+