
MAPREDUCE-3099. Add docs for setting up a single node MRv2 cluster. (mahadev)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1176607 13f79535-47bb-0310-9956-ffa450edef68
Mahadev Konar, 13 years ago
parent
commit 8e4c70fb20

+ 3 - 0
hadoop-mapreduce-project/CHANGES.txt

@@ -318,6 +318,9 @@ Release 0.23.0 - Unreleased
     MAPREDUCE-3092. Removed a special comparator for JobIDs in JobHistory as
     JobIDs are already comparable. (Devaraj K via vinodkv)
 
+    MAPREDUCE-3099. Add docs for setting up a single node MRv2 cluster.
+    (mahadev)
+
   OPTIMIZATIONS
 
     MAPREDUCE-2026. Make JobTracker.getJobCounters() and

+ 180 - 0
hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm

@@ -0,0 +1,180 @@
+~~ Licensed under the Apache License, Version 2.0 (the "License");
+~~ you may not use this file except in compliance with the License.
+~~ You may obtain a copy of the License at
+~~
+~~   http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License. See accompanying LICENSE file.
+
+  ---
+  Hadoop MapReduce Next Generation ${project.version} - Setting up a Single Node Cluster.
+  ---
+  ---
+  ${maven.build.timestamp}
+
+Hadoop MapReduce Next Generation - Setting up a Single Node Cluster.
+
+  \[ {{{./index.html}Go Back}} \]
+
+* MapReduce Tarball
+
+  You should be able to obtain the MapReduce tarball from the release.
+  If not, you should be able to create a tarball from the source.
+
++---+
+$ mvn clean install -DskipTests
+$ cd hadoop-mapreduce-project
+$ mvn clean install assembly:assembly 
++---+
+  <<NOTE:>> You will need protoc version 2.4.1 or greater installed.
+
+  To skip the native builds in mapreduce you can pass the <<<-P-cbuild>>> argument
+  to maven. The tarball should be available in the <<<target/>>> directory.
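+
+  For example, a tarball build that skips the native code could be run as shown
+  below (a sketch; the exact profile behavior may vary between branches):
+
++---+
+$ cd hadoop-mapreduce-project
+$ mvn clean install assembly:assembly -P-cbuild
++---+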
+
+  
+* Setting up the environment.
+
+  Assuming you have installed hadoop-common/hadoop-hdfs and exported
+  <<$HADOOP_COMMON_HOME>>/<<$HADOOP_HDFS_HOME>>, untar the hadoop mapreduce
+  tarball and set the environment variable <<$HADOOP_MAPRED_HOME>> to the
+  untarred directory. Set <<$YARN_HOME>> to the same value as <<$HADOOP_MAPRED_HOME>>.
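+
+  As a sketch, assuming the tarball was untarred into <<</opt/hadoop-mapreduce>>>
+  (an illustrative path, not a required location):
+
++---+
+$ export HADOOP_MAPRED_HOME=/opt/hadoop-mapreduce
+$ export YARN_HOME=$HADOOP_MAPRED_HOME
++---+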
+ 
+  <<NOTE:>> The following instructions assume you have hdfs running.
+
+* Setting up Configuration.
+
+  To start the ResourceManager and NodeManager, you will have to update the configs.
+  Assuming <<$HADOOP_CONF_DIR>> is the configuration directory and already holds the
+  installed configs for HDFS and <<<core-site.xml>>>, there are two config files you
+  will have to set up: <<<mapred-site.xml>>> and <<<yarn-site.xml>>>.
+
+** Setting up <<<mapred-site.xml>>>
+
+  Add the following configs to your <<<mapred-site.xml>>>.
+
++---+
+  <property>
+    <name>mapreduce.cluster.temp.dir</name>
+    <value></value>
+    <description>No description</description>
+    <final>true</final>
+  </property>
+
+  <property>
+    <name>mapreduce.cluster.local.dir</name>
+    <value></value>
+    <description>No description</description>
+    <final>true</final>
+  </property>
++---+
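+
+  The values above are left empty for you to fill in. As an illustration only
+  (the paths below are placeholders, not defaults), a single node setup might
+  point both properties at local scratch directories:
+
++---+
+  <property>
+    <name>mapreduce.cluster.temp.dir</name>
+    <value>/tmp/mapreduce/temp</value>
+    <final>true</final>
+  </property>
+
+  <property>
+    <name>mapreduce.cluster.local.dir</name>
+    <value>/tmp/mapreduce/local</value>
+    <final>true</final>
+  </property>
++---+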
+
+** Setting up <<<yarn-site.xml>>>
+
+  Add the following configs to your <<<yarn-site.xml>>>.
+
++---+
+ <property>
+    <name>yarn.resourcemanager.resource-tracker.address</name>
+    <value>host:port</value>
+    <description>host is the hostname of the resource manager and 
+    port is the port on which the NodeManagers contact the Resource Manager.
+    </description>
+ </property>
+
+ <property>
+    <name>yarn.resourcemanager.scheduler.address</name>
+    <value>host:port</value>
+    <description>host is the hostname of the resourcemanager and port is the port
+    on which the Applications in the cluster talk to the Resource Manager.
+    </description>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.scheduler.class</name>
+    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
+    <description>In case you do not want to use the default scheduler</description>
+  </property>
+
+  <property>
+    <name>yarn.resourcemanager.address</name>
+    <value>host:port</value>
+    <description>the host is the hostname of the ResourceManager and the port is the port on
+    which the clients can talk to the Resource Manager. </description>
+  </property>
+
+  <property>
+    <name>yarn.nodemanager.local-dirs</name>
+    <value></value>
+    <description>the local directories used by the nodemanager</description>
+  </property>
+
+  <property>
+    <name>yarn.nodemanager.address</name>
+    <value>0.0.0.0:port</value>
+    <description>the nodemanagers bind to this port</description>
+  </property>  
+
+  <property>
+    <name>yarn.nodemanager.resource.memory-gb</name>
+    <value>10</value>
+    <description>the amount of memory on the NodeManager in GB</description>
+  </property>
+ 
+  <property>
+    <name>yarn.nodemanager.remote-app-log-dir</name>
+    <value>/app-logs</value>
+    <description>directory on hdfs where the application logs are moved to </description>
+  </property>
+
+   <property>
+    <name>yarn.nodemanager.log-dirs</name>
+    <value></value>
+    <description>the directories used by Nodemanagers as log directories</description>
+  </property>
+
+  <property>
+    <name>yarn.nodemanager.aux-services</name>
+    <value>mapreduce.shuffle</value>
+    <description>shuffle service that needs to be set for Map Reduce to run </description>
+  </property>
++---+
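+
+  As an illustration only, the hostnames, ports, and paths below are placeholders
+  chosen for a single node setup, not defaults; the address and directory
+  properties could be filled in along these lines:
+
++---+
+  <property>
+    <name>yarn.resourcemanager.resource-tracker.address</name>
+    <value>localhost:8025</value>
+  </property>
+
+  <property>
+    <name>yarn.nodemanager.local-dirs</name>
+    <value>/tmp/yarn/local</value>
+  </property>
+
+  <property>
+    <name>yarn.nodemanager.log-dirs</name>
+    <value>/tmp/yarn/logs</value>
+  </property>
++---+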
+
+* Create Symlinks.
+
+  You will have to create the following symlinks:
+
++---+
+$ cd $HADOOP_COMMON_HOME/share/hadoop/common/lib/
+$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-app-*-SNAPSHOT.jar .
+$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-jobclient-*-SNAPSHOT.jar .
+$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-common-*-SNAPSHOT.jar .
+$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-shuffle-*-SNAPSHOT.jar .
+$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-core-*-SNAPSHOT.jar .
+$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-common-*-SNAPSHOT.jar .
+$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-api-*-SNAPSHOT.jar .
++---+
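+
+  To confirm the links were created, a simple check with standard shell tools:
+
++---+
+$ ls -l $HADOOP_COMMON_HOME/share/hadoop/common/lib/ | grep SNAPSHOT.jar
++---+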
+
+* Running daemons.
+
+  Assuming that the environment variables <<$HADOOP_COMMON_HOME>>, <<$HADOOP_HDFS_HOME>>,
+  <<$HADOOP_MAPRED_HOME>>, <<$YARN_HOME>>, <<$JAVA_HOME>> and <<$HADOOP_CONF_DIR>> have
+  been set appropriately, set <<$YARN_CONF_DIR>> to the same value as <<$HADOOP_CONF_DIR>>.
+ 
+  Run ResourceManager and NodeManager as:
+  
++---+
+$ cd $HADOOP_MAPRED_HOME
+$ bin/yarn-daemon.sh start resourcemanager
+$ bin/yarn-daemon.sh start nodemanager
++---+
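+
+  As a quick sanity check, <<<jps>>> (part of the JDK) should now list both daemons,
+  for example:
+
++---+
+$ jps
+27035 ResourceManager
+27113 NodeManager
++---+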
+
+  You should be up and running. You can run randomwriter as:
+
++---+
+$ $HADOOP_COMMON_HOME/bin/hadoop jar hadoop-examples.jar randomwriter out
++---+
+
+Good luck.

+ 39 - 0
hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm

@@ -0,0 +1,39 @@
+~~ Licensed under the Apache License, Version 2.0 (the "License");
+~~ you may not use this file except in compliance with the License.
+~~ You may obtain a copy of the License at
+~~
+~~   http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License. See accompanying LICENSE file.
+
+  ---
+  Hadoop MapReduce Next Generation  ${project.version}
+  ---
+  ---
+  ${maven.build.timestamp}
+
+Hadoop MapReduce Next Generation
+
+* Architecture
+
+  The new architecture, introduced in 0.23, divides the two major functions
+  of the JobTracker, resource management and job scheduling/monitoring, into
+  separate components.
+  The new ResourceManager manages the global assignment of compute resources to
+  applications, and the per-application ApplicationMaster manages the application's
+  scheduling and coordination.
+  An application is either a single job in the classic sense of MapReduce jobs or a DAG
+  of such jobs.
+  The ResourceManager and the per-machine NodeManager server, which manages the user
+  processes on that machine, form the computation fabric. The per-application
+  ApplicationMaster is, in effect, a framework-specific library tasked with negotiating
+  resources from the ResourceManager and working with the NodeManager(s) to execute and
+  monitor the tasks.
+
+* User Documentation
+
+  * {{{./SingleCluster.html}SingleCluster}}
+
+  * {{{./apidocs/index.html}JavaDocs}}
+

+ 34 - 0
hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml

@@ -0,0 +1,34 @@
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<project name="Hadoop MapReduce Next Gen">
+
+  <version position="right"/>
+
+  <bannerLeft>
+    <name>&nbsp;</name>
+  </bannerLeft>
+
+  <skin>
+    <groupId>org.apache.maven.skins</groupId>
+    <artifactId>maven-stylus-skin</artifactId>
+    <version>1.1</version>
+  </skin>
+
+  <body>
+    <links>
+      <item name="Apache Hadoop" href="http://hadoop.apache.org/"/>
+    </links>
+  </body>
+
+</project>