13 年之前 · 37137845c8
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -832,6 +832,8 @@ Release 0.23.0 - Unreleased
 
				     HDFS-2401. Running a set of methods in a Single Test Class.
			
 
				     (Jonathan Eagles via mahadev)
			
 
				 
			
 
				+    HDFS-2471. Add federation documentation. (suresh)
			
 
				+
			
 
				   OPTIMIZATIONS
			
 
				 
			
 
				     HDFS-1458. Improve checkpoint performance by avoiding unnecessary image
			
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm
@@ -394,19 +394,19 @@ Hadoop MapReduce Next Generation - Cluster Setup
 
				     Format a new distributed filesystem:

			
 
				   

			
 
				 ----

			
 
				-  $ $HADOOP_HDFS_HOME/bin/hdfs namenode -format <cluster_name>

			
 
				+  $ $HADOOP_PREFIX_HOME/bin/hdfs namenode -format <cluster_name>

			
 
				 ----

			
 
				 

			
 
				     Start the HDFS with the following command, run on the designated NameNode:

			
 
				   

			
 
				 ----

			
 
				-  $ $HADOOP_HDFS_HOME/bin/hdfs start namenode --config $HADOOP_CONF_DIR  

			
 
				+  $ $HADOOP_PREFIX_HOME/bin/hdfs start namenode --config $HADOOP_CONF_DIR  

			
 
				 ----    	  

			
 
				 

			
 
				     Run a script to start DataNodes on all slaves:

			
 
				 

			
 
				 ----

			
 
				-  $ $HADOOP_HDFS_HOME/bin/hdfs start datanode --config $HADOOP_CONF_DIR  

			
 
				+  $ $HADOOP_PREFIX_HOME/bin/hdfs start datanode --config $HADOOP_CONF_DIR  

			
 
				 ----    	  

			
 
				   

			
 
				     Start the YARN with the following command, run on the designated 

			
@@ -435,13 +435,13 @@ Hadoop MapReduce Next Generation - Cluster Setup
 
				     NameNode:

			
 
				   

			
 
				 ----

			
 
				-  $ $HADOOP_HDFS_HOME/bin/hdfs stop namenode --config $HADOOP_CONF_DIR  

			
 
				+  $ $HADOOP_PREFIX_HOME/bin/hdfs stop namenode --config $HADOOP_CONF_DIR  

			
 
				 ----    	  

			
 
				 

			
 
				     Run a script to stop DataNodes on all slaves:

			
 
				 

			
 
				 ----

			
 
				-  $ $HADOOP_HDFS_HOME/bin/hdfs stop datanode --config $HADOOP_CONF_DIR  

			
 
				+  $ $HADOOP_PREFIX_HOME/bin/hdfs stop datanode --config $HADOOP_CONF_DIR  

			
 
				 ----    	  

			
 
				   

			
 
				     Stop the ResourceManager with the following command, run on the designated 

			
@@ -910,21 +910,21 @@ KVNO Timestamp         Principal
 
				     Format a new distributed filesystem as <hdfs>:

			
 
				   

			
 
				 ----

			
 
				-[hdfs]$ $HADOOP_HDFS_HOME/bin/hdfs namenode -format <cluster_name>

			
 
				+[hdfs]$ $HADOOP_PREFIX_HOME/bin/hdfs namenode -format <cluster_name>

			
 
				 ----

			
 
				 

			
 
				     Start the HDFS with the following command, run on the designated NameNode

			
 
				     as <hdfs>:

			
 
				   

			
 
				 ----

			
 
				-[hdfs]$ $HADOOP_HDFS_HOME/bin/hdfs start namenode --config $HADOOP_CONF_DIR  

			
 
				+[hdfs]$ $HADOOP_PREFIX_HOME/bin/hdfs start namenode --config $HADOOP_CONF_DIR  

			
 
				 ----    	  

			
 
				 

			
 
				     Run a script to start DataNodes on all slaves as <root> with a special

			
 
				     environment variable <<<HADOOP_SECURE_DN_USER>>> set to <hdfs>:

			
 
				 

			
 
				 ----

			
 
				-[root]$ HADOOP_SECURE_DN_USER=hdfs $HADOOP_HDFS_HOME/bin/hdfs start datanode --config $HADOOP_CONF_DIR  

			
 
				+[root]$ HADOOP_SECURE_DN_USER=hdfs $HADOOP_PREFIX_HOME/bin/hdfs start datanode --config $HADOOP_CONF_DIR  

			
 
				 ----    	  

			
 
				   

			
 
				     Start the YARN with the following command, run on the designated 

			
@@ -953,13 +953,13 @@ KVNO Timestamp         Principal
 
				     as <hdfs>:

			
 
				   

			
 
				 ----

			
 
				-[hdfs]$ $HADOOP_HDFS_HOME/bin/hdfs stop namenode --config $HADOOP_CONF_DIR  

			
 
				+[hdfs]$ $HADOOP_PREFIX_HOME/bin/hdfs stop namenode --config $HADOOP_CONF_DIR  

			
 
				 ----    	  

			
 
				 

			
 
				     Run a script to stop DataNodes on all slaves as <root>:

			
 
				 

			
 
				 ----

			
 
				-[root]$ $HADOOP_HDFS_HOME/bin/hdfs stop datanode --config $HADOOP_CONF_DIR  

			
 
				+[root]$ $HADOOP_PREFIX_HOME/bin/hdfs stop datanode --config $HADOOP_CONF_DIR  

			
 
				 ----    	  

			
 
				   

			
 
				     Stop the ResourceManager with the following command, run on the designated 

			
@@ -998,4 +998,4 @@ KVNO Timestamp         Principal
 
				 | | | Default HTTP port is 19888. |

			
 
				 *-------------------------+-------------------------+------------------------+

			
 
				     

			
 
				-    
			
 
				+    

			
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/Federation.apt.vm
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/Federation.apt.vm
@@ -0,0 +1,343 @@
 
				+
			
 
				+~~ Licensed under the Apache License, Version 2.0 (the "License");
			
 
				+~~ you may not use this file except in compliance with the License.
			
 
				+~~ You may obtain a copy of the License at
			
 
				+~~
			
 
				+~~   http://www.apache.org/licenses/LICENSE-2.0
			
 
				+~~
			
 
				+~~ Unless required by applicable law or agreed to in writing, software
			
 
				+~~ distributed under the License is distributed on an "AS IS" BASIS,
			
 
				+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
			
 
				+~~ See the License for the specific language governing permissions and
			
 
				+~~ limitations under the License. See accompanying LICENSE file.
			
 
				+
			
 
				+  ---
			
 
				+  Hadoop Map Reduce Next Generation-${project.version} - Cluster Setup
			
 
				+  ---
			
 
				+  ---
			
 
				+  ${maven.build.timestamp}
			
 
				+
			
 
				+HDFS Federation
			
 
				+
			
 
				+  \[ {{{./index.html}Go Back}} \]
			
 
				+
			
 
				+%{toc|section=1|fromDepth=0}
			
 
				+
			
 
				+  This guide provides an overview of the HDFS Federation feature and
			
 
				+  how to configure and manage the federated cluster.
			
 
				+
			
 
				+* {Background}
			
 
				+
			
 
				+[./federation-background.gif] HDFS Layers
			
 
				+
			
 
				+  HDFS has two main layers:
			
 
				+
			
 
				+  * <<Namespace>>
			
 
				+
			
 
				+    * Consists of directories, files and blocks
			
 
				+
			
 
				+    * It supports all the namespace related file system operations such as 
			
 
				+      create, delete, modify and list files and directories.
			
 
				+
			
 
				+  * <<Block Storage Service>> has two parts
			
 
				+
			
 
				+    * Block Management (which is done in Namenode)
			
 
				+
			
 
				+      * Provides datanode cluster membership by handling registrations, and 
			
 
				+        periodic heart beats.
			
 
				+
			
 
				+      * Processes block reports and maintains location of blocks.
			
 
				+
			
 
				+      * Supports block related operations such as create, delete, modify and 
			
 
				+        get block location.
			
 
				+
			
 
				+      * Manages replica placement and replication of a block for under 
			
 
				+        replicated blocks and deletes blocks that are over replicated.
			
 
				+
			
 
				+    * Storage - is provided by datanodes by storing blocks on the local file 
			
 
				+      system and allows read/write access.
			
 
				+
			
 
				+  The current HDFS architecture allows only a single namespace for the 
			
 
				+  entire cluster. A single Namenode manages this namespace. HDFS 
			
 
				+  Federation addresses limitation of current architecture by adding 
			
 
				+  support multiple Namenodes/namespaces to HDFS file system.
			
 
				+    
			
 
				+* {HDFS Federation}
			
 
				+
			
 
				+  In order to scale the name service horizontally, federation uses multiple 
			
 
				+  independent Namenodes/namespaces. The Namenodes are federated, that is, the 
			
 
				+  Namenodes are independent and don’t require coordination with each other. 
			
 
				+  The datanodes are used as common storage for blocks by all the Namenodes. 
			
 
				+  Each datanode registers with all the Namenodes in the cluster. Datanodes 
			
 
				+  send periodic heartbeats and block reports and handles commands from the 
			
 
				+  Namenodes.
			
 
				+
			
 
				+[./federation.gif] HDFS Federation Architecture
			
 
				+
			
 
				+
			
 
				+  <<Block Pool>>
			
 
				+
			
 
				+  A Block Pool is a set of blocks that belong to a single namespace. 
			
 
				+  Datanodes store blocks for all the block pools in the cluster.
			
 
				+  It is managed independently of other block pools. This allows a namespace 
			
 
				+  to generate Block IDs for new blocks without the need for coordination 
			
 
				+  with the other namespaces. The failure of a Namenode does not prevent 
			
 
				+  the datanode from serving other Namenodes in the cluster.
			
 
				+
			
 
				+  A Namespace and its block pool together are called Namespace Volume. 
			
 
				+  It is a self-contained unit of management. When a Namenode/namespace 
			
 
				+  is deleted, the corresponding block pool at the datanodes is deleted.
			
 
				+  Each namespace volume is upgraded as a unit, during cluster upgrade.
			
 
				+
			
 
				+  <<ClusterID>>
			
 
				+
			
 
				+  A new identifier <<ClusterID>> is added to identify all the nodes in 
			
 
				+  the cluster.  When a Namenode is formatted, this identifier is provided 
			
 
				+  or auto generated. This ID should be used for formatting the other 
			
 
				+  Namenodes into the cluster.
			
 
				+
			
 
				+** Key Benefits
			
 
				+
			
 
				+  * Namespace Scalability - HDFS cluster storage scales horizontally but 
			
 
				+    the namespace does not. Large deployments or deployments using lot 
			
 
				+    of small files benefit from scaling the namespace by adding more 
			
 
				+    Namenodes to the cluster
			
 
				+
			
 
				+  * Performance - File system operation throughput is currently limited 
			
 
				+    by a single Namenode. Adding more Namenodes to the cluster scales the 
			
 
				+    file system read/write operations throughput.
			
 
				+
			
 
				+  * Isolation - A single Namenode offers no isolation in multi user 
			
 
				+    environment. An experimental application can overload the Namenode 
			
 
				+    and slow down production critical applications. With multiple Namenodes, 
			
 
				+    different categories of applications and users can be isolated to 
			
 
				+    different namespaces.
			
 
				+
			
 
				+* {Federation Configuration}
			
 
				+
			
 
				+  Federation configuration is <<backward compatible>> and allows existing 
			
 
				+  single Namenode configuration to work without any change. The new 
			
 
				+  configuration is designed such that all the nodes in the cluster have 
			
 
				+  same configuration without the need for deploying different configuration 
			
 
				+  based on the type of the node in the cluster.
			
 
				+
			
 
				+  A new abstraction called <<<NameServiceID>>> is added with
			
 
				+  federation. The Namenode and its corresponding secondary/backup/checkpointer
			
 
				+  nodes belong to this. To support single configuration file, the Namenode and
			
 
				+  secondary/backup/checkpointer configuration parameters are suffixed with
			
 
				+  <<<NameServiceID>>> and are added to the same configuration file.
			
 
				+
			
 
				+
			
 
				+** Configuration:
			
 
				+
			
 
				+  <<Step 1>>: Add the following parameters to your configuration:
			
 
				+  <<<dfs.federation.nameservices>>>: Configure with list of comma separated 
			
 
				+  NameServiceIDs. This will be used by Datanodes to determine all the 
			
 
				+  Namenodes in the cluster.
			
 
				+  
			
 
				+  <<Step 2>>: For each Namenode and Secondary Namenode/BackupNode/Checkpointer 
			
 
				+  add the following configuration suffixed with the corresponding 
			
 
				+  <<<NameServiceID>>> into the common configuration file.
			
 
				+
			
 
				+*---------------------+--------------------------------------------+
			
 
				+|| Daemon             || Configuration Parameter                   |
			
 
				+*---------------------+--------------------------------------------+
			
 
				+| Namenode            | <<<dfs.namenode.rpc-address>>>             |
			
 
				+|                     | <<<dfs.namenode.servicerpc-address>>>      |
			
 
				+|                     | <<<dfs.namenode.http-address>>>            |
			
 
				+|                     | <<<dfs.namenode.https-address>>>           |
			
 
				+|                     | <<<dfs.namenode.keytab.file>>>             |
			
 
				+|                     | <<<dfs.namenode.name.dir>>>                |
			
 
				+|                     | <<<dfs.namenode.edits.dir>>>               |
			
 
				+|                     | <<<dfs.namenode.checkpoint.dir>>>          |
			
 
				+|                     | <<<dfs.namenode.checkpoint.edits.dir>>>    |
			
 
				+*---------------------+--------------------------------------------+
			
 
				+| Secondary Namenode  | <<<dfs.namenode.secondary.http-address>>>  |
			
 
				+|                     | <<<dfs.secondary.namenode.keytab.file>>>   |
			
 
				+*---------------------+--------------------------------------------+
			
 
				+| BackupNode          | <<<dfs.namenode.backup.address>>>          |
			
 
				+|                     | <<<dfs.secondary.namenode.keytab.file>>>   |
			
 
				+*---------------------+--------------------------------------------+
			
 
				+    
			
 
				+  Here is an example configuration with two namenodes:
			
 
				+
			
 
				+----
			
 
				+<configuration>
			
 
				+  <property>
			
 
				+    <name>dfs.federation.nameservices</name>
			
 
				+    <value>ns1,ns2</value>
			
 
				+  </property>
			
 
				+  <property>
			
 
				+    <name>dfs.namenode.rpc-address.ns1</name>
			
 
				+    <value>hdfs://nn-host1:rpc-port</value>
			
 
				+  </property>
			
 
				+  <property>
			
 
				+    <name>dfs.namenode.http-address.ns1</name>
			
 
				+    <value>nn-host1:http-port</value>
			
 
				+  </property>
			
 
				+  <property>
			
 
				+    <name>dfs.namenode.secondaryhttp-address.ns1</name>
			
 
				+    <value>snn-host1:http-port</value>
			
 
				+  </property>
			
 
				+  <property>
			
 
				+    <name>dfs.namenode.rpc-address.ns2</name>
			
 
				+    <value>hdfs://nn-host2:rpc-port</value>
			
 
				+  </property>
			
 
				+  <property>
			
 
				+    <name>dfs.namenode.http-address.ns2</name>
			
 
				+    <value>nn-host2:http-port</value>
			
 
				+  </property>
			
 
				+  <property>
			
 
				+    <name>dfs.namenode.secondaryhttp-address.ns2</name>
			
 
				+    <value>snn-host2:http-port</value>
			
 
				+  </property>
			
 
				+
			
 
				+  .... Other common configuration ...
			
 
				+</configuration>
			
 
				+----
			
 
				+
			
 
				+** Formatting Namenodes
			
 
				+
			
 
				+  <<Step 1>>: Format a namenode using the following command:
			
 
				+  
			
 
				+----
			
 
				+> $HADOOP_PREFIX_HOME/bin/hdfs namenode -format [-clusterId <cluster_id>]
			
 
				+----
			
 
				+  Choose a unique cluster_id, which will not conflict other clusters in 
			
 
				+  your environment. If it is not provided, then a unique ClusterID is 
			
 
				+  auto generated.
			
 
				+
			
 
				+  <<Step 2>>: Format additional namenode using the following command:
			
 
				+
			
 
				+----
			
 
				+> $HADOOP_PREFIX_HOME/bin/hdfs namenode -format -clusterId <cluster_id>
			
 
				+----
			
 
				+  Note that the cluster_id in step 2 must be same as that of the 
			
 
				+  cluster_id in step 1. If they are different, the additional Namenodes 
			
 
				+  will not be part of the federated cluster.
			
 
				+
			
 
				+** Upgrading from older release to 0.23 and configuring federation
			
 
				+
			
 
				+  Older releases supported a single Namenode. Here are the steps enable 
			
 
				+  federation:
			
 
				+
			
 
				+  Step 1: Upgrade the cluster to newer release. During upgrade you can 
			
 
				+  provide a ClusterID as follows:
			
 
				+
			
 
				+----
			
 
				+> $HADOOP_PREFIX_HOME/bin/hdfs start namenode --config $HADOOP_CONF_DIR  -upgrade -clusterId <cluster_ID>
			
 
				+----
			
 
				+  If ClusterID is not provided, it is auto generated.
			
 
				+
			
 
				+** Adding a new Namenode to an existing HDFS cluster
			
 
				+
			
 
				+  Follow the following steps:
			
 
				+
			
 
				+  * Add configuration parameter <<<dfs.federation.nameservices>>> to 
			
 
				+    the configuration.
			
 
				+
			
 
				+  * Update the configuration with NameServiceID suffix. Configuration 
			
 
				+    key names have changed post release 0.20. You must use new configuration 
			
 
				+    parameter names, for federation.
			
 
				+
			
 
				+  * Add new Namenode related config to the configuration files.
			
 
				+
			
 
				+  * Propagate the configuration file to the all the nodes in the cluster.
			
 
				+
			
 
				+  * Start the new Namenode, Secondary/Backup.
			
 
				+
			
 
				+  * Refresh the datanodes to pickup the newly added Namenode by running 
			
 
				+    the following command:
			
 
				+
			
 
				+----
			
 
				+> $HADOOP_PREFIX_HOME/bin/hdfs dfadmin -refreshNameNode <datanode_host_name>:<datanode_rpc_port>
			
 
				+----
			
 
				+
			
 
				+  * The above command must be run against all the datanodes in the cluster.
			
 
				+
			
 
				+* {Managing the cluster}
			
 
				+
			
 
				+**  Starting and stopping cluster
			
 
				+
			
 
				+  To start the cluster run the following command:
			
 
				+
			
 
				+----
			
 
				+> $HADOOP_PREFIX_HOME/bin/start-dfs.sh
			
 
				+----
			
 
				+
			
 
				+  To start the cluster run the following command:
			
 
				+
			
 
				+----
			
 
				+> $HADOOP_PREFIX_HOME/bin/stop-dfs.sh
			
 
				+----
			
 
				+
			
 
				+  These commands can be run from any node where the HDFS configuration is 
			
 
				+  available.  The command uses configuration to determine the Namenodes 
			
 
				+  in the cluster and starts the Namenode process on those nodes. The 
			
 
				+  datanodes are started on nodes specified in the <<<slaves>>> file. The 
			
 
				+  script can be used as reference for building your own scripts for 
			
 
				+  starting and stopping the cluster.
			
 
				+
			
 
				+**  Balancer
			
 
				+
			
 
				+  Balancer has been changed to work with multiple Namenodes in the cluster to 
			
 
				+  balance the cluster. Balancer can be run using the command:
			
 
				+
			
 
				+----
			
 
				+"$HADOOP_PREFIX"/bin/hadoop-daemon.sh --config $HADOOP_CONF_DIR --script "$bin"/hdfs start balancer [-policy <policy>]
			
 
				+----
			
 
				+
			
 
				+  Policy could be:
			
 
				+
			
 
				+  * <<<node>>> - this is the <default> policy. This balances the storage at 
			
 
				+    the datanode level. This is similar to balancing policy from prior releases.
			
 
				+
			
 
				+  * <<<blockpool>>> - this balances the storage at the block pool level. 
			
 
				+    Balancing at block pool level balances storage at the datanode level also.
			
 
				+
			
 
				+  Note that Balander only balances the data and does not balance the namespace.
			
 
				+
			
 
				+** Decommissioning
			
 
				+
			
 
				+  Decommissioning is similar to prior releases. The nodes that need to be 
			
 
				+  decommissioning are added to the exclude file at all the Namenode. Each 
			
 
				+  Namenode decommissions its Block Pool. When all the Namenodes finish 
			
 
				+  decommissioning a datanode, the datanode is considered to be decommissioned.
			
 
				+
			
 
				+  <<Step 1>>: To distributed an exclude file to all the Namenodes, use the 
			
 
				+  following command:
			
 
				+
			
 
				+----
			
 
				+"$HADOOP_PREFIX"/bin/distributed-exclude.sh <exclude_file>
			
 
				+----
			
 
				+
			
 
				+  <<Step 2>>: Refresh all the Namenodes to pick up the new exclude file.
			
 
				+
			
 
				+----
			
 
				+"$HADOOP_PREFIX"/bin/refresh-namenodes.sh
			
 
				+----
			
 
				+ 
			
 
				+  The above command uses HDFS configuration to determine the Namenodes 
			
 
				+  configured in the cluster and refreshes all the Namenodes to pick up 
			
 
				+  the new exclude file.
			
 
				+
			
 
				+** Cluster Web Console
			
 
				+
			
 
				+  Similar to Namenode status web page, a Cluster Web Console is added in 
			
 
				+  federation to monitor the federated cluster at 
			
 
				+  <<<http://<any_nn_host:port>/dfsclusterhealth.jsp>>>.
			
 
				+  Any Namenode in the cluster can be used to access this web page.
			
 
				+
			
 
				+  The web page provides the following information:
			
 
				+
			
 
				+  * Cluster summary that shows number of files, number of blocks and 
			
 
				+    total configured storage capacity, available and used storage information 
			
 
				+    for the entire cluster.
			
 
				+
			
 
				+  * Provides list of Namenodes and summary that includes number of files,
			
 
				+    blocks, missing blocks, number of live and dead data nodes for each 
			
 
				+    Namenode. It also provides a link to conveniently access Namenode web UI.
			
 
				+
			
 
				+  * It also provides decommissioning status of datanodes.
			
 
				+
			
 
				+
			
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/index.apt.vm
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/index.apt.vm
@@ -49,3 +49,5 @@ Hadoop MapReduce Next Generation
 
				 
			
 
				   * {{{./CapacityScheduler.html}Capacity Scheduler}}
			
 
				 
			
 
				+  * {{{./Federation.html}HDFS Federation feature description, configuration and management}}
			
 
				+
			
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/resources/federation-background.gif
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/resources/federation-background.gif
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/resources/federation.gif
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/resources/federation.gif