<?xml version="1.0"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">

<document>

  <header>
    <title>Cluster Setup</title>
  </header>

  <body>

    <section>
      <title>Purpose</title>

      <p>This document describes how to install, configure and manage non-trivial
      Hadoop clusters ranging from a few nodes to extremely large clusters with
      thousands of nodes.</p>
      <p>
      To play with Hadoop, you may first want to install Hadoop on a single
      machine (see the <a href="single_node_setup.html">Hadoop Quick Start</a>).
      </p>
    </section>

    <section>
      <title>Pre-requisites</title>

      <ol>
        <li>
          Make sure all <a href="single_node_setup.html#PreReqs">requisite</a> software
          is installed on all nodes in your cluster.
        </li>
        <li>
          <a href="single_node_setup.html#Download">Get</a> the Hadoop software.
        </li>
      </ol>
    </section>

    <section>
      <title>Installation</title>

      <p>Installing a Hadoop cluster typically involves unpacking the software
      on all the machines in the cluster.</p>

      <p>Typically one machine in the cluster is designated as the
      <code>NameNode</code> and another machine as the <code>JobTracker</code>,
      exclusively. These are the <em>masters</em>. The rest of the machines in
      the cluster act as both <code>DataNode</code> <em>and</em>
      <code>TaskTracker</code>. These are the <em>slaves</em>.</p>

      <p>The root of the distribution is referred to as
      <code>HADOOP_PREFIX</code>. All machines in the cluster usually have the same
      <code>HADOOP_PREFIX</code> path.</p>
    </section>

    <section>
      <title>Configuration</title>

      <p>The following sections describe how to configure a Hadoop cluster.</p>

      <section>
        <title>Configuration Files</title>

        <p>Hadoop configuration is driven by two types of important
        configuration files:</p>
        <ol>
          <li>
            Read-only default configuration -
            <a href="ext:core-default">src/core/core-default.xml</a>,
            <a href="ext:hdfs-default">src/hdfs/hdfs-default.xml</a>,
            <a href="ext:mapred-default">src/mapred/mapred-default.xml</a> and
            <a href="ext:mapred-queues">conf/mapred-queues.xml.template</a>.
          </li>
          <li>
            Site-specific configuration -
            <a href="#core-site.xml">conf/core-site.xml</a>,
            <a href="#hdfs-site.xml">conf/hdfs-site.xml</a>,
            <a href="#mapred-site.xml">conf/mapred-site.xml</a> and
            <a href="#mapred-queues.xml">conf/mapred-queues.xml</a>.
          </li>
        </ol>

        <p>To learn more about how the Hadoop framework is controlled by these
        configuration files, look
        <a href="ext:api/org/apache/hadoop/conf/configuration">here</a>.</p>

        <p>Additionally, you can control the Hadoop scripts found in the
        <code>bin/</code> directory of the distribution by setting site-specific
        values via <code>conf/hadoop-env.sh</code>.</p>
      </section>
      <section>
        <title>Site Configuration</title>

        <p>To configure the Hadoop cluster you will need to configure the
        <em>environment</em> in which the Hadoop daemons execute as well as
        the <em>configuration parameters</em> for the Hadoop daemons.</p>

        <p>The Hadoop daemons are <code>NameNode</code>/<code>DataNode</code>
        and <code>JobTracker</code>/<code>TaskTracker</code>.</p>

        <section>
          <title>Configuring the Environment of the Hadoop Daemons</title>

          <p>Administrators should use the <code>conf/hadoop-env.sh</code> script
          to do site-specific customization of the Hadoop daemons' process
          environment.</p>

          <p>At the very least you should specify <code>JAVA_HOME</code> so
          that it is correctly defined on each remote node.</p>

          <p>Administrators can configure individual daemons using the
          configuration options <code>HADOOP_*_OPTS</code>. The available
          options are shown in the table below.</p>
          <table>
            <tr><th>Daemon</th><th>Configure Options</th></tr>
            <tr><td>NameNode</td><td>HADOOP_NAMENODE_OPTS</td></tr>
            <tr><td>DataNode</td><td>HADOOP_DATANODE_OPTS</td></tr>
            <tr><td>SecondaryNamenode</td>
                <td>HADOOP_SECONDARYNAMENODE_OPTS</td></tr>
          </table>

          <p>For example, to configure the NameNode to use parallel GC, the
          following statement should be added to <code>hadoop-env.sh</code>:
          <br/><code>
          export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}"
          </code><br/></p>

          <p>Other useful configuration parameters that you can customize
          include:</p>
          <ul>
            <li>
              <code>HADOOP_LOG_DIR</code> - The directory where the daemons'
              log files are stored. It is automatically created if it doesn't
              exist.
            </li>
            <li>
              <code>HADOOP_HEAPSIZE</code> - The maximum heap size to use,
              in MB, e.g. <code>1000</code>. This is used to configure the
              heap size for the Hadoop daemons. The default value is
              <code>1000</code> MB.
            </li>
          </ul>
        </section>

        <section>
          <title>Configuring the Hadoop Daemons</title>

          <p>This section deals with important parameters to be specified in the
          following:</p>
          <anchor id="core-site.xml"/><p><code>conf/core-site.xml</code>:</p>

          <table>
            <tr>
              <th>Parameter</th>
              <th>Value</th>
              <th>Notes</th>
            </tr>
            <tr>
              <td>fs.defaultFS</td>
              <td>URI of <code>NameNode</code>.</td>
              <td><em>hdfs://hostname/</em></td>
            </tr>
          </table>
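
          <p>For instance, a minimal <code>conf/core-site.xml</code> might look
          like the sketch below. The hostname <em>namenode.example.com</em> is a
          placeholder; substitute the host that runs your NameNode.</p>
          <source>
<?xml version="1.0"?>
<configuration>
  <!-- Points all clients and daemons at the NameNode. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://namenode.example.com/</value>
  </property>
</configuration>
          </source>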

          <anchor id="hdfs-site.xml"/><p><code>conf/hdfs-site.xml</code>:</p>

          <table>
            <tr>
              <th>Parameter</th>
              <th>Value</th>
              <th>Notes</th>
            </tr>
            <tr>
              <td>dfs.namenode.name.dir</td>
              <td>
                Path on the local filesystem where the <code>NameNode</code>
                stores the namespace and transaction logs persistently.</td>
              <td>
                If this is a comma-delimited list of directories then the name
                table is replicated in all of the directories, for redundancy.
              </td>
            </tr>
            <tr>
              <td>dfs.datanode.data.dir</td>
              <td>
                Comma separated list of paths on the local filesystem of a
                <code>DataNode</code> where it should store its blocks.
              </td>
              <td>
                If this is a comma-delimited list of directories, then data will
                be stored in all named directories, typically on different
                devices.
              </td>
            </tr>
          </table>
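
          <p>A sketch of the corresponding <code>conf/hdfs-site.xml</code> is
          shown below, assuming hypothetical mount points <em>/disk1</em> and
          <em>/disk2</em>; adjust the paths to match your hardware.</p>
          <source>
<?xml version="1.0"?>
<configuration>
  <!-- Replicate the name table across two directories for redundancy. -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/disk1/hdfs/name,/disk2/hdfs/name</value>
  </property>
  <!-- Spread block storage across both devices. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/disk1/hdfs/data,/disk2/hdfs/data</value>
  </property>
</configuration>
          </source>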

          <anchor id="mapred-site.xml"/><p><code>conf/mapred-site.xml</code>:</p>

          <table>
            <tr>
              <th>Parameter</th>
              <th>Value</th>
              <th>Notes</th>
            </tr>
            <tr>
              <td>mapreduce.jobtracker.address</td>
              <td>Host or IP and port of <code>JobTracker</code>.</td>
              <td><em>host:port</em> pair.</td>
            </tr>
            <tr>
              <td>mapreduce.jobtracker.system.dir</td>
              <td>
                Path on HDFS where the Map/Reduce framework stores
                system files, e.g. <code>/hadoop/mapred/system/</code>.
              </td>
              <td>
                This is in the default filesystem (HDFS) and must be accessible
                from both the server and client machines.
              </td>
            </tr>
            <tr>
              <td>mapreduce.cluster.local.dir</td>
              <td>
                Comma-separated list of paths on the local filesystem where
                temporary Map/Reduce data is written.
              </td>
              <td>Multiple paths help spread disk i/o.</td>
            </tr>
            <tr>
              <td>mapreduce.tasktracker.{map|reduce}.tasks.maximum</td>
              <td>
                The maximum number of Map/Reduce tasks that are run
                simultaneously on a given <code>TaskTracker</code>, individually.
              </td>
              <td>
                Defaults to 2 (2 maps and 2 reduces), but vary it depending on
                your hardware.
              </td>
            </tr>
            <tr>
              <td>dfs.hosts/dfs.hosts.exclude</td>
              <td>List of permitted/excluded DataNodes.</td>
              <td>
                If necessary, use these files to control the list of allowable
                datanodes.
              </td>
            </tr>
            <tr>
              <td>mapreduce.jobtracker.hosts.filename/mapreduce.jobtracker.hosts.exclude.filename</td>
              <td>List of permitted/excluded TaskTrackers.</td>
              <td>
                If necessary, use these files to control the list of allowable
                TaskTrackers.
              </td>
            </tr>
            <tr>
              <td>mapreduce.cluster.acls.enabled</td>
              <td>Boolean, specifying whether checks for queue ACLs and job ACLs
                are to be done for authorizing users to perform queue
                operations and job operations.
              </td>
              <td>
                If <em>true</em>, queue ACLs are checked while submitting
                and administering jobs, and job ACLs are checked for authorizing
                view and modification of jobs. Queue ACLs are specified using the
                configuration parameters of the form defined below under
                mapred-queues.xml. Job ACLs are described in the
                "Job Authorization" section of the Map/Reduce tutorial.
                To enable this flag (mapreduce.cluster.acls.enabled), set it to
                true in mapred-site.xml on the JobTracker node and on all
                TaskTracker nodes.
              </td>
            </tr>

          </table>

          <p>Typically all the above parameters are marked as
          <a href="ext:api/org/apache/hadoop/conf/configuration/final_parameters">
          final</a> to ensure that they cannot be overridden by user applications.
          </p>
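
          <p>Putting a few of these together, a sketch of
          <code>conf/mapred-site.xml</code> might look as follows. The host
          name, port and paths are placeholders, and the
          <em><final></em> element illustrates marking a parameter as
          final.</p>
          <source>
<?xml version="1.0"?>
<configuration>
  <property>
    <name>mapreduce.jobtracker.address</name>
    <value>jobtracker.example.com:9001</value>
    <!-- Marked final so user jobs cannot override it. -->
    <final>true</final>
  </property>
  <property>
    <name>mapreduce.cluster.local.dir</name>
    <value>/disk1/mapred/local,/disk2/mapred/local</value>
  </property>
</configuration>
          </source>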

          <anchor id="mapred-queues.xml"/><p><code>conf/mapred-queues.xml</code>:</p>
          <p>This file is used to configure the queues in the Map/Reduce
          system. Queues are abstract entities in the JobTracker that can be
          used to manage collections of jobs. They provide a way for
          administrators to organize jobs in specific ways and to enforce
          certain policies on such collections, thus providing varying
          levels of administrative control and management functions on jobs.
          </p>
          <p>One can imagine the following sample scenarios:</p>
          <ul>
            <li> Jobs submitted by a particular group of users can all be
            submitted to one queue. </li>
            <li> Long running jobs in an organization can be submitted to a
            queue. </li>
            <li> Short running jobs can be submitted to a queue and the number
            of jobs that can run concurrently can be restricted. </li>
          </ul>
          <p>The usage of queues is closely tied to the scheduler configured
          at the JobTracker via <em>mapreduce.jobtracker.taskscheduler</em>.
          The degree of support for queues depends on the scheduler used. Some
          schedulers support a single queue, while others support more complex
          configurations. Schedulers also implement the policies that apply
          to jobs in a queue. Some schedulers, such as the Fairshare scheduler,
          implement their own mechanisms for collections of jobs and do not rely
          on queues provided by the framework. Administrators are
          encouraged to refer to the documentation of the scheduler they are
          interested in for determining the level of support for queues.</p>
          <p>The Map/Reduce framework supports some basic operations on queues
          such as job submission to a specific queue, access control for queues,
          queue states, viewing configured queues and their properties
          and refreshing queue properties. In order to fully implement some of
          these operations, the framework takes the help of the configured
          scheduler.</p>
          <p>The following types of queue configurations are possible:</p>
          <ul>
            <li> Single queue: The default configuration in Map/Reduce comprises
            a single queue, as supported by the default scheduler. All jobs
            are submitted to this default queue, which maintains jobs in a
            priority-based FIFO order.</li>
            <li> Multiple single-level queues: Multiple queues are defined, and
            jobs can be submitted to any of these queues. Different policies
            can be applied to these queues by schedulers that support this
            configuration to provide a better level of support. For example,
            the <a href="ext:capacity-scheduler">capacity scheduler</a>
            provides ways of configuring different
            capacity and fairness guarantees on these queues.</li>
            <li> Hierarchical queues: Hierarchical queues are a configuration in
            which queues can contain other queues within them recursively. The
            queues that contain other queues are referred to as
            container queues. Queues that do not contain other queues are
            referred to as leaf or job queues. Jobs can only be submitted to leaf
            queues. Hierarchical queues can potentially offer a higher level
            of control to administrators, as schedulers can now build a
            hierarchy of policies where policies applicable to a container
            queue can provide context for policies applicable to queues it
            contains. It also opens up possibilities for delegating queue
            administration, where administration of queues in a container queue
            can be turned over to a different set of administrators, within
            the context provided by the container queue. For example, the
            <a href="ext:capacity-scheduler">capacity scheduler</a>
            uses hierarchical queues to partition the capacity of a cluster
            among container queues, allowing the queues they contain to divide
            that capacity in more ways.</li>
          </ul>

          <p>Most of the configuration of the queues can be refreshed/reloaded
          without restarting the Map/Reduce sub-system by editing this
          configuration file as described in the section on
          <a href="commands_manual.html#RefreshQueues">reloading queue
          configuration</a>.
          Not all configuration properties can be reloaded, of course,
          as the description of each property below explains.</p>

          <p>The format of conf/mapred-queues.xml is different from that of the
          other configuration files, supporting nested configuration
          elements for hierarchical queues. The format is as follows:
          </p>

          <source>
          <queues>
            <queue>
              <name>$queue-name</name>
              <state>$state</state>
              <queue>
                <name>$child-queue1</name>
                <properties>
                  <property key="$key" value="$value"/>
                  ...
                </properties>
                <queue>
                  <name>$grand-child-queue1</name>
                  ...
                </queue>
              </queue>
              <queue>
                <name>$child-queue2</name>
                ...
              </queue>
              ...
              ...
              ...
              <queue>
                <name>$leaf-queue</name>
                <acl-submit-job>$acls</acl-submit-job>
                <acl-administer-jobs>$acls</acl-administer-jobs>
                <properties>
                  <property key="$key" value="$value"/>
                  ...
                </properties>
              </queue>
            </queue>
          </queues>
          </source>
          <table>
            <tr>
              <th>Tag/Attribute</th>
              <th>Value</th>
              <th>
                <a href="commands_manual.html#RefreshQueues">Refresh-able?</a>
              </th>
              <th>Notes</th>
            </tr>

            <tr>
              <td><anchor id="queues_tag"/>queues</td>
              <td>Root element of the configuration file.</td>
              <td>Not-applicable</td>
              <td>All the queues are nested inside this root element of the
              file. There can be only one root queues element in the file.</td>
            </tr>

            <tr>
              <td>aclsEnabled</td>
              <td>Boolean attribute of the
              <a href="#queues_tag"><em><queues></em></a> tag
              specifying whether ACLs are supported for controlling job
              submission and administration for <em>all</em> the queues
              configured.
              </td>
              <td>Yes</td>
              <td>If <em>false</em>, ACLs are ignored for <em>all</em> the
              configured queues. <br/><br/>
              If <em>true</em>, the user and group details of the user
              are checked against the configured ACLs of the corresponding
              job-queue while submitting and administering jobs. ACLs can be
              specified for each queue using the queue-specific tags
              "acl-$acl_name", defined below. ACLs are checked only against
              the job-queues, i.e. the leaf-level queues; ACLs configured
              for the rest of the queues in the hierarchy are ignored.
              </td>
            </tr>

            <tr>
              <td><anchor id="queue_tag"/>queue</td>
              <td>A child element of the
              <a href="#queues_tag"><em><queues></em></a> tag or another
              <a href="#queue_tag"><em><queue></em></a>. Denotes a queue
              in the system.
              </td>
              <td>Not applicable</td>
              <td>Queues can be hierarchical and so this element can contain
              children of the same type.</td>
            </tr>

            <tr>
              <td>name</td>
              <td>Child element of a
              <a href="#queue_tag"><em><queue></em></a> specifying the
              name of the queue.</td>
              <td>No</td>
              <td>The name of the queue cannot contain the character <em>":"</em>,
              which is reserved as the queue-name delimiter when addressing a
              queue in a hierarchy.</td>
            </tr>

            <tr>
              <td>state</td>
              <td>Child element of a
              <a href="#queue_tag"><em><queue></em></a> specifying the
              state of the queue.
              </td>
              <td>Yes</td>
              <td>Each queue has a corresponding state. A queue in
              <em>'running'</em> state can accept new jobs, while a queue in
              <em>'stopped'</em> state will stop accepting any new jobs. State
              is defined and respected by the framework only for the
              leaf-level queues and is ignored for all other queues.
              <br/><br/>
              The state of the queue can be viewed from the command line using
              the <code>'bin/mapred queue'</code> command and also on the Web
              UI.<br/><br/>
              Administrators can stop and start queues at runtime using the
              feature of <a href="commands_manual.html#RefreshQueues">reloading
              queue configuration</a>. If a queue is stopped at runtime, it
              will complete all the existing running jobs and will stop
              accepting any new jobs.
              </td>
            </tr>

            <tr>
              <td>acl-submit-job</td>
              <td>Child element of a
              <a href="#queue_tag"><em><queue></em></a> specifying the
              list of users and groups that can submit jobs to the specified
              queue.</td>
              <td>Yes</td>
              <td>
              Applicable only to leaf-queues.<br/><br/>
              The lists of users and groups are both comma-separated
              lists of names, and the two lists are separated by a blank.
              Example: <em>user1,user2 group1,group2</em>.
              If you wish to define only a list of groups, provide
              a blank at the beginning of the value.
              <br/><br/>
              </td>
            </tr>

            <tr>
              <td>acl-administer-jobs</td>
              <td>Child element of a
              <a href="#queue_tag"><em><queue></em></a> specifying the
              list of users and groups that can view job details, change the
              priority of a job or kill a job that has been submitted to the
              specified queue.
              </td>
              <td>Yes</td>
              <td>
              Applicable only to leaf-queues.<br/><br/>
              The lists of users and groups are both comma-separated
              lists of names, and the two lists are separated by a blank.
              Example: <em>user1,user2 group1,group2</em>.
              If you wish to define only a list of groups, provide
              a blank at the beginning of the value. Note that the
              owner of a job can always change the priority of or kill
              his/her own job, irrespective of the ACLs.
              </td>
            </tr>

            <tr>
              <td><anchor id="properties_tag"/>properties</td>
              <td>Child element of a
              <a href="#queue_tag"><em><queue></em></a> specifying the
              scheduler-specific properties.</td>
              <td>Not applicable</td>
              <td>The scheduler-specific properties are the children of this
              element, specified as a group of <property> tags described
              below. The JobTracker completely ignores these properties. They
              can be used as per-queue properties needed by the scheduler
              being configured. Please look at the scheduler-specific
              documentation as to how these properties are used by that
              particular scheduler.
              </td>
            </tr>

            <tr>
              <td><anchor id="property_tag"/>property</td>
              <td>Child element of
              <a href="#properties_tag"><em><properties></em></a> for a
              specific queue.</td>
              <td>Not applicable</td>
              <td>A single scheduler-specific queue-property. Ignored by
              the JobTracker and used by the scheduler that is configured.</td>
            </tr>

            <tr>
              <td>key</td>
              <td>Attribute of a
              <a href="#property_tag"><em><property></em></a> for a
              specific queue.</td>
              <td>Scheduler-specific</td>
              <td>The name of a single scheduler-specific queue-property.</td>
            </tr>

            <tr>
              <td>value</td>
              <td>Attribute of a
              <a href="#property_tag"><em><property></em></a> for a
              specific queue.</td>
              <td>Scheduler-specific</td>
              <td>The value of a single scheduler-specific queue-property.
              The value can be anything, left to proper
              interpretation by the scheduler that is configured.</td>
            </tr>

          </table>
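
          <p>As a concrete illustration of the template above, the following
          sketch defines one container queue with two leaf queues; the queue
          names, users and groups are hypothetical.</p>
          <source>
          <queues aclsEnabled="true">
            <queue>
              <name>research</name>
              <queue>
                <name>short-jobs</name>
                <state>running</state>
                <acl-submit-job>user1,user2 group1</acl-submit-job>
                <acl-administer-jobs> group1</acl-administer-jobs>
              </queue>
              <queue>
                <name>long-jobs</name>
                <state>stopped</state>
              </queue>
            </queue>
          </queues>
          </source>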

          <p>Once the queues are configured properly and the Map/Reduce
          system is up and running, from the command line one can
          <a href="commands_manual.html#QueuesList">get the list
          of queues</a> and
          <a href="commands_manual.html#QueuesInfo">obtain
          information specific to each queue</a>. This information is also
          available from the web UI. On the web UI, queue information can be
          seen by going to queueinfo.jsp, linked to from the queues table-cell
          in the cluster-summary table. The queueinfo.jsp prints the hierarchy
          of queues as well as the specific information for each queue.
          </p>

          <p>Users can submit jobs only to a
          leaf-level queue, by specifying the fully-qualified queue-name for
          the property name <em>mapreduce.job.queuename</em> in the job
          configuration. The character ':' is the queue-name delimiter; so,
          for example, if one wants to submit to a configured job-queue 'Queue-C'
          which is one of the sub-queues of 'Queue-B', which in turn is a
          sub-queue of 'Queue-A', then the job configuration should contain the
          property <em>mapreduce.job.queuename</em> set to the <em>
          <value>Queue-A:Queue-B:Queue-C</value></em></p>
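
          <p>In job-configuration terms, that entry might be sketched as
          follows, using the queue names from the example above:</p>
          <source>
          <property>
            <name>mapreduce.job.queuename</name>
            <value>Queue-A:Queue-B:Queue-C</value>
          </property>
          </source>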
        </section>
        <section>
          <title>Real-World Cluster Configurations</title>

          <p>This section lists some non-default configuration parameters which
          have been used to run the <em>sort</em> benchmark on very large
          clusters.</p>

          <ul>
            <li>
              <p>Some non-default configuration values used to run sort900,
              that is 9TB of data sorted on a cluster with 900 nodes:</p>
              <table>
                <tr>
                  <th>Configuration File</th>
                  <th>Parameter</th>
                  <th>Value</th>
                  <th>Notes</th>
                </tr>
                <tr>
                  <td>conf/hdfs-site.xml</td>
                  <td>dfs.blocksize</td>
                  <td>128m</td>
                  <td>
                    HDFS blocksize of 128 MB for large file-systems. Sizes can be provided
                    in size-prefixed values (10k, 128m, 1g, etc.) or simply in bytes (134217728 for 128 MB, etc.).
                  </td>
                </tr>
                <tr>
                  <td>conf/hdfs-site.xml</td>
                  <td>dfs.namenode.handler.count</td>
                  <td>40</td>
                  <td>
                    More NameNode server threads to handle RPCs from a large
                    number of DataNodes.
                  </td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.reduce.shuffle.parallelcopies</td>
                  <td>20</td>
                  <td>
                    Higher number of parallel copies run by reduces to fetch
                    outputs from a very large number of maps.
                  </td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.map.java.opts</td>
                  <td>-Xmx512M</td>
                  <td>
                    Larger heap-size for child jvms of maps.
                  </td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.reduce.java.opts</td>
                  <td>-Xmx512M</td>
                  <td>
                    Larger heap-size for child jvms of reduces.
                  </td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.reduce.shuffle.input.buffer.percent</td>
                  <td>0.80</td>
                  <td>
                    Larger amount of memory allocated for merging map output
                    in memory during the shuffle. Expressed as a fraction of
                    the total heap.
                  </td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.reduce.input.buffer.percent</td>
                  <td>0.80</td>
                  <td>
                    Larger amount of memory allocated for retaining map output
                    in memory during the reduce. Expressed as a fraction of
                    the total heap.
                  </td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.task.io.sort.factor</td>
                  <td>100</td>
                  <td>More streams merged at once while sorting files.</td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.task.io.sort.mb</td>
                  <td>200</td>
                  <td>Higher memory-limit while sorting data.</td>
                </tr>
                <tr>
                  <td>conf/core-site.xml</td>
                  <td>io.file.buffer.size</td>
                  <td>131072</td>
                  <td>Size of read/write buffer used in SequenceFiles.</td>
                </tr>
              </table>
            </li>
            <li>
              <p>Updates to some configuration values to run sort1400 and
              sort2000, that is 14TB of data sorted on 1400 nodes and 20TB of
              data sorted on 2000 nodes:</p>
              <table>
                <tr>
                  <th>Configuration File</th>
                  <th>Parameter</th>
                  <th>Value</th>
                  <th>Notes</th>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.jobtracker.handler.count</td>
                  <td>60</td>
                  <td>
                    More JobTracker server threads to handle RPCs from a large
                    number of TaskTrackers.
                  </td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.reduce.shuffle.parallelcopies</td>
                  <td>50</td>
                  <td></td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.tasktracker.http.threads</td>
                  <td>50</td>
                  <td>
                    More worker threads for the TaskTracker's http server. The
                    http server is used by reduces to fetch intermediate
                    map-outputs.
                  </td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.map.java.opts</td>
                  <td>-Xmx512M</td>
                  <td>
                    Larger heap-size for child jvms of maps.
                  </td>
                </tr>
                <tr>
                  <td>conf/mapred-site.xml</td>
                  <td>mapreduce.reduce.java.opts</td>
                  <td>-Xmx1024M</td>
                  <td>Larger heap-size for child jvms of reduces.</td>
                </tr>
              </table>
            </li>
          </ul>
        </section>
        <section>
          <title>Configuring Memory Parameters for MapReduce Jobs</title>
          <p>
          As MapReduce jobs could use varying amounts of memory, Hadoop
          provides various configuration options to users and administrators
          for managing memory effectively. Some of these options are job
          specific and can be used by users. While setting up a cluster,
          administrators can configure appropriate default values for these
          options so that users' jobs run out of the box. Other options are
          cluster specific and can be used by administrators to enforce
          limits and prevent misconfigured or memory-intensive jobs from
          causing undesired side effects on the cluster.
          </p>
          <p>
          The values configured should
          take into account the hardware resources of the cluster, such as the
          amount of physical and virtual memory available for tasks,
          the number of slots configured on the slaves and the requirements
          for other processes running on the slaves. If the right values are not
          set, it is likely that jobs start failing with memory related
          errors or, in the worst case, even affect other tasks or
          the slaves themselves.
          </p>

          <section>
            <title>Monitoring Task Memory Usage</title>
            <p>
            Before describing the memory options, it is
            useful to look at a feature provided by Hadoop to monitor
            memory usage of the MapReduce tasks it runs. The basic objective
            of this feature is to prevent MapReduce tasks from consuming
            memory beyond a limit that would result in their affecting
            other processes running on the slave, including other tasks
            and daemons like the DataNode or TaskTracker.
            </p>

            <p>
            <em>Note:</em> For the time being, this feature is available
            only for the Linux platform.
            </p>

            <p>
            Hadoop allows monitoring to be done both for virtual
            and physical memory usage of tasks. The two kinds of monitoring
            can be done independently of each other, and therefore the
            options can be configured independently of each other. It
            has been found in some environments, particularly related
            to streaming, that the virtual memory recorded for tasks is high
            because of libraries loaded by the programs used to run
            the tasks. However, this memory is largely unused and does
            not affect the slave's memory itself. In such cases,
            monitoring based on physical memory can provide a more
            accurate picture of memory usage.
            </p>

            <p>
            This feature considers that there is a limit on
            the amount of virtual or physical memory on the slaves
            that can be used by
            the running MapReduce tasks. The rest of the memory is
            assumed to be required for the system and other processes.
            Since some jobs may require a higher amount of memory for their
            tasks than others, Hadoop allows jobs to specify how much
            memory they expect to use at a maximum. Then by using
            resource-aware scheduling and monitoring, Hadoop tries to
            ensure that at any time, only as many tasks are running on
            the slaves as can meet the dual constraints of an individual
            job's memory requirements and the total amount of memory
            available for all MapReduce tasks.
            </p>

            <p>
            The TaskTracker monitors tasks at regular intervals. Each time,
            it operates in two steps:
            </p>

            <ul>

              <li>
              In the first step, it
              checks that a job's task and any child processes it
              launches are not cumulatively using more virtual or physical
              memory than specified. If both virtual and physical memory
              monitoring are enabled, then virtual memory usage is checked
              first, followed by physical memory usage.
              Any task that is found to
              use more memory is killed along with any child processes it
              might have launched, and the task status is marked
              <em>failed</em>. Repeated failures such as this will terminate
              the job.
              </li>

              <li>
              In the next step, it checks that the cumulative virtual and
              physical memory
              used by all running tasks and their child processes
              does not exceed the total virtual and physical memory limit,
              respectively. Again, the virtual memory limit is checked first,
              followed by the physical memory limit. In this case, it kills
              enough tasks, along with any child processes they
              might have launched, until the cumulative memory usage
              is brought under the limit. In the case of the virtual memory
              limit being exceeded, the tasks chosen for killing are
              the ones that have made the least progress. In the case of the
              physical memory limit being exceeded, the tasks chosen
              for killing are the ones that have used the maximum amount
              of physical memory. Also, the status
              of these tasks is marked as <em>killed</em>, and hence repeated
              occurrences of this will not result in a job failure.
              </li>

            </ul>

            <p>
            In either case, the task's diagnostic message will indicate the
            reason why the task was terminated.
            </p>

            <p>
            Resource-aware scheduling can ensure that tasks are scheduled
            on a slave only if their memory requirement can be satisfied
            by the slave. The Capacity Scheduler, for example,
            takes virtual memory requirements into account while
            scheduling tasks, as described in the section on
            <a href="ext:capacity-scheduler/MemoryBasedTaskScheduling">
            memory based scheduling</a>.
            </p>

            <p>
            Memory monitoring is enabled when certain configuration
            variables are defined with non-zero values, as described below.
            </p>

          </section>

          <section>
            <title>Job Specific Options</title>
            <p>
            Memory related options that can be configured individually per
            job are described in detail in the section on
            <a href="ext:mapred-tutorial/ConfiguringMemoryRequirements">
            Configuring Memory Requirements For A Job</a> in the MapReduce
            tutorial. While setting up
            the cluster, the Hadoop defaults for these options can be reviewed
            and changed to better suit the job profiles expected to be run on
            the clusters, as well as the hardware configuration.
            </p>
            <p>
            As with any other configuration option in Hadoop, if
            administrators desire to prevent users from overriding these
            options in jobs they submit, these values can be marked as
            <em>final</em> in the cluster configuration.
            </p>
          </section>


          <section>
            <title>Cluster Specific Options</title>

            <p>
            This section describes the memory related options that are
            used by the JobTracker and TaskTrackers, and cannot be changed
            by jobs. The values set for these options should be the same
            for all the slave nodes in a cluster.
            </p>

            <ul>

              <li>
              <code>mapreduce.cluster.{map|reduce}memory.mb</code>: These
              options define the default amount of virtual memory that should be
              allocated for MapReduce tasks running in the cluster. They
              typically match the default values set for the options
              <code>mapreduce.{map|reduce}.memory.mb</code>. They help in the
              calculation of the total amount of virtual memory available for
              MapReduce tasks on a slave, using the following equation:<br/>
              <em>Total virtual memory for all MapReduce tasks =
              (mapreduce.cluster.mapmemory.mb *
              mapreduce.tasktracker.map.tasks.maximum) +
              (mapreduce.cluster.reducememory.mb *
              mapreduce.tasktracker.reduce.tasks.maximum)</em><br/>
              Typically, reduce tasks require more memory than map tasks.
              Hence a higher value is recommended for
              <em>mapreduce.cluster.reducememory.mb</em>. The value is
              specified in MB. To set a value of 2GB for reduce tasks, set
              <em>mapreduce.cluster.reducememory.mb</em> to 2048. A worked
              sketch of this equation is shown after this list.
              </li>

              <li>
              <code>mapreduce.jobtracker.max{map|reduce}memory.mb</code>:
              These options define the maximum amount of virtual memory that
              can be requested by jobs using the parameters
              <code>mapreduce.{map|reduce}.memory.mb</code>. The system
              will reject any job that is submitted requesting more
              memory than these limits. Typically, the values for these
              options should be set to satisfy the following constraint:<br/>
              <em>mapreduce.jobtracker.maxmapmemory.mb =
              mapreduce.cluster.mapmemory.mb *
              mapreduce.tasktracker.map.tasks.maximum<br/>
              mapreduce.jobtracker.maxreducememory.mb =
              mapreduce.cluster.reducememory.mb *
              mapreduce.tasktracker.reduce.tasks.maximum</em><br/>
              The value is specified in MB. If
              <code>mapreduce.cluster.reducememory.mb</code> is set to 2GB and
              there are 2 reduce slots configured in the slaves, the value
              for <code>mapreduce.jobtracker.maxreducememory.mb</code> should
              be set to 4096.
              </li>

              <li>
              <code>mapreduce.tasktracker.reserved.physicalmemory.mb</code>:
              This option defines the amount of physical memory that is
              marked for system and daemon processes. Using this, the amount
              of physical memory available for MapReduce tasks is calculated
              using the following equation:<br/>
              <em>Total physical memory for all MapReduce tasks =
              Total physical memory available on the system -
              mapreduce.tasktracker.reserved.physicalmemory.mb</em><br/>
              The value is specified in MB. To set this value to 2GB,
              specify the value as 2048.
              </li>

              <li>
              <code>mapreduce.tasktracker.taskmemorymanager.monitoringinterval</code>:
              This option defines the time the TaskTracker waits between
              two cycles of memory monitoring. The value is specified in
              milliseconds.
              </li>

            </ul>

            <p>
            <em>Note:</em> The virtual memory monitoring function is only
            enabled if
            the variables <code>mapreduce.cluster.{map|reduce}memory.mb</code>
            and <code>mapreduce.jobtracker.max{map|reduce}memory.mb</code>
            are set to values greater than zero. Likewise, the physical
            memory monitoring function is only enabled if the variable
            <code>mapreduce.tasktracker.reserved.physicalmemory.mb</code>
            is set to a value greater than zero.
            </p>
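
            <p>As a worked sketch of the first equation: on a hypothetical
            slave with 4 map slots at 512 MB each and 2 reduce slots at
            1024 MB each, the configuration below yields
            4*512 + 2*1024 = 4096 MB of total virtual memory for all
            MapReduce tasks.</p>
            <source>
            <property>
              <name>mapreduce.cluster.mapmemory.mb</name>
              <value>512</value>
            </property>
            <property>
              <name>mapreduce.tasktracker.map.tasks.maximum</name>
              <value>4</value>
            </property>
            <property>
              <name>mapreduce.cluster.reducememory.mb</name>
              <value>1024</value>
            </property>
            <property>
              <name>mapreduce.tasktracker.reduce.tasks.maximum</name>
              <value>2</value>
            </property>
            </source>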
          </section>
        </section>


        <section>
          <title>Task Controllers</title>
          <p>Task controllers are classes in the Hadoop Map/Reduce
          framework that define how users' map and reduce tasks
          are launched and controlled. They can
          be used in clusters that require some customization in
          the process of launching or controlling user tasks.
          For example, in some
          clusters, there may be a requirement to run tasks as
          the user who submitted the job, instead of as the task
          tracker user, which is how tasks are launched by default.
          This section describes how to configure and use
          task controllers.</p>
          <p>The following task controllers are available in
          Hadoop.
          </p>
          <table>
            <tr><th>Name</th><th>Class Name</th><th>Description</th></tr>
            <tr>
              <td>DefaultTaskController</td>
              <td>org.apache.hadoop.mapred.DefaultTaskController</td>
              <td> The default task controller which Hadoop uses to manage task
              execution. The tasks run as the task tracker user.</td>
            </tr>
            <tr>
              <td>LinuxTaskController</td>
              <td>org.apache.hadoop.mapred.LinuxTaskController</td>
              <td>This task controller, which is supported only on Linux,
              runs the tasks as the user who submitted the job. It requires
              these user accounts to be created on the cluster nodes
              where the tasks are launched. It
              uses a setuid executable that is included in the Hadoop
              distribution. The task tracker uses this executable to
              launch and kill tasks. The setuid executable switches to
              the user who has submitted the job and launches or kills
              the tasks. For maximum security, this task controller
              sets up restricted permissions and user/group ownership of
              local files and directories used by the tasks, such as the
              job jar files, intermediate files, task log files and distributed
              cache files. Note particularly that, because of this, except for
              the job owner and the tasktracker, no other user can access any
              of the local files/directories, including those localized as part
              of the distributed cache.
              </td>
            </tr>
          </table>
          <section>
            <title>Configuring Task Controllers</title>
            <p>The task controller to be used can be configured by setting the
            value of the following key in mapred-site.xml:</p>
            <table>
              <tr>
                <th>Property</th><th>Value</th><th>Notes</th>
              </tr>
              <tr>
                <td>mapreduce.tasktracker.taskcontroller</td>
                <td>Fully qualified class name of the task controller class</td>
                <td>Currently there are two implementations of task controller
                in the Hadoop system, DefaultTaskController and LinuxTaskController.
                Refer to the class names mentioned above to determine the value
                to set for the class of choice.
                </td>
              </tr>
            </table>
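
            <p>For example, a sketch of the mapred-site.xml entry selecting
            the LinuxTaskController would be:</p>
            <source>
            <property>
              <name>mapreduce.tasktracker.taskcontroller</name>
              <value>org.apache.hadoop.mapred.LinuxTaskController</value>
            </property>
            </source>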
          </section>
          <section>
            <title>Using the LinuxTaskController</title>
            <p>This section of the document describes the steps required to
            use the LinuxTaskController.</p>

            <p>In order to use the LinuxTaskController, a setuid executable
            should be built and deployed on the compute nodes. The
            executable is named task-controller. To build the executable,
            execute
            <em>ant task-controller -Dhadoop.conf.dir=/path/to/conf/dir</em>.
            The path passed in <em>-Dhadoop.conf.dir</em> should be the path
            on the cluster nodes where a configuration file for the setuid
            executable would be located. The executable is built to
            <em>build.dir/dist.dir/bin</em> and should be installed to
            <em>$HADOOP_PREFIX/bin</em>.
            </p>

            <p>
            The executable must have specific permissions as follows. It
            should have <em>6050 or --Sr-s---</em> permissions,
            be user-owned by root (super-user) and be group-owned by a special
            group of which the TaskTracker's user is a member and no job
            submitter is. If any job submitter belongs to this special group,
            security will be compromised. This special group name should be
            specified for the configuration property
            <em>"mapreduce.tasktracker.group"</em> in both mapred-site.xml and
            <a href="#task-controller.cfg">task-controller.cfg</a>.
            For example, let's say that the TaskTracker is run as user
            <em>mapred</em>, who is part of the groups <em>users</em> and
            <em>specialGroup</em>, either of which may be the primary group.
            Let it also be the case that <em>users</em> has both <em>mapred</em>
            and another user (a job submitter) <em>X</em> as its members, and X
            does not belong to <em>specialGroup</em>. Going by the above
            description, the setuid/setgid executable should be set
            <em>6050 or --Sr-s---</em> with user-owner as <em>mapred</em> and
            group-owner as <em>specialGroup</em>, which has
            <em>mapred</em> as its member (and not <em>users</em>, which has
            <em>X</em> also as its member besides <em>mapred</em>).
            </p>

            <p>
            The LinuxTaskController requires that the directories specified in
            <em>mapreduce.cluster.local.dir</em> and <em>hadoop.log.dir</em>,
            and all paths leading up to them, be set to 755 permissions.
            </p>

            <section>
              <title>task-controller.cfg</title>
              <p>The executable requires a configuration file called
              <em>taskcontroller.cfg</em> to be
              present in the configuration directory passed to the ant target
              mentioned above. If the binary was not built with a specific
              conf directory, the path defaults to
              <em>/path-to-binary/../conf</em>. The configuration file must be
              owned by the user running the TaskTracker (user <em>mapred</em> in
              the above example), group-owned by anyone, and should have the
              permissions <em>0400 or r--------</em>.
              </p>

              <p>The executable requires the following configuration items to be
              present in the <em>taskcontroller.cfg</em> file. The items should
              be mentioned as simple <em>key=value</em> pairs.
              </p>
              <table><tr><th>Name</th><th>Description</th></tr>
              <tr>
                <td>mapreduce.cluster.local.dir</td>
                <td>Path to the local directories used by Map/Reduce. Should be
                the same as the value provided for this key in mapred-site.xml.
                This is required to
                validate paths passed to the setuid executable in order to prevent
                arbitrary paths being passed to it.</td>
              </tr>
              <tr>
                <td>hadoop.log.dir</td>
                <td>Path to the hadoop log directory. Should be the same as the
                value the TaskTracker is started with. This is required to set
                proper permissions on the log files so that they can be written
                to by the user's tasks and read by the TaskTracker for serving
                on the web UI.</td>
              </tr>
              <tr>
                <td>mapreduce.tasktracker.group</td>
                <td>Group to which the TaskTracker belongs. The group owner of
                the taskcontroller binary should be this group. Should be the
                same as the value with which the TaskTracker is configured. This
                configuration is required for validating the secure access of
                the task-controller binary.</td>
              </tr>
              </table>
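
              <p>Under the assumptions of the example above (the paths and the
              group name are hypothetical), a <em>taskcontroller.cfg</em>
              sketch would be:</p>
              <source>
              mapreduce.cluster.local.dir=/disk1/mapred/local,/disk2/mapred/local
              hadoop.log.dir=/var/log/hadoop
              mapreduce.tasktracker.group=specialGroup
              </source>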
            </section>
          </section>

        </section>
        <section>
          <title>Monitoring Health of TaskTracker Nodes</title>
          <p>Hadoop Map/Reduce provides a mechanism by which administrators
          can configure the TaskTracker to run an administrator-supplied
          script periodically to determine if a node is healthy or not.
          Administrators can determine if the node is in a healthy state
          by performing any checks of their choice in the script. If the
          script detects the node to be in an unhealthy state, it must print
          a line to standard output beginning with the string <em>ERROR</em>.
          The TaskTracker spawns the script periodically and checks its
          output. If the script's output contains the string <em>ERROR</em>,
          as described above, the node's status is reported as 'unhealthy'
          and the node is black-listed on the JobTracker. No further tasks
          will be assigned to this node. However, the
          TaskTracker continues to run the script, so that if the node
          becomes healthy again, it will be removed from the blacklisted
          nodes on the JobTracker automatically. The node's health,
          along with the output of the script if it is unhealthy, is
          available to the administrator in the JobTracker's web interface.
          The time since the node was healthy is also displayed on the
          web interface.
          </p>

          <section>
            <title>Configuring the Node Health Check Script</title>
            <p>The following parameters can be used to control the node health
            monitoring script in <em>mapred-site.xml</em>.</p>
            <table>
              <tr><th>Name</th><th>Description</th></tr>
              <tr><td><code>mapreduce.tasktracker.healthchecker.script.path</code></td>
              <td>Absolute path to the script which is periodically run by the
              TaskTracker to determine if the node is
              healthy or not. The file should be executable by the TaskTracker.
              If the value of this key is empty or the file does
              not exist or is not executable, node health monitoring
              is not started.</td>
              </tr>
              <tr>
              <td><code>mapreduce.tasktracker.healthchecker.interval</code></td>
              <td>Frequency at which the node health script is run,
              in milliseconds</td>
              </tr>
              <tr>
              <td><code>mapreduce.tasktracker.healthchecker.script.timeout</code></td>
              <td>Time after which the node health script will be killed by
              the TaskTracker if unresponsive. The node is marked unhealthy
              if the node health script times out.</td>
              </tr>
              <tr>
              <td><code>mapreduce.tasktracker.healthchecker.script.args</code></td>
              <td>Extra arguments that can be passed to the node health script
              when launched.
              These should be a comma-separated list of arguments. </td>
              </tr>
            </table>
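
            <p>A sketch of the corresponding mapred-site.xml entries follows;
            the script path is hypothetical and the intervals are merely
            illustrative.</p>
            <source>
            <property>
              <name>mapreduce.tasktracker.healthchecker.script.path</name>
              <value>/etc/hadoop/health_check.sh</value>
            </property>
            <property>
              <name>mapreduce.tasktracker.healthchecker.interval</name>
              <!-- Run the script once a minute. -->
              <value>60000</value>
            </property>
            <property>
              <name>mapreduce.tasktracker.healthchecker.script.timeout</name>
              <value>600000</value>
            </property>
            </source>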
          </section>
        </section>

      </section>

      <section>
        <title>Slaves</title>

        <p>Typically you choose one machine in the cluster to act as the
        <code>NameNode</code> and one machine to act as the
        <code>JobTracker</code>, exclusively. The rest of the machines act as
        both a <code>DataNode</code> and <code>TaskTracker</code> and are
        referred to as <em>slaves</em>.</p>

        <p>List all slave hostnames or IP addresses in your
        <code>conf/slaves</code> file, one per line.</p>
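
        <p>For example, a <code>conf/slaves</code> file for a three-node
        cluster (the hostnames are hypothetical) would simply contain:</p>
        <source>
        slave1.example.com
        slave2.example.com
        slave3.example.com
        </source>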
      </section>

      <section>
        <title>Logging</title>

        <p>Hadoop uses <a href="http://logging.apache.org/log4j/">Apache
        log4j</a> via the <a href="http://commons.apache.org/logging/">Apache
        Commons Logging</a> framework for logging. Edit the
        <code>conf/log4j.properties</code> file to customize the Hadoop
        daemons' logging configuration (log-formats and so on).</p>

        <section>
          <title>History Logging</title>

          <p> The job history files are stored in the central location
          <code>mapreduce.jobtracker.jobhistory.location</code>, which can
          also be on DFS, and whose default value is
          <code>${HADOOP_LOG_DIR}/history</code>.
          The history web UI is accessible from the JobTracker web UI.</p>

          <p> The history files are also logged to the user-specified directory
          <code>mapreduce.job.userhistorylocation</code>,
          which defaults to the job output directory. The files are stored in
          "_logs/history/" in the specified directory. Hence, by default
          they will be in "mapreduce.output.fileoutputformat.outputdir/_logs/history/". Users can stop
          logging by giving the value <code>none</code> for
          <code>mapreduce.job.userhistorylocation</code>. </p>

          <p> Users can view a summary of the history logs in a specified
          directory using the following command: <br/>
          <code>$ bin/hadoop job -history output-dir</code><br/>
          This command will print job details, and failed and killed tip
          details. <br/>
          More details about the job, such as successful tasks and
          task attempts made for each task, can be viewed using the
          following command: <br/>
          <code>$ bin/hadoop job -history all output-dir</code><br/></p>
        </section>
      </section>

      <p>Once all the necessary configuration is complete, distribute the files
      to the <code>HADOOP_CONF_DIR</code> directory on all the machines,
      typically <code>${HADOOP_PREFIX}/conf</code>.</p>
    </section>
    <section>
      <title>Cluster Restartability</title>
      <section>
        <title>Map/Reduce</title>
        <p>A JobTracker restart can recover running jobs if
        <code>mapreduce.jobtracker.restart.recover</code> is set to true and
        <a href="#Logging">JobHistory logging</a> is enabled. Also,
        <code>mapreduce.jobtracker.jobhistory.block.size</code> should be
        set to an optimal value so that job history is flushed to disk as soon
        as possible; the typical value is 3145728 (3 MB).</p>
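
        <p>A sketch of the corresponding mapred-site.xml entries, using the
        values mentioned above, would be:</p>
        <source>
        <property>
          <name>mapreduce.jobtracker.restart.recover</name>
          <value>true</value>
        </property>
        <property>
          <name>mapreduce.jobtracker.jobhistory.block.size</name>
          <!-- 3 MB, so history reaches disk quickly. -->
          <value>3145728</value>
        </property>
        </source>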
|
|
|
|
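-      <p>A minimal sketch of the corresponding entries in
-      <code>conf/mapred-site.xml</code>, using the typical block size noted
-      above:</p>
-<source>
-<code>
-&lt;property&gt;
-  &lt;name&gt;mapreduce.jobtracker.restart.recover&lt;/name&gt;
-  &lt;value&gt;true&lt;/value&gt;
-&lt;/property&gt;
-&lt;property&gt;
-  &lt;name&gt;mapreduce.jobtracker.jobhistory.block.size&lt;/name&gt;
-  &lt;value&gt;3145728&lt;/value&gt;
-&lt;/property&gt;
-</code>
-</source>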
-    </section>
-  </section>
-
-  <section>
-    <title>Hadoop Rack Awareness</title>
-    <p>
-    Both the HDFS and Map/Reduce components are rack-aware. HDFS block placement uses rack
-    awareness for fault tolerance by placing one block replica on a different rack. This provides
-    data availability in the event of a network switch failure within the cluster. The JobTracker uses rack
-    awareness to reduce network transfers of HDFS data blocks by attempting to schedule tasks on datanodes holding a local
-    copy of the needed HDFS blocks. If the tasks cannot be scheduled on the datanodes
-    containing the needed HDFS blocks, then the tasks will be scheduled on the same rack to reduce network transfers where possible.
-    </p>
-    <p>The NameNode and the JobTracker obtain the rack id of the cluster slaves by invoking either
-    an external script or a Java class, as specified by the configuration files. Whether the
-    Java class or the external script is used for topology, the output must adhere to the Java
-    <a href="ext:api/org/apache/hadoop/net/dnstoswitchmapping/resolve">DNSToSwitchMapping</a>
-    interface. The interface expects a one-to-one correspondence to be maintained,
-    with the topology information in the format of '/myrack/myhost', where '/' is the topology
-    delimiter, 'myrack' is the rack identifier, and 'myhost' is the individual host. Assuming
-    a single /24 subnet per rack, one could use the format '/192.168.100.0/192.168.100.5' as a
-    unique rack-host topology mapping.
-    </p>
-    <p>
-    To use the Java class for topology mapping, the class name is specified by the
-    <code>topology.node.switch.mapping.impl</code> parameter in the configuration file.
-    An example, NetworkTopology.java, is included with the Hadoop distribution and can be customized
-    by the Hadoop administrator. If not included with your distribution, NetworkTopology.java can also be found in the Hadoop
-    <a href="http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java?view=markup">
-    subversion tree</a>. Using a Java class instead of an external script has a slight performance benefit in
-    that Hadoop doesn't need to fork an external process when a new slave node registers itself with the JobTracker or NameNode.
-    As this class is only used during slave node registration, the performance benefit is limited.
-    </p>
-    <p>
-    If implementing an external script, it is specified with the
-    <code>topology.script.file.name</code> parameter in the configuration files. Unlike the Java
-    class, the external topology script is not included with the Hadoop distribution and must be provided by the
-    administrator. Hadoop sends multiple IP addresses in ARGV when forking the topology script. The
-    number of IP addresses sent to the topology script is controlled with <code>net.topology.script.number.args</code>
-    and defaults to 100. If <code>net.topology.script.number.args</code> were changed to 1, the topology script would
-    be forked once for each IP submitted by datanodes and/or tasktrackers. Below are example topology scripts.
-    </p>
-    <section>
-      <title>Python example</title>
-<source>
-<code>
-#!/usr/bin/python
-
-# this script makes assumptions about the physical environment.
-#  1) each rack is its own layer 3 network with a /24 subnet, which could be typical where each rack has its own
-#     switch with uplinks to a central core router.
-#
-#             +-----------+
-#             |core router|
-#             +-----------+
-#            /             \
-#   +-----------+        +-----------+
-#   |rack switch|        |rack switch|
-#   +-----------+        +-----------+
-#   | data node |        | data node |
-#   +-----------+        +-----------+
-#   | data node |        | data node |
-#   +-----------+        +-----------+
-#
-#  2) topology script gets list of IPs as input, calculates network address, and prints '/network_address/ip'.
-
-import netaddr            # third-party module; install separately if not already present
-import sys
-sys.argv.pop(0)           # discard name of topology script from argv list as we just want IP addresses
-
-netmask = '255.255.255.0' # set netmask to what's being used in your environment. The example uses a /24
-
-for ip in sys.argv:       # loop over list of datanode IPs
-    address = '{0}/{1}'.format(ip, netmask)                  # format address string so it looks like 'ip/netmask' to make netaddr work
-    try:
-        network_address = netaddr.IPNetwork(address).network # calculate and print network address
-        print "/{0}".format(network_address)
-    except:
-        print "/rack-unknown"                                # print catch-all value if unable to calculate network address
-</code>
-</source>
-    </section>
-
-    <section>
-      <title>Bash example</title>
-<source>
-<code>
-#!/bin/bash
-# Here's a bash example to show just how simple these scripts can be
-
-# Assuming we have a flat network with everything on a single switch, we can fake a rack topology.
-# This could occur in a lab environment where we have limited nodes, like 2-8 physical machines on an unmanaged switch.
-# This may also apply to multiple virtual machines running on the same physical hardware.
-# The number of machines isn't important; what matters is that we are faking a network topology when
-# there isn't one.
-#
-#       +----------+    +--------+
-#       |jobtracker|    |datanode|
-#       +----------+    +--------+
-#              \        /
-#  +--------+  +--------+  +--------+
-#  |datanode|--| switch |--|datanode|
-#  +--------+  +--------+  +--------+
-#              /        \
-#       +--------+    +--------+
-#       |datanode|    |namenode|
-#       +--------+    +--------+
-#
-# With this network topology, we are treating each host as a rack. This is being done by taking the last octet
-# in the datanode's IP and prepending it with the word '/rack-'. The advantage of doing this is that HDFS
-# can create its 'off-rack' block copy.
-
-# 1) 'echo $@' will echo all ARGV values to xargs.
-# 2) 'xargs' will enforce that we print a single argv value per line
-# 3) 'awk' will split fields on dots and append the last field to the string '/rack-'. If awk
-#    fails to split on four dots, it will still print '/rack-' followed by the last field value
-
-echo $@ | xargs -n 1 | awk -F '.' '{print "/rack-"$NF}'
-</code>
-</source>
-    </section>
-
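-    <p>Either script is then wired in through the configuration. A minimal
-    sketch of the relevant entries (typically placed in
-    <code>conf/core-site.xml</code>; the script path shown is illustrative
-    only, and 100 is the default argument count noted above):</p>
-<source>
-<code>
-&lt;property&gt;
-  &lt;name&gt;topology.script.file.name&lt;/name&gt;
-  &lt;value&gt;/etc/hadoop/topology.py&lt;/value&gt;
-&lt;/property&gt;
-&lt;property&gt;
-  &lt;name&gt;net.topology.script.number.args&lt;/name&gt;
-  &lt;value&gt;100&lt;/value&gt;
-&lt;/property&gt;
-</code>
-</source>
-    <p>With the Python example above and a /24 subnet per rack, an input IP
-    such as 192.168.100.5 would map to the rack id /192.168.100.0.</p>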
-    <p>
-    If <code>topology.script.file.name</code> or <code>topology.node.switch.mapping.impl</code> is
-    not set, the rack id '/default-rack' is returned for any passed IP address.
-    While this behavior appears desirable, it can cause issues with HDFS block replication, as
-    the default behavior is to write one replicated block off-rack, and this is not possible when there is
-    only a single rack named '/default-rack'.
-    </p>
-    <p>
-    An additional configuration setting is <code>mapred.cache.task.levels</code>, which determines
-    the number of levels (in the network topology) of caches. So, for example, if it has the
-    default value of 2, two levels of caches will be constructed: one for hosts
-    (host -> task mapping) and another for racks (rack -> task mapping), giving us our one-to-one
-    mapping of '/myrack/myhost'.
-    </p>
-  </section>
-
-  <section>
-    <title>Hadoop Startup</title>
-
-    <p>To start a Hadoop cluster you will need to start both the HDFS and the
-    Map/Reduce clusters.</p>
-
-    <p>
-      Format a new distributed filesystem:<br/>
-      <code>$ bin/hadoop namenode -format</code>
-    </p>
-
-    <p>
-      Start HDFS with the following command, run on the designated
-      <code>NameNode</code>:<br/>
-      <code>$ bin/start-dfs.sh</code>
-    </p>
-    <p>The <code>bin/start-dfs.sh</code> script also consults the
-    <code>${HADOOP_CONF_DIR}/slaves</code> file on the <code>NameNode</code>
-    and starts the <code>DataNode</code> daemon on all the listed slaves.</p>
-
-    <p>
-      Start Map/Reduce with the following command, run on the designated
-      <code>JobTracker</code>:<br/>
-      <code>$ bin/start-mapred.sh</code>
-    </p>
-    <p>The <code>bin/start-mapred.sh</code> script also consults the
-    <code>${HADOOP_CONF_DIR}/slaves</code> file on the <code>JobTracker</code>
-    and starts the <code>TaskTracker</code> daemon on all the listed slaves.
-    </p>
-  </section>
-
-  <section>
-    <title>Hadoop Shutdown</title>
-
-    <p>
-      Stop HDFS with the following command, run on the designated
-      <code>NameNode</code>:<br/>
-      <code>$ bin/stop-dfs.sh</code>
-    </p>
-    <p>The <code>bin/stop-dfs.sh</code> script also consults the
-    <code>${HADOOP_CONF_DIR}/slaves</code> file on the <code>NameNode</code>
-    and stops the <code>DataNode</code> daemon on all the listed slaves.</p>
-
-    <p>
-      Stop Map/Reduce with the following command, run on the designated
-      <code>JobTracker</code>:<br/>
-      <code>$ bin/stop-mapred.sh</code>
-    </p>
-    <p>The <code>bin/stop-mapred.sh</code> script also consults the
-    <code>${HADOOP_CONF_DIR}/slaves</code> file on the <code>JobTracker</code>
-    and stops the <code>TaskTracker</code> daemon on all the listed slaves.</p>
-  </section>
-  </body>
-
-</document>