
Merging trunk into branch-trunk-win

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-trunk-win@1415815 13f79535-47bb-0310-9956-ffa450edef68
Suresh Srinivas 12 years ago
parent
commit
2e4571afea
100 changed files with 3064 additions and 1199 deletions
  1. 1 1
      dev-support/test-patch.sh
  2. 71 1
      hadoop-common-project/hadoop-common/CHANGES.txt
  3. 9 1
      hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml
  4. 7 0
      hadoop-common-project/hadoop-common/src/main/conf/log4j.properties
  5. 133 17
      hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/cluster_setup.xml
  6. 7 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
  7. 65 12
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
  8. 2 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java
  9. 7 16
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
  10. 4 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
  11. 8 12
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathAccessDeniedException.java
  12. 12 12
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathExistsException.java
  13. 117 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
  14. 9 8
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIsDirectoryException.java
  15. 27 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIsNotDirectoryException.java
  16. 9 5
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIsNotEmptyDirectoryException.java
  17. 9 16
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathNotFoundException.java
  18. 9 14
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathOperationException.java
  19. 9 8
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathPermissionException.java
  20. 12 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java
  21. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java
  22. 6 6
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java
  23. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java
  24. 4 4
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Delete.java
  25. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java
  26. 4 4
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Mkdir.java
  27. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java
  28. 4 4
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java
  29. 0 203
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathExceptions.java
  30. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/SetReplication.java
  31. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Tail.java
  32. 0 3
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Test.java
  33. 3 3
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Touchz.java
  34. 13 2
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java
  35. 52 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/NoCacheFilter.java
  36. 1 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SecureIOUtils.java
  37. 79 4
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java
  38. 7 6
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
  39. 59 53
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
  40. 16 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
  41. 159 0
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslPlainServer.java
  42. 6 22
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java
  43. 4 1
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java
  44. 57 33
      hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java
  45. 139 65
      hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c
  46. 11 0
      hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
  47. 0 0
      hadoop-common-project/hadoop-common/src/site/apt/CLIMiniCluster.apt.vm
  48. 0 0
      hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm
  49. 0 0
      hadoop-common-project/hadoop-common/src/site/apt/SingleCluster.apt.vm
  50. 8 27
      hadoop-common-project/hadoop-common/src/site/resources/css/site.css
  51. 28 0
      hadoop-common-project/hadoop-common/src/site/site.xml
  52. 23 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java
  53. 150 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDelegationTokenRenewer.java
  54. 297 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java
  55. 58 3
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
  56. 1 1
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPathExceptions.java
  57. 6 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemTestSetup.java
  58. 6 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java
  59. 13 0
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServer.java
  60. 13 3
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIO.java
  61. 160 14
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java
  62. 61 2
      hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java
  63. 16 2
      hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/UserProvider.java
  64. 30 0
      hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/resources/css/site.css
  65. 55 0
      hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/wsrs/TestUserProvider.java
  66. 106 3
      hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
  67. 8 2
      hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java
  68. 10 3
      hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java
  69. 16 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh
  70. 16 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh
  71. 17 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
  72. 3 0
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java
  73. 9 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java
  74. 44 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java
  75. 19 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HftpFileSystem.java
  76. 8 5
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java
  77. 15 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java
  78. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java
  79. 3 3
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
  80. 4 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java
  81. 185 57
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java
  82. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java
  83. 44 43
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
  84. 2 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java
  85. 40 44
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
  86. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java
  87. 8 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/UnderReplicatedBlocks.java
  88. 8 8
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java
  89. 4 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
  90. 175 152
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
  91. 6 4
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java
  92. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
  93. 8 2
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java
  94. 17 13
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java
  95. 122 189
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
  96. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java
  97. 2 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java
  98. 32 7
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java
  99. 1 1
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java
  100. 43 15
      hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

+ 1 - 1
dev-support/test-patch.sh

@@ -710,7 +710,7 @@ runTests () {
           ordered_modules="$ordered_modules $module"
       fi
   done
-  if [ -n $hdfs_modules ]; then
+  if [ -n "$hdfs_modules" ]; then
       ordered_modules="$ordered_modules $hdfs_modules"
       if [[ $building_common -eq 0 ]]; then
           echo "  Building hadoop-common with -Pnative in order to provide \

+ 71 - 1
hadoop-common-project/hadoop-common/CHANGES.txt

@@ -132,6 +132,15 @@ Trunk (Unreleased)
     HADOOP-9004. Allow security unit tests to use external KDC. (Stephen Chu
     via suresh)
 
+    HADOOP-6616. Improve documentation for rack awareness. (Adam Faris via 
+    jghoman)
+
+    HADOOP-9075. FileContext#FSLinkResolver should be made static.
+    (Arpit Agarwal via suresh)
+
+    HADOOP-9093. Move all the Exception in PathExceptions to o.a.h.fs package.
+    (suresh)
+
   BUG FIXES
 
     HADOOP-8177. MBeans shouldn't try to register when it fails to create MBeanName.
@@ -277,11 +286,15 @@ Trunk (Unreleased)
 
     HADOOP-8974. TestDFVariations fails on Windows. (Chris Nauroth via suresh)
 
+    HADOOP-9037. Bug in test-patch.sh and precommit build process (Kihwal Lee
+    via jlowe)
+
   OPTIMIZATIONS
 
     HADOOP-7761. Improve the performance of raw comparisons. (todd)
 
     HADOOP-8589 ViewFs tests fail when tests and home dirs are nested (sanjay Radia)
+
 Release 2.0.3-alpha - Unreleased 
 
   INCOMPATIBLE CHANGES
@@ -291,6 +304,8 @@ Release 2.0.3-alpha - Unreleased
     HADOOP-8597. Permit FsShell's text command to read Avro files.
     (Ivan Vladimirov Ivanov via cutting)
 
+    HADOOP-9020. Add a SASL PLAIN server (daryn via bobby)
+
   IMPROVEMENTS
 
     HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR.
@@ -355,6 +370,19 @@ Release 2.0.3-alpha - Unreleased
 
     HADOOP-9015. Standardize creation of SaslRpcServers (daryn via bobby)
 
+    HADOOP-8860. Split MapReduce and YARN sections in documentation navigation.
+    (tomwhite via tucu)
+
+    HADOOP-9021. Enforce configured SASL method on the server (daryn via
+    bobby)
+
+    HADOOP-8998. set Cache-Control no-cache header on all dynamic content. (tucu)
+
+    HADOOP-9035. Generalize setup of LoginContext (daryn via bobby)
+
+    HADOOP-9042. Add a test for umask in FileSystemContractBaseTest.
+    (Colin McCabe via eli)
+
   OPTIMIZATIONS
 
     HADOOP-8866. SampleQuantiles#query is O(N^2) instead of O(N). (Andrew Wang
@@ -413,6 +441,21 @@ Release 2.0.3-alpha - Unreleased
 
     HADOOP-9012. IPC Client sends wrong connection context (daryn via bobby)
 
+    HADOOP-7115. Add a cache for getpwuid_r and getpwgid_r calls (tucu)
+
+    HADOOP-8999. SASL negotiation is flawed (daryn)
+
+    HADOOP-6607. Add different variants of non caching HTTP headers. (tucu)
+
+    HADOOP-9049. DelegationTokenRenewer needs to be Singleton and FileSystems
+    should register/deregister to/from. (Karthik Kambatla via tomwhite)
+
+    HADOOP-9064. Augment DelegationTokenRenewer API to cancel the tokens on 
+    calls to removeRenewAction. (kkambatl via tucu)
+
+    HADOOP-8958. ViewFs:Non absolute mount name failures when running 
+    multiple tests on Windows. (Chris Nauroth via suresh)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES
@@ -1103,6 +1146,27 @@ Release 2.0.0-alpha - 05-23-2012
     HADOOP-8655. Fix TextInputFormat for large deliminators. (Gelesh via
     bobby) 
 
+Release 0.23.6 - UNRELEASED
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-9072. Hadoop-Common-0.23-Build Fails to build in Jenkins 
+    (Robert Parker via tgraves)
+
+    HADOOP-8992. Enhance unit-test coverage of class HarFileSystem (Ivan A.
+    Veselovsky via bobby)
+
+    HADOOP-9038. unit-tests for AllocatorPerContext.PathIterator (Ivan A.
+    Veselovsky via bobby)
+
 Release 0.23.5 - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -1132,7 +1196,13 @@ Release 0.23.5 - UNRELEASED
 
     HADOOP-8986. Server$Call object is never released after it is sent (bobby)
 
-Release 0.23.4 - UNRELEASED
+    HADOOP-9022. Hadoop distcp tool fails to copy file if -m 0 specified
+    (Jonathan Eagles via bobby)
+
+    HADOOP-9025. org.apache.hadoop.tools.TestCopyListing failing (Jonathan
+    Eagles via jlowe)
+
+Release 0.23.4
 
   INCOMPATIBLE CHANGES
 

+ 9 - 1
hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml

@@ -291,5 +291,13 @@
        <Field name="previousSnapshot" />
        <Bug pattern="IS2_INCONSISTENT_SYNC" />
      </Match>
-
+     <!--
+       The method uses a generic type T that extends two other types
+       T1 and T2. Findbugs complains of a cast from T1 to T2.
+     -->
+     <Match>
+       <Class name="org.apache.hadoop.fs.DelegationTokenRenewer" />
+       <Method name="removeRenewAction" />
+       <Bug pattern="BC_UNCONFIRMED_CAST" />
+     </Match>
  </FindBugsFilter>

+ 7 - 0
hadoop-common-project/hadoop-common/src/main/conf/log4j.properties

@@ -99,6 +99,13 @@ log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
 log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
 log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
 
+#
+# HDFS block state change log from block manager
+#
+# Uncomment the following to suppress normal block state change
+# messages from BlockManager in NameNode.
+#log4j.logger.BlockStateChange=WARN
+
 #
 #Security appender
 #

+ 133 - 17
hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/cluster_setup.xml

@@ -1292,23 +1292,139 @@
     
     <section>
       <title>Hadoop Rack Awareness</title>
-      <p>The HDFS and the Map/Reduce components are rack-aware.</p>
-      <p>The <code>NameNode</code> and the <code>JobTracker</code> obtains the
-      <code>rack id</code> of the slaves in the cluster by invoking an API 
-      <a href="ext:api/org/apache/hadoop/net/dnstoswitchmapping/resolve
-      ">resolve</a> in an administrator configured
-      module. The API resolves the slave's DNS name (also IP address) to a 
-      rack id. What module to use can be configured using the configuration
-      item <code>net.topology.node.switch.mapping.impl</code>. The default 
-      implementation of the same runs a script/command configured using 
-      <code>net.topology.script.file.name</code>. If topology.script.file.name is
-      not set, the rack id <code>/default-rack</code> is returned for any 
-      passed IP address. The additional configuration in the Map/Reduce
-      part is <code>mapred.cache.task.levels</code> which determines the number
-      of levels (in the network topology) of caches. So, for example, if it is
-      the default value of 2, two levels of caches will be constructed - 
-      one for hosts (host -> task mapping) and another for racks 
-      (rack -> task mapping).
+      <p>
+         Both HDFS and Map/Reduce components are rack-aware.  HDFS block placement will use rack 
+         awareness for fault tolerance by placing one block replica on a different rack.  This provides 
+         data availability in the event of a network switch failure within the cluster.  The jobtracker uses rack
+         awareness to reduce network transfers of HDFS data blocks by attempting to schedule tasks on datanodes with a local
+         copy of needed HDFS blocks.  If the tasks cannot be scheduled on the datanodes
+         containing the needed HDFS blocks, then the tasks will be scheduled on the same rack to reduce network transfers if possible.
+      </p>
+      <p>The NameNode and the JobTracker obtain the rack id of the cluster slaves by invoking either 
+         an external script or java class as specified by configuration files.  Using either the 
+         java class or external script for topology, output must adhere to the java 
+         <a href="ext:api/org/apache/hadoop/net/dnstoswitchmapping/resolve">DNSToSwitchMapping</a> 
+         interface.  The interface expects a one-to-one correspondence to be maintained 
+         and the topology information in the format of '/myrack/myhost', where '/' is the topology 
+         delimiter, 'myrack' is the rack identifier, and 'myhost' is the individual host.  Assuming 
+         a single /24 subnet per rack, one could use the format of '/192.168.100.0/192.168.100.5' as a 
+         unique rack-host topology mapping.
+      </p>
+      <p>
+         To use the java class for topology mapping, the class name is specified by the 
+         <code>'topology.node.switch.mapping.impl'</code> parameter in the configuration file. 
+         An example, NetworkTopology.java, is included with the hadoop distribution and can be customized 
+         by the hadoop administrator.  If not included with your distribution, NetworkTopology.java can also be found in the Hadoop 
+         <a href="http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java?view=markup">
+         subversion tree</a>.  Using a java class instead of an external script has a slight performance benefit in 
+         that it doesn't need to fork an external process when a new slave node registers itself with the jobtracker or namenode.  
+         As this class is only used during slave node registration, the performance benefit is limited.  
+      </p>
+      <p>
+         If implementing an external script, it will be specified with the
+         <code>topology.script.file.name</code> parameter in the configuration files.  Unlike the java 
+         class, the external topology script is not included with the Hadoop distribution and is provided by the 
+         administrator.  Hadoop will send multiple IP addresses to ARGV when forking the topology script.  The  
+         number of IP addresses sent to the topology script is controlled with <code>net.topology.script.number.args</code>
+         and defaults to 100. If <code>net.topology.script.number.args</code> was changed to 1, a topology script would 
+         get forked for each IP submitted by datanodes and/or tasktrackers.  Below are example topology scripts.
+      </p>
+      <section>
+      <title>Python example</title>
+      <source>
+      <code>
+      #!/usr/bin/python
+
+      # this script makes assumptions about the physical environment.
+      #  1) each rack is its own layer 3 network with a /24 subnet, which could be typical where each rack has its own
+      #     switch with uplinks to a central core router.
+      #     
+      #             +-----------+
+      #             |core router|
+      #             +-----------+
+      #            /             \
+      #   +-----------+        +-----------+
+      #   |rack switch|        |rack switch|
+      #   +-----------+        +-----------+
+      #   | data node |        | data node |
+      #   +-----------+        +-----------+
+      #   | data node |        | data node |
+      #   +-----------+        +-----------+
+      #
+      # 2) topology script gets list of IP's as input, calculates network address, and prints '/network_address/ip'.
+
+      import netaddr
+      import sys             
+      sys.argv.pop(0)                                                  # discard name of topology script from argv list as we just want IP addresses
+
+      netmask = '255.255.255.0'                                        # set netmask to what's being used in your environment.  The example uses a /24
+
+      for ip in sys.argv:                                              # loop over list of datanode IP's
+          address = '{0}/{1}'.format(ip, netmask)                      # format address string so it looks like 'ip/netmask' to make netaddr work
+          try:
+              network_address = netaddr.IPNetwork(address).network     # calculate and print network address
+              print "/{0}".format(network_address)                     
+          except:
+              print "/rack-unknown"                                    # print catch-all value if unable to calculate network address
+
+      </code>
+      </source>
+      </section>
+          
+      <section>
+      <title>Bash  example</title>
+      <source>
+      <code>
+      #!/bin/bash
+      # Here's a bash example to show just how simple these scripts can be
+      
+      # Assuming we have flat network with everything on a single switch, we can fake a rack topology. 
+      # This could occur in a lab environment where we have limited nodes, like 2-8 physical machines on an unmanaged switch. 
+      # This may also apply to multiple virtual machines running on the same physical hardware.  
+      # The number of machines isn't important, but that we are trying to fake a network topology when there isn't one. 
+      #
+      #       +----------+    +--------+
+      #       |jobtracker|    |datanode| 
+      #       +----------+    +--------+
+      #              \        /
+      #  +--------+  +--------+  +--------+
+      #  |datanode|--| switch |--|datanode|
+      #  +--------+  +--------+  +--------+
+      #              /        \
+      #       +--------+    +--------+
+      #       |datanode|    |namenode| 
+      #       +--------+    +--------+
+      #
+      # With this network topology, we are treating each host as a rack.  This is being done by taking the last octet 
+      # in the datanode's IP and prepending it with the word '/rack-'.  The advantage for doing this is so HDFS
+      # can create its 'off-rack' block copy.
+      
+      # 1) 'echo $@' will echo all ARGV values to xargs.  
+      # 2) 'xargs' will enforce that we print a single argv value per line
+      # 3) 'awk' will split fields on dots and append the last field to the string '/rack-'. If awk 
+      #    fails to split on four dots, it will still print '/rack-' last field value
+
+      echo $@ | xargs -n 1 | awk -F '.' '{print "/rack-"$NF}'
+
+
+      </code>
+      </source>
+      </section>
+
+
+      <p>
+         If <code>topology.script.file.name</code> or <code>topology.node.switch.mapping.impl</code> is 
+         not set, the rack id '/default-rack' is returned for any passed IP address.  
+         While this behavior appears desirable, it can cause issues with HDFS block replication as 
+         default behavior is to write one replicated block off rack and is unable to do so as there is 
+         only a single rack named '/default-rack'.
+      </p>
+      <p>
+         An additional configuration setting is <code>mapred.cache.task.levels</code> which determines 
+         the number of levels (in the network topology) of caches. So, for example, if it is the 
+         default value of 2, two levels of caches will be constructed - one for hosts 
+         (host -> task mapping) and another for racks (rack -> task mapping), giving us our one-to-one 
+         mapping of '/myrack/myhost'.
       </p>
     </section>
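
For comparison with the Python and Bash scripts above, the same "one rack per host" mapping can also be sketched in plain Java. This is only an illustration, not part of the patch: a real Java mapper would implement the DNSToSwitchMapping interface named by topology.node.switch.mapping.impl, whereas the class and method names below are hypothetical and deliberately avoid any Hadoop dependency so the sketch stays self-contained.

// Illustrative only: maps each IPv4 address to a rack id of the form
// "/rack-<last octet>", mirroring the Bash example above.
// LastOctetRackMapper is a hypothetical name, not a class in the patch.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class LastOctetRackMapper {
  /** Resolve a list of IPv4 addresses to rack ids. */
  public static List<String> resolve(List<String> names) {
    List<String> racks = new ArrayList<String>(names.size());
    for (String name : names) {
      int dot = name.lastIndexOf('.');
      // Fall back to the catch-all rack described in the documentation.
      racks.add(dot >= 0 ? "/rack-" + name.substring(dot + 1) : "/default-rack");
    }
    return racks;
  }

  public static void main(String[] args) {
    // Prints: [/rack-5, /rack-17]
    System.out.println(resolve(Arrays.asList("192.168.100.5", "192.168.101.17")));
  }
}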
     

+ 7 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

@@ -184,5 +184,11 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
    */
   public static final String KERBEROS_TICKET_CACHE_PATH =
       "hadoop.security.kerberos.ticket.cache.path";
-}
 
+  public static final String HADOOP_SECURITY_UID_NAME_CACHE_TIMEOUT_KEY =
+    "hadoop.security.uid.cache.secs";
+
+  public static final long HADOOP_SECURITY_UID_NAME_CACHE_TIMEOUT_DEFAULT =
+    4*60*60; // 4 hours
+
+}

+ 65 - 12
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java

@@ -24,6 +24,8 @@ import java.util.concurrent.DelayQueue;
 import java.util.concurrent.Delayed;
 import java.util.concurrent.TimeUnit;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
@@ -33,8 +35,11 @@ import org.apache.hadoop.util.Time;
  * A daemon thread that waits for the next file system to renew.
  */
 @InterfaceAudience.Private
-public class DelegationTokenRenewer<T extends FileSystem & DelegationTokenRenewer.Renewable>
+public class DelegationTokenRenewer
     extends Thread {
+  private static final Log LOG = LogFactory
+      .getLog(DelegationTokenRenewer.class);
+
   /** The renewable interface used by the renewer. */
   public interface Renewable {
     /** @return the renew token. */
@@ -93,7 +98,7 @@ public class DelegationTokenRenewer<T extends FileSystem & DelegationTokenRenewe
      * @param newTime the new time
      */
     private void updateRenewalTime() {
-      renewalTime = RENEW_CYCLE + Time.now();
+      renewalTime = renewCycle + Time.now();
     }
 
     /**
@@ -134,34 +139,82 @@ public class DelegationTokenRenewer<T extends FileSystem & DelegationTokenRenewe
   }
 
   /** Wait for 95% of a day between renewals */
-  private static final int RENEW_CYCLE = 24 * 60 * 60 * 950;
+  private static final int RENEW_CYCLE = 24 * 60 * 60 * 950; 
 
-  private DelayQueue<RenewAction<T>> queue = new DelayQueue<RenewAction<T>>();
+  @InterfaceAudience.Private
+  protected static int renewCycle = RENEW_CYCLE;
 
-  public DelegationTokenRenewer(final Class<T> clazz) {
+  /** Queue to maintain the RenewActions to be processed by the {@link #run()} */
+  private volatile DelayQueue<RenewAction<?>> queue = new DelayQueue<RenewAction<?>>();
+  
+  /**
+   * Create the singleton instance. However, the thread can be started lazily in
+   * {@link #addRenewAction(FileSystem)}
+   */
+  private static DelegationTokenRenewer INSTANCE = null;
+
+  private DelegationTokenRenewer(final Class<? extends FileSystem> clazz) {
     super(clazz.getSimpleName() + "-" + DelegationTokenRenewer.class.getSimpleName());
     setDaemon(true);
   }
 
+  public static synchronized DelegationTokenRenewer getInstance() {
+    if (INSTANCE == null) {
+      INSTANCE = new DelegationTokenRenewer(FileSystem.class);
+    }
+    return INSTANCE;
+  }
+
   /** Add a renew action to the queue. */
-  public void addRenewAction(final T fs) {
+  public synchronized <T extends FileSystem & Renewable> void addRenewAction(final T fs) {
     queue.add(new RenewAction<T>(fs));
+    if (!isAlive()) {
+      start();
+    }
+  }
+
+  /**
+   * Remove the associated renew action from the queue
+   * 
+   * @throws IOException
+   */
+  public synchronized <T extends FileSystem & Renewable> void removeRenewAction(
+      final T fs) throws IOException {
+    for (RenewAction<?> action : queue) {
+      if (action.weakFs.get() == fs) {
+        try {
+          fs.getRenewToken().cancel(fs.getConf());
+        } catch (InterruptedException ie) {
+          LOG.error("Interrupted while canceling token for " + fs.getUri()
+              + "filesystem");
+          if (LOG.isDebugEnabled()) {
+            LOG.debug(ie.getStackTrace());
+          }
+        }
+        queue.remove(action);
+        return;
+      }
+    }
   }
 
+  @SuppressWarnings("static-access")
   @Override
   public void run() {
     for(;;) {
-      RenewAction<T> action = null;
+      RenewAction<?> action = null;
       try {
-        action = queue.take();
-        if (action.renew()) {
-          action.updateRenewalTime();
-          queue.add(action);
+        synchronized (this) {
+          action = queue.take();
+          if (action.renew()) {
+            action.updateRenewalTime();
+            queue.add(action);
+          }
         }
       } catch (InterruptedException ie) {
         return;
       } catch (Exception ie) {
-        T.LOG.warn("Failed to renew token, action=" + action, ie);
+        action.weakFs.get().LOG.warn("Failed to renew token, action=" + action,
+            ie);
       }
     }
   }
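
To illustrate the API change above (the public constructor replaced by a lazily started singleton, plus the new removeRenewAction that also cancels the token), here is a hedged usage sketch. It relies only on the signatures visible in this diff; RenewerUsageSketch and the renewable filesystem parameter are placeholders, not code from the patch.

import java.io.IOException;
import org.apache.hadoop.fs.DelegationTokenRenewer;
import org.apache.hadoop.fs.FileSystem;

public class RenewerUsageSketch {
  /**
   * Register a filesystem whose delegation token should be kept alive,
   * and deregister it again once the filesystem is no longer needed.
   */
  public static <T extends FileSystem & DelegationTokenRenewer.Renewable>
      void renewWhileInUse(T fs) throws IOException {
    // getInstance() returns the singleton; its daemon thread is started
    // lazily on the first addRenewAction() call.
    DelegationTokenRenewer renewer = DelegationTokenRenewer.getInstance();
    renewer.addRenewAction(fs);
    try {
      // ... use the filesystem while its token is renewed in the background ...
    } finally {
      // Per the new API, this also cancels the delegation token.
      renewer.removeRenewAction(fs);
    }
  }
}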

+ 2 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java

@@ -1119,7 +1119,7 @@ public final class FileContext {
    * @param target The symlink's absolute target
    * @return Fully qualified version of the target.
    */
-  private Path qualifySymlinkTarget(final AbstractFileSystem pathFS,
+  private static Path qualifySymlinkTarget(final AbstractFileSystem pathFS,
     Path pathWithLink, Path target) {
     // NB: makeQualified uses the target's scheme and authority, if
     // specified, and the scheme and authority of pathFS, if not.
@@ -2321,7 +2321,7 @@ public final class FileContext {
    * Class used to perform an operation on and resolve symlinks in a
    * path. The operation may potentially span multiple file systems.  
    */
-  protected abstract class FSLinkResolver<T> {
+  protected static abstract class FSLinkResolver<T> {
     // The maximum number of symbolic link components in a path
     private static final int MAX_PATH_LINKS = 32;
 

+ 7 - 16
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java

@@ -584,13 +584,6 @@ public class HarFileSystem extends FilterFileSystem {
     public String getName() {
       return name;
     }
-    
-    public List<String> getChildren() {
-      return children;
-    }
-    public String getFileName() {
-      return name;
-    }
     public String getPartName() {
       return partName;
     }
@@ -662,15 +655,6 @@ public class HarFileSystem extends FilterFileSystem {
         hstatus.getStartIndex(), hstatus.getLength(), bufferSize);
   }
  
-  /*
-   * create throws an exception in Har filesystem.
-   * The archive once created cannot be changed.
-   */
-  public FSDataOutputStream create(Path f, int bufferSize) 
-                                    throws IOException {
-    throw new IOException("Har: Create not allowed");
-  }
-  
   @Override
   public FSDataOutputStream create(Path f,
       FsPermission permission,
@@ -1106,4 +1090,11 @@ public class HarFileSystem extends FilterFileSystem {
       }
     }
   }
+  
+  /*
+   * testing purposes only:
+   */
+  HarMetaData getMetadata() {
+    return metadata;
+  }
 }

+ 4 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java

@@ -481,12 +481,15 @@ public class LocalDirAllocator {
 
       @Override
       public Path next() {
-        Path result = next;
+        final Path result = next;
         try {
           advance();
         } catch (IOException ie) {
           throw new RuntimeException("Can't check existance of " + next, ie);
         }
+        if (result == null) {
+          throw new NoSuchElementException();
+        }
         return result;
       }
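
The added null check makes next() honour the java.util.Iterator contract, which requires NoSuchElementException once iteration is exhausted. A generic, hedged sketch of that contract (not taken from LocalDirAllocator):

import java.util.Iterator;
import java.util.NoSuchElementException;

/** Iterator over a single value, ending with NoSuchElementException. */
public class SingleValueIterator<T> implements Iterator<T> {
  private T value;

  public SingleValueIterator(T value) {
    this.value = value;
  }

  @Override
  public boolean hasNext() {
    return value != null;
  }

  @Override
  public T next() {
    if (value == null) {
      // Required by the Iterator contract once elements are exhausted.
      throw new NoSuchElementException();
    }
    T result = value;
    value = null;
    return result;
  }

  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }
}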
 

+ 8 - 12
hadoop-mapreduce-project/src/contrib/vertica/src/test/org/apache/hadoop/vertica/VerticaTestCase.java → hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathAccessDeniedException.java

@@ -15,17 +15,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+package org.apache.hadoop.fs;
 
-package org.apache.hadoop.vertica;
-
-import junit.framework.TestCase;
-
-public class VerticaTestCase extends TestCase {
-  public VerticaTestCase(String name) {
-    super(name);
-  }
-
-  {
-    AllTests.configure();
+/** EACCES */
+public class PathAccessDeniedException extends PathIOException {
+  static final long serialVersionUID = 0L;
+  /** @param path for the exception */
+  public PathAccessDeniedException(String path) {
+    super(path, "Permission denied");
   }
-}
+}

+ 12 - 12
hadoop-mapreduce-project/src/contrib/eclipse-plugin/src/java/org/apache/hadoop/eclipse/preferences/PreferenceInitializer.java → hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathExistsException.java

@@ -15,19 +15,19 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-package org.apache.hadoop.eclipse.preferences;
-
-import org.eclipse.core.runtime.preferences.AbstractPreferenceInitializer;
+package org.apache.hadoop.fs;
 
 /**
- * Class used to initialize default preference values.
+ * Exception corresponding to File Exists - EEXISTS
  */
-public class PreferenceInitializer extends AbstractPreferenceInitializer {
-
-  /* @inheritDoc */
-  @Override
-  public void initializeDefaultPreferences() {
+public class PathExistsException extends PathIOException {
+  static final long serialVersionUID = 0L;
+  /** @param path for the exception */
+  public PathExistsException(String path) {
+    super(path, "File exists");
   }
-
-}
+  
+  protected PathExistsException(String path, String error) {
+    super(path, error);
+  }
+}

+ 117 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java

@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import java.io.IOException;
+
+/**
+ * Exceptions based on standard posix/linux style exceptions for path related
+ * errors. Returns an exception with the format "path: standard error string".
+ * 
+ * This exception corresponds to Input/output error (EIO)
+ */
+public class PathIOException extends IOException {
+  static final long serialVersionUID = 0L;
+  private static final String EIO = "Input/output error";
+  // NOTE: this really should be a Path, but a Path is buggy and won't
+  // return the exact string used to construct the path, and it mangles
+  // uris with no authority
+  private String operation;
+  private String path;
+  private String targetPath;
+
+  /**
+   * Constructs a generic I/O error exception
+   *  @param path for the exception
+   */
+  public PathIOException(String path) {
+    this(path, EIO, null);
+  }
+
+  /**
+   * Appends the text of a Throwable to the default error message
+   * @param path for the exception
+   * @param cause a throwable to extract the error message
+   */
+  public PathIOException(String path, Throwable cause) {
+    this(path, EIO, cause);
+  }
+
+  /**
+   * Avoid using this method.  Use a subclass of PathIOException if
+   * possible.
+   * @param path for the exception
+   * @param error custom string to use as the error text
+   */
+  public PathIOException(String path, String error) {
+    this(path, error, null);
+  }
+
+  protected PathIOException(String path, String error, Throwable cause) {
+    super(error, cause);
+    this.path = path;
+  }
+
+  /** Format:
+   * cmd: {operation} `path' {to `target'}: error string
+   */
+  @Override
+  public String getMessage() {
+    StringBuilder message = new StringBuilder();
+    if (operation != null) {
+      message.append(operation + " ");
+    }
+    message.append(formatPath(path));
+    if (targetPath != null) {
+      message.append(" to " + formatPath(targetPath));
+    }
+    message.append(": " + super.getMessage());
+    if (getCause() != null) {
+      message.append(": " + getCause().getMessage());
+    }
+    return message.toString();
+  }
+
+  /** @return Path that generated the exception */
+  public Path getPath()  { return new Path(path); }
+
+  /** @return Path if the operation involved copying or moving, else null */
+  public Path getTargetPath() {
+    return (targetPath != null) ? new Path(targetPath) : null;
+  }    
+  
+  /**
+   * Optional operation that will preface the path
+   * @param operation a string
+   */
+  public void setOperation(String operation) {
+    this.operation = operation;
+  }
+  
+  /**
+   * Optional path if the exception involved two paths, ex. a copy operation
+   * @param targetPath the target path of the operation
+   */
+  public void setTargetPath(String targetPath) {
+    this.targetPath = targetPath;
+  }
+  
+  private String formatPath(String path) {
+    return "`" + path + "'";
+  }
+}
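
The getMessage() implementation above produces messages of the form "{operation} `path' {to `target'}: error string". A small hedged sketch of that format, using only the constructor and setters shown in this file (the demo class and paths are illustrative, not from the patch):

import org.apache.hadoop.fs.PathIOException;

public class PathIOExceptionDemo {
  public static void main(String[] args) {
    PathIOException e = new PathIOException("/user/alice/data");
    e.setOperation("rename");
    e.setTargetPath("/user/alice/archive");
    // Expected output, per the format documented above:
    // rename `/user/alice/data' to `/user/alice/archive': Input/output error
    System.out.println(e.getMessage());
  }
}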

+ 9 - 8
hadoop-mapreduce-project/src/c++/librecordio/test/test.hh → hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIsDirectoryException.java

@@ -15,12 +15,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+package org.apache.hadoop.fs;
 
-#ifndef TEST_HH_
-#define TEST_HH_
-
-#include "recordio.hh"
-#include "filestream.hh"
-#include "test.jr.hh"
-
-#endif /*TEST_HH_*/
+/** EISDIR */
+public class PathIsDirectoryException extends PathExistsException {
+  static final long serialVersionUID = 0L;
+  /** @param path for the exception */
+  public PathIsDirectoryException(String path) {
+    super(path, "Is a directory");
+  }
+}

+ 27 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIsNotDirectoryException.java

@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+/** ENOTDIR */
+public class PathIsNotDirectoryException extends PathExistsException {
+  static final long serialVersionUID = 0L;
+  /** @param path for the exception */
+  public PathIsNotDirectoryException(String path) {
+    super(path, "Is not a directory");
+  }
+}

+ 9 - 5
hadoop-mapreduce-project/src/tools/org/apache/hadoop/fs/package-info.java → hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIsNotEmptyDirectoryException.java

@@ -1,4 +1,4 @@
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -15,8 +15,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-/**
- * Hadoop filesystem classes for MapReduce.
- */
 package org.apache.hadoop.fs;
+
+/** Generated by rm commands */
+public class PathIsNotEmptyDirectoryException extends PathExistsException {
+  /** @param path for the exception */
+  public PathIsNotEmptyDirectoryException(String path) {
+    super(path, "Directory is not empty");
+  }
+}

+ 9 - 16
hadoop-mapreduce-project/src/contrib/eclipse-plugin/src/java/org/apache/hadoop/eclipse/server/IJobListener.java → hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathNotFoundException.java

@@ -15,22 +15,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-package org.apache.hadoop.eclipse.server;
+package org.apache.hadoop.fs;
 
 /**
- * Interface for updating/adding jobs to the MapReduce Server view.
+ * Exception corresponding to No such file or directory - ENOENT
  */
-public interface IJobListener {
-
-  void jobChanged(HadoopJob job);
-
-  void jobAdded(HadoopJob job);
-
-  void jobRemoved(HadoopJob job);
-
-  void publishStart(JarModule jar);
-
-  void publishDone(JarModule jar);
-
-}
+public class PathNotFoundException extends PathIOException {
+  static final long serialVersionUID = 0L;
+  /** @param path for the exception */
+  public PathNotFoundException(String path) {
+    super(path, "No such file or directory");
+  }    
+}

+ 9 - 14
hadoop-mapreduce-project/src/contrib/eclipse-plugin/src/java/org/apache/hadoop/eclipse/dfs/DFSContent.java → hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathOperationException.java

@@ -15,18 +15,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+package org.apache.hadoop.fs;
 
-package org.apache.hadoop.eclipse.dfs;
-
-/**
- * Interface to define content entities in the DFS browser
- */
-public interface DFSContent {
-
-  boolean hasChildren();
-  
-  DFSContent[] getChildren();
-  
-  void refresh();
-  
-}
+/** ENOTSUP */
+public class PathOperationException extends PathExistsException {
+  static final long serialVersionUID = 0L;
+  /** @param path for the exception */
+  public PathOperationException(String path) {
+    super(path, "Operation not supported");
+  }
+}

+ 9 - 8
hadoop-mapreduce-project/src/contrib/eclipse-plugin/src/java/org/apache/hadoop/eclipse/servers/IHadoopServerListener.java → hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathPermissionException.java

@@ -15,14 +15,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-package org.apache.hadoop.eclipse.servers;
-
-import org.apache.hadoop.eclipse.server.HadoopServer;
+package org.apache.hadoop.fs;
 
 /**
- * Interface for monitoring server changes
+ * Exception corresponding to Operation Not Permitted - EPERM
  */
-public interface IHadoopServerListener {
-  void serverChanged(HadoopServer location, int type);
-}
+public class PathPermissionException extends PathIOException {
+  static final long serialVersionUID = 0L;
+  /** @param path for the exception */
+  public PathPermissionException(String path) {
+    super(path, "Operation not permitted");
+  }
+}

+ 12 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java

@@ -184,7 +184,18 @@ public class FsPermission implements Writable {
     return str;
   }
 
-  /** Apply a umask to this permission and return a new one */
+  /**
+   * Apply a umask to this permission and return a new one.
+   *
+   * The umask is used by create, mkdir, and other Hadoop filesystem operations.
+   * The mode argument for these operations is modified by removing the bits
+   * which are set in the umask.  Thus, the umask limits the permissions which
+   * newly created files and directories get.
+   *
+   * @param umask              The umask to use
+   * 
+   * @return                   The effective permission
+   */
   public FsPermission applyUMask(FsPermission umask) {
     return new FsPermission(useraction.and(umask.useraction.not()),
         groupaction.and(umask.groupaction.not()),
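
The expanded javadoc spells out the umask semantics: bits set in the umask are cleared from the requested mode. A minimal, hedged illustration of that rule, assuming the public FsPermission(short) constructor (which is not shown in this diff):

import org.apache.hadoop.fs.permission.FsPermission;

public class UmaskDemo {
  public static void main(String[] args) {
    FsPermission requested = new FsPermission((short) 0777); // rwxrwxrwx
    FsPermission umask = new FsPermission((short) 0022);     // clear group/other write
    // applyUMask removes the bits set in the umask: 0777 & ~0022 == 0755
    System.out.println(requested.applyUMask(umask));         // prints rwxr-xr-x
  }
}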

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java

@@ -34,7 +34,7 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.shell.PathExceptions.PathNotFoundException;
+import org.apache.hadoop.fs.PathNotFoundException;
 import org.apache.hadoop.util.StringUtils;
 
 /**

+ 6 - 6
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandWithDestination.java

@@ -29,12 +29,12 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FilterFileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.shell.PathExceptions.PathExistsException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIOException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsNotDirectoryException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathNotFoundException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathOperationException;
+import org.apache.hadoop.fs.PathExistsException;
+import org.apache.hadoop.fs.PathIOException;
+import org.apache.hadoop.fs.PathIsDirectoryException;
+import org.apache.hadoop.fs.PathIsNotDirectoryException;
+import org.apache.hadoop.fs.PathNotFoundException;
+import org.apache.hadoop.fs.PathOperationException;
 import org.apache.hadoop.io.IOUtils;
 
 /**

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CopyCommands.java

@@ -29,7 +29,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
+import org.apache.hadoop.fs.PathIsDirectoryException;
 import org.apache.hadoop.io.IOUtils;
 
 /** Various commands for copy files */

+ 4 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Delete.java

@@ -24,11 +24,11 @@ import java.util.LinkedList;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.PathIOException;
+import org.apache.hadoop.fs.PathIsDirectoryException;
+import org.apache.hadoop.fs.PathIsNotDirectoryException;
+import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException;
 import org.apache.hadoop.fs.Trash;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIOException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsNotDirectoryException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsNotEmptyDirectoryException;
 
 /**
  * Classes that delete paths

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Display.java

@@ -38,7 +38,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
+import org.apache.hadoop.fs.PathIsDirectoryException;
 import org.apache.hadoop.io.DataInputBuffer;
 import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.io.IOUtils;

+ 4 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Mkdir.java

@@ -24,10 +24,10 @@ import java.util.LinkedList;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.shell.PathExceptions.PathExistsException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIOException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsNotDirectoryException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathNotFoundException;
+import org.apache.hadoop.fs.PathExistsException;
+import org.apache.hadoop.fs.PathIOException;
+import org.apache.hadoop.fs.PathIsNotDirectoryException;
+import org.apache.hadoop.fs.PathNotFoundException;
 
 /**
  * Create the given dir

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/MoveCommands.java

@@ -23,8 +23,8 @@ import java.util.LinkedList;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.PathIOException;
 import org.apache.hadoop.fs.shell.CopyCommands.CopyFromLocal;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIOException;
 
 /** Various commands for moving files */
 @InterfaceAudience.Private

+ 4 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathData.java

@@ -32,10 +32,10 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIOException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsNotDirectoryException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathNotFoundException;
+import org.apache.hadoop.fs.PathIOException;
+import org.apache.hadoop.fs.PathIsDirectoryException;
+import org.apache.hadoop.fs.PathIsNotDirectoryException;
+import org.apache.hadoop.fs.PathNotFoundException;
 
 /**
  * Encapsulates a Path (path), its FileStatus (stat), and its FileSystem (fs).

+ 0 - 203
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/PathExceptions.java

@@ -1,203 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.fs.shell;
-
-import java.io.IOException;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.Path;
-
-/**
- * Standardized posix/linux style exceptions for path related errors.
- * Returns an IOException with the format "path: standard error string".
- */
-@InterfaceAudience.Private
-@InterfaceStability.Unstable
-
-@SuppressWarnings("serial")
-public class PathExceptions {
-
-  /** EIO */
-  public static class PathIOException extends IOException {
-    static final long serialVersionUID = 0L;
-    private static final String EIO = "Input/output error";
-    // NOTE: this really should be a Path, but a Path is buggy and won't
-    // return the exact string used to construct the path, and it mangles
-    // uris with no authority
-    private String operation;
-    private String path;
-    private String targetPath;
-
-    /**
-     * Constructor a generic I/O error exception
-     *  @param path for the exception
-     */
-    public PathIOException(String path) {
-      this(path, EIO, null);
-    }
-
-    /**
-     * Appends the text of a Throwable to the default error message
-     * @param path for the exception
-     * @param cause a throwable to extract the error message
-     */
-    public PathIOException(String path, Throwable cause) {
-      this(path, EIO, cause);
-    }
-
-    /**
-     * Avoid using this method.  Use a subclass of PathIOException if
-     * possible.
-     * @param path for the exception
-     * @param error custom string to use an the error text
-     */
-    public PathIOException(String path, String error) {
-      this(path, error, null);
-    }
-
-    protected PathIOException(String path, String error, Throwable cause) {
-      super(error, cause);
-      this.path = path;
-    }
-
-    /** Format:
-     * cmd: {operation} `path' {to `target'}: error string
-     */
-    @Override
-    public String getMessage() {
-      StringBuilder message = new StringBuilder();
-      if (operation != null) {
-        message.append(operation + " ");
-      }
-      message.append(formatPath(path));
-      if (targetPath != null) {
-        message.append(" to " + formatPath(targetPath));
-      }
-      message.append(": " + super.getMessage());
-      if (getCause() != null) {
-        message.append(": " + getCause().getMessage());
-      }
-      return message.toString();
-    }
-
-    /** @return Path that generated the exception */
-    public Path getPath()  { return new Path(path); }
-
-    /** @return Path if the operation involved copying or moving, else null */
-    public Path getTargetPath() {
-      return (targetPath != null) ? new Path(targetPath) : null;
-    }    
-    
-    /**
-     * Optional operation that will preface the path
-     * @param operation a string
-     */
-    public void setOperation(String operation) {
-      this.operation = operation;
-    }
-    
-    /**
-     * Optional path if the exception involved two paths, ex. a copy operation
-     * @param targetPath the of the operation
-     */
-    public void setTargetPath(String targetPath) {
-      this.targetPath = targetPath;
-    }
-    
-    private String formatPath(String path) {
-      return "`" + path + "'";
-    }
-  }
-
-  /** ENOENT */
-  public static class PathNotFoundException extends PathIOException {
-    static final long serialVersionUID = 0L;
-    /** @param path for the exception */
-    public PathNotFoundException(String path) {
-      super(path, "No such file or directory");
-    }    
-  }
-
-  /** EEXISTS */
-  public static class PathExistsException extends PathIOException {
-    static final long serialVersionUID = 0L;
-    /** @param path for the exception */
-    public PathExistsException(String path) {
-      super(path, "File exists");
-    }
-    
-    protected PathExistsException(String path, String error) {
-      super(path, error);
-    }
-  }
-
-  /** EISDIR */
-  public static class PathIsDirectoryException extends PathExistsException {
-    static final long serialVersionUID = 0L;
-    /** @param path for the exception */
-    public PathIsDirectoryException(String path) {
-      super(path, "Is a directory");
-    }
-  }
-
-  /** ENOTDIR */
-  public static class PathIsNotDirectoryException extends PathExistsException {
-    static final long serialVersionUID = 0L;
-    /** @param path for the exception */
-    public PathIsNotDirectoryException(String path) {
-      super(path, "Is not a directory");
-    }
-  }
-
-  /** Generated by rm commands */
-  public static class PathIsNotEmptyDirectoryException extends PathExistsException {
-    /** @param path for the exception */
-    public PathIsNotEmptyDirectoryException(String path) {
-      super(path, "Directory is not empty");
-    }
-  }  
-
-  /** EACCES */
-  public static class PathAccessDeniedException extends PathIOException {
-    static final long serialVersionUID = 0L;
-    /** @param path for the exception */
-    public PathAccessDeniedException(String path) {
-      super(path, "Permission denied");
-    }
-  }
-
-  /** EPERM */
-  public static class PathPermissionException extends PathIOException {
-    static final long serialVersionUID = 0L;
-    /** @param path for the exception */
-    public PathPermissionException(String path) {
-      super(path, "Operation not permitted");
-    }
-  }
-  
-  /** ENOTSUP */
-  public static class PathOperationException extends PathExistsException {
-    static final long serialVersionUID = 0L;
-    /** @param path for the exception */
-    public PathOperationException(String path) {
-      super(path, "Operation not supported");
-    }
-  }
-}
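
The nested classes above are removed in favour of standalone top-level exceptions under org.apache.hadoop.fs (PathIOException, PathNotFoundException and the others listed in the change summary), so shell commands now import them directly. A minimal sketch of raising one of the relocated exceptions, assuming the top-level classes keep the constructors and the setOperation()/getMessage() behaviour of the nested versions shown here; the class name and path below are illustrative only.

    import java.io.IOException;
    import org.apache.hadoop.fs.PathNotFoundException;

    public class PathExceptionDemo {
      // Throws the relocated ENOENT-style exception for a missing path.
      static void failIfMissing(boolean exists, String path) throws IOException {
        if (!exists) {
          PathNotFoundException e = new PathNotFoundException(path);
          e.setOperation("stat");  // optional prefix, as in getMessage() above
          throw e;
        }
      }

      public static void main(String[] args) {
        try {
          failIfMissing(false, "/no/such/file");
        } catch (IOException e) {
          // Expected form: stat `/no/such/file': No such file or directory
          System.err.println(e.getMessage());
        }
      }
    }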

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/SetReplication.java

@@ -25,7 +25,7 @@ import java.util.List;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.BlockLocation;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIOException;
+import org.apache.hadoop.fs.PathIOException;
 
 /**
  * Modifies the replication factor

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Tail.java

@@ -25,7 +25,7 @@ import java.util.List;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
+import org.apache.hadoop.fs.PathIsDirectoryException;
 import org.apache.hadoop.io.IOUtils;
 
 /**

+ 0 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Test.java

@@ -23,7 +23,6 @@ import java.util.LinkedList;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.shell.PathExceptions.PathNotFoundException;
 
 /**
  * Perform shell-like file tests 
@@ -91,8 +90,6 @@ class Test extends FsCommand {
 
   @Override
   protected void processNonexistentPath(PathData item) throws IOException {
-    // NOTE: errors for FNF is not how the shell works!
-    if (flag != 'e') displayError(new PathNotFoundException(item.toString()));
     exitCode = 1;
   }
 }

+ 3 - 3
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Touchz.java

@@ -23,9 +23,9 @@ import java.util.LinkedList;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIOException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIsDirectoryException;
-import org.apache.hadoop.fs.shell.PathExceptions.PathNotFoundException;
+import org.apache.hadoop.fs.PathIOException;
+import org.apache.hadoop.fs.PathIsDirectoryException;
+import org.apache.hadoop.fs.PathNotFoundException;
 
 /**
  * Unix touch like commands 

+ 13 - 2
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java

@@ -26,6 +26,7 @@ import java.net.InetSocketAddress;
 import java.net.URL;
 import java.security.GeneralSecurityException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.List;
@@ -104,6 +105,7 @@ public class HttpServer implements FilterContainer {
   public static final String CONF_CONTEXT_ATTRIBUTE = "hadoop.conf";
   public static final String ADMINS_ACL = "admins.acl";
   public static final String SPNEGO_FILTER = "SpnegoFilter";
+  public static final String NO_CACHE_FILTER = "NoCacheFilter";
 
   public static final String BIND_ADDRESS = "bind.address";
 
@@ -256,6 +258,7 @@ public class HttpServer implements FilterContainer {
     webAppContext.setWar(appDir + "/" + name);
     webAppContext.getServletContext().setAttribute(CONF_CONTEXT_ATTRIBUTE, conf);
     webAppContext.getServletContext().setAttribute(ADMINS_ACL, adminsAcl);
+    addNoCacheFilter(webAppContext);
     webServer.addHandler(webAppContext);
 
     addDefaultApps(contexts, appDir, conf);
@@ -280,6 +283,12 @@ public class HttpServer implements FilterContainer {
     }
   }
 
+  @SuppressWarnings("unchecked")
+  private void addNoCacheFilter(WebAppContext ctxt) {
+    defineFilter(ctxt, NO_CACHE_FILTER,
+      NoCacheFilter.class.getName(), Collections.EMPTY_MAP, new String[] { "/*"});
+  }
+
   /**
    * Create a required listener for the Jetty instance listening on the port
    * provided. This wrapper and all subclasses must create at least one
@@ -346,6 +355,7 @@ public class HttpServer implements FilterContainer {
       }
       logContext.setDisplayName("logs");
       setContextAttributes(logContext, conf);
+      addNoCacheFilter(webAppContext);
       defaultContexts.put(logContext, true);
     }
     // set up the context for "/static/*"
@@ -377,6 +387,7 @@ public class HttpServer implements FilterContainer {
   public void addContext(Context ctxt, boolean isFiltered)
       throws IOException {
     webServer.addHandler(ctxt);
+    addNoCacheFilter(webAppContext);
     defaultContexts.put(ctxt, isFiltered);
   }
 
@@ -470,7 +481,7 @@ public class HttpServer implements FilterContainer {
       holder.setName(name);
     }
     webAppContext.addServlet(holder, pathSpec);
-    
+
     if(requireAuth && UserGroupInformation.isSecurityEnabled()) {
        LOG.info("Adding Kerberos (SPNEGO) filter to " + name);
        ServletHandler handler = webAppContext.getServletHandler();
@@ -962,7 +973,7 @@ public class HttpServer implements FilterContainer {
       @Override
       public Enumeration<String> getParameterNames() {
         return new Enumeration<String>() {
-          private Enumeration<String> rawIterator = 
+          private Enumeration<String> rawIterator =
             rawRequest.getParameterNames();
           @Override
           public boolean hasMoreElements() {

+ 52 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/NoCacheFilter.java

@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.http;
+
+import javax.servlet.Filter;
+import javax.servlet.FilterChain;
+import javax.servlet.FilterConfig;
+import javax.servlet.ServletException;
+import javax.servlet.ServletRequest;
+import javax.servlet.ServletResponse;
+import javax.servlet.http.HttpServletResponse;
+import java.io.IOException;
+
+public class NoCacheFilter implements Filter {
+
+  @Override
+  public void init(FilterConfig filterConfig) throws ServletException {
+  }
+
+  @Override
+  public void doFilter(ServletRequest req, ServletResponse res,
+                       FilterChain chain)
+    throws IOException, ServletException {
+    HttpServletResponse httpRes = (HttpServletResponse) res;
+    httpRes.setHeader("Cache-Control", "no-cache");
+    long now = System.currentTimeMillis();
+    httpRes.addDateHeader("Expires", now);
+    httpRes.addDateHeader("Date", now);
+    httpRes.addHeader("Pragma", "no-cache");
+    chain.doFilter(req, res);
+  }
+
+  @Override
+  public void destroy() {
+  }
+
+}
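
The new filter marks every response from the contexts it is mapped to as non-cacheable by forcing Cache-Control, Pragma and already-expired date headers. A quick client-side check is sketched below, assuming some HttpServer instance with the filter applied is already listening; the host, port and path are illustrative placeholders, not values defined by this change.

    import java.net.HttpURLConnection;
    import java.net.URL;

    public class NoCacheHeaderCheck {
      public static void main(String[] args) throws Exception {
        // Hypothetical endpoint served by an HttpServer context with NoCacheFilter.
        HttpURLConnection conn =
            (HttpURLConnection) new URL("http://localhost:50070/jmx").openConnection();
        conn.connect();
        System.out.println("Cache-Control: " + conn.getHeaderField("Cache-Control"));
        System.out.println("Pragma:        " + conn.getHeaderField("Pragma"));
        System.out.println("Expires:       " + conn.getHeaderField("Expires"));
        conn.disconnect();
      }
    }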

+ 1 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SecureIOUtils.java

@@ -120,7 +120,7 @@ public class SecureIOUtils {
     FileInputStream fis = new FileInputStream(f);
     boolean success = false;
     try {
-      Stat stat = NativeIO.fstat(fis.getFD());
+      Stat stat = NativeIO.getFstat(fis.getFD());
       checkStat(f, stat.getOwner(), stat.getGroup(), expectedOwner,
           expectedGroup);
       success = true;

+ 79 - 4
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java

@@ -19,8 +19,13 @@ package org.apache.hadoop.io.nativeio;
 
 import java.io.FileDescriptor;
 import java.io.IOException;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
 
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.util.NativeCodeLoader;
 
 import org.apache.commons.logging.Log;
@@ -30,6 +35,8 @@ import org.apache.commons.logging.LogFactory;
  * These functions should generally be used alongside a fallback to another
  * more portable mechanism.
  */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
 public class NativeIO {
   // Flags for open() call from bits/fcntl.h
   public static final int O_RDONLY   =    00;
@@ -86,6 +93,8 @@ public class NativeIO {
     "hadoop.workaround.non.threadsafe.getpwuid";
   static final boolean WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT = false;
 
+  private static long cacheTimeout = -1;
+
   static {
     if (NativeCodeLoader.isNativeCodeLoaded()) {
       try {
@@ -96,6 +105,14 @@ public class NativeIO {
 
         initNative();
         nativeLoaded = true;
+
+        cacheTimeout = conf.getLong(
+          CommonConfigurationKeys.HADOOP_SECURITY_UID_NAME_CACHE_TIMEOUT_KEY,
+          CommonConfigurationKeys.HADOOP_SECURITY_UID_NAME_CACHE_TIMEOUT_DEFAULT) *
+          1000;
+        LOG.debug("Initialized cache for IDs to User/Group mapping with a" +
+          " cache timeout of " + cacheTimeout/1000 + " seconds.");
+
       } catch (Throwable t) {
         // This can happen if the user has an older version of libhadoop.so
         // installed - in this case we can continue without native IO
@@ -115,7 +132,7 @@ public class NativeIO {
   /** Wrapper around open(2) */
   public static native FileDescriptor open(String path, int flags, int mode) throws IOException;
   /** Wrapper around fstat(2) */
-  public static native Stat fstat(FileDescriptor fd) throws IOException;
+  private static native Stat fstat(FileDescriptor fd) throws IOException;
   /** Wrapper around chmod(2) */
   public static native void chmod(String path, int mode) throws IOException;
 
@@ -176,6 +193,7 @@ public class NativeIO {
    * Result type of the fstat call
    */
   public static class Stat {
+    private int ownerId, groupId;
     private String owner, group;
     private int mode;
 
@@ -196,9 +214,9 @@ public class NativeIO {
     public static final int S_IWUSR = 0000200;  /* write permission, owner */
     public static final int S_IXUSR = 0000100;  /* execute/search permission, owner */
 
-    Stat(String owner, String group, int mode) {
-      this.owner = owner;
-      this.group = group;
+    Stat(int ownerId, int groupId, int mode) {
+      this.ownerId = ownerId;
+      this.groupId = groupId;
       this.mode = mode;
     }
 
@@ -218,4 +236,61 @@ public class NativeIO {
       return mode;
     }
   }
+
+  static native String getUserName(int uid) throws IOException;
+
+  static native String getGroupName(int uid) throws IOException;
+
+  private static class CachedName {
+    final long timestamp;
+    final String name;
+
+    public CachedName(String name, long timestamp) {
+      this.name = name;
+      this.timestamp = timestamp;
+    }
+  }
+
+  private static final Map<Integer, CachedName> USER_ID_NAME_CACHE =
+    new ConcurrentHashMap<Integer, CachedName>();
+
+  private static final Map<Integer, CachedName> GROUP_ID_NAME_CACHE =
+    new ConcurrentHashMap<Integer, CachedName>();
+
+  private enum IdCache { USER, GROUP }
+
+  private static String getName(IdCache domain, int id) throws IOException {
+    Map<Integer, CachedName> idNameCache = (domain == IdCache.USER)
+      ? USER_ID_NAME_CACHE : GROUP_ID_NAME_CACHE;
+    String name;
+    CachedName cachedName = idNameCache.get(id);
+    long now = System.currentTimeMillis();
+    if (cachedName != null && (cachedName.timestamp + cacheTimeout) > now) {
+      name = cachedName.name;
+    } else {
+      name = (domain == IdCache.USER) ? getUserName(id) : getGroupName(id);
+      if (LOG.isDebugEnabled()) {
+        String type = (domain == IdCache.USER) ? "UserName" : "GroupName";
+        LOG.debug("Got " + type + " " + name + " for ID " + id +
+          " from the native implementation");
+      }
+      cachedName = new CachedName(name, now);
+      idNameCache.put(id, cachedName);
+    }
+    return name;
+  }
+
+  /**
+   * Returns the file stat for a file descriptor.
+   *
+   * @param fd file descriptor.
+   * @return the file descriptor file stat.
+   * @throws IOException thrown if there was an IO error while obtaining the file stat.
+   */
+  public static Stat getFstat(FileDescriptor fd) throws IOException {
+    Stat stat = fstat(fd);
+    stat.owner = getName(IdCache.USER, stat.ownerId);
+    stat.group = getName(IdCache.GROUP, stat.groupId);
+    return stat;
+  }
 }

+ 7 - 6
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java

@@ -294,14 +294,15 @@ public class Client {
         }
       }
       
+      AuthenticationMethod authentication;
       if (token != null) {
-        authMethod = AuthenticationMethod.TOKEN.getAuthMethod();
-      } else if (UserGroupInformation.isSecurityEnabled()) {
-        // eventually just use the ticket's authMethod
-        authMethod = AuthMethod.KERBEROS;
-      } else {
-        authMethod = AuthMethod.SIMPLE;
+        authentication = AuthenticationMethod.TOKEN;
+      } else if (ticket != null) {
+        authentication = ticket.getRealAuthenticationMethod();
+      } else { // this only happens in lazy tests
+        authentication = AuthenticationMethod.SIMPLE;
       }
+      authMethod = authentication.getAuthMethod();
       
       if (LOG.isDebugEnabled())
         LOG.debug("Use " + authMethod + " authentication for protocol "

+ 59 - 53
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java

@@ -45,6 +45,7 @@ import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -87,7 +88,9 @@ import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
 import org.apache.hadoop.security.SaslRpcServer.SaslDigestCallbackHandler;
 import org.apache.hadoop.security.SaslRpcServer.SaslGssCallbackHandler;
 import org.apache.hadoop.security.SaslRpcServer.SaslStatus;
+import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.authentication.util.KerberosName;
 import org.apache.hadoop.security.authorize.AuthorizationException;
 import org.apache.hadoop.security.authorize.PolicyProvider;
@@ -113,7 +116,7 @@ import com.google.common.annotations.VisibleForTesting;
 @InterfaceStability.Evolving
 public abstract class Server {
   private final boolean authorize;
-  private boolean isSecurityEnabled;
+  private EnumSet<AuthMethod> enabledAuthMethods;
   private ExceptionsHandler exceptionsHandler = new ExceptionsHandler();
   
   public void addTerseExceptions(Class<?>... exceptionClass) {
@@ -1217,6 +1220,10 @@ public abstract class Server {
           AUDITLOG.warn(AUTH_FAILED_FOR + clientIP + ":" + attemptingUser);
           throw e;
         }
+        if (replyToken == null && authMethod == AuthMethod.PLAIN) {
+          // client needs at least response to know if it should use SIMPLE
+          replyToken = new byte[0];
+        }
         if (replyToken != null) {
           if (LOG.isDebugEnabled())
             LOG.debug("Will send token of size " + replyToken.length
@@ -1334,34 +1341,9 @@ public abstract class Server {
           if (authMethod == null) {
             throw new IOException("Unable to read authentication method");
           }
-          boolean useSaslServer = isSecurityEnabled;
-          final boolean clientUsingSasl;
-          switch (authMethod) {
-            case SIMPLE: { // no sasl for simple
-              clientUsingSasl = false;
-              break;
-            }
-            case DIGEST: { // always allow tokens if there's a secret manager
-              useSaslServer |= (secretManager != null);
-              clientUsingSasl = true;
-              break;
-            }
-            default: {
-              clientUsingSasl = true;
-              break;
-            }
-          }
-          if (useSaslServer) {
-            saslServer = createSaslServer(authMethod);
-          } else if (clientUsingSasl) { // security is off
-            doSaslReply(SaslStatus.SUCCESS, new IntWritable(
-                SaslRpcServer.SWITCH_TO_SIMPLE_AUTH), null, null);
-            authMethod = AuthMethod.SIMPLE;
-            // client has already sent the initial Sasl message and we
-            // should ignore it. Both client and server should fall back
-            // to simple auth from now on.
-            skipInitialSaslHandshake = true;
-          }
+  
+          // this may create a SASL server, or switch us into SIMPLE
+          authMethod = initializeAuthContext(authMethod);
           
           connectionHeaderBuf = null;
           connectionHeaderRead = true;
@@ -1409,10 +1391,24 @@ public abstract class Server {
       }
     }
 
-    private SaslServer createSaslServer(AuthMethod authMethod)
+    private AuthMethod initializeAuthContext(AuthMethod authMethod)
         throws IOException {
       try {
-        return createSaslServerInternal(authMethod);
+        if (enabledAuthMethods.contains(authMethod)) {
+          saslServer = createSaslServer(authMethod);
+        } else if (enabledAuthMethods.contains(AuthMethod.SIMPLE)) {
+          doSaslReply(SaslStatus.SUCCESS, new IntWritable(
+              SaslRpcServer.SWITCH_TO_SIMPLE_AUTH), null, null);
+          authMethod = AuthMethod.SIMPLE;
+          // client has already sent the initial Sasl message and we
+          // should ignore it. Both client and server should fall back
+          // to simple auth from now on.
+          skipInitialSaslHandshake = true;
+        } else {
+          throw new AccessControlException(
+              authMethod + " authentication is not enabled."
+                  + "  Available:" + enabledAuthMethods);
+        }
       } catch (IOException ioe) {
         final String ioeClass = ioe.getClass().getName();
         final String ioeMessage  = ioe.getLocalizedMessage();
@@ -1425,9 +1421,10 @@ public abstract class Server {
         }
         throw ioe;
       }
+      return authMethod;
     }
 
-    private SaslServer createSaslServerInternal(AuthMethod authMethod)
+    private SaslServer createSaslServer(AuthMethod authMethod)
         throws IOException {
       SaslServer saslServer = null;
       String hostname = null;
@@ -1436,18 +1433,9 @@ public abstract class Server {
       
       switch (authMethod) {
         case SIMPLE: {
-          throw new AccessControlException("Authorization ("
-              + CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION
-              + ") is enabled but authentication ("
-              + CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION
-              + ") is configured as simple. Please configure another method "
-              + "like kerberos or digest.");
+          return null; // no sasl for simple
         }
         case DIGEST: {
-          if (secretManager == null) {
-            throw new AccessControlException(
-                "Server is not configured to do DIGEST authentication.");
-          }
           secretManager.checkAvailableForRead();
           hostname = SaslRpcServer.SASL_DEFAULT_REALM;
           saslCallback = new SaslDigestCallbackHandler(secretManager, this);
@@ -1469,6 +1457,7 @@ public abstract class Server {
           break;
         }
         default:
+          // we should never be able to get here
           throw new AccessControlException(
               "Server does not support SASL " + authMethod);
       }
@@ -1908,7 +1897,9 @@ public abstract class Server {
     this.authorize = 
       conf.getBoolean(CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION, 
                       false);
-    this.isSecurityEnabled = UserGroupInformation.isSecurityEnabled();
+
+    // configure supported authentications
+    this.enabledAuthMethods = getAuthMethods(secretManager, conf);
     
     // Start the listener here and let it bind to the port
     listener = new Listener();
@@ -1929,6 +1920,31 @@ public abstract class Server {
     this.exceptionsHandler.addTerseExceptions(StandbyException.class);
   }
 
+  // get the security type from the conf. implicitly include token support
+  // if a secret manager is provided, or fail if token is the conf value but
+  // there is no secret manager
+  private EnumSet<AuthMethod> getAuthMethods(SecretManager<?> secretManager,
+                                             Configuration conf) {
+    AuthenticationMethod confAuthenticationMethod =
+        SecurityUtil.getAuthenticationMethod(conf);        
+    EnumSet<AuthMethod> authMethods =
+        EnumSet.of(confAuthenticationMethod.getAuthMethod()); 
+        
+    if (confAuthenticationMethod == AuthenticationMethod.TOKEN) {
+      if (secretManager == null) {
+        throw new IllegalArgumentException(AuthenticationMethod.TOKEN +
+            " authentication requires a secret manager");
+      } 
+    } else if (secretManager != null) {
+      LOG.debug(AuthenticationMethod.TOKEN +
+          " authentication enabled for secret manager");
+      authMethods.add(AuthenticationMethod.TOKEN.getAuthMethod());
+    }
+    
+    LOG.debug("Server accepts auth methods:" + authMethods);
+    return authMethods;
+  }
+  
   private void closeConnection(Connection connection) {
     synchronized (connectionList) {
       if (connectionList.remove(connection))
@@ -2045,16 +2061,6 @@ public abstract class Server {
     return conf;
   }
   
-  /** for unit testing only, should be called before server is started */ 
-  void disableSecurity() {
-    this.isSecurityEnabled = false;
-  }
-  
-  /** for unit testing only, should be called before server is started */ 
-  void enableSecurity() {
-    this.isSecurityEnabled = true;
-  }
-  
   /** Sets the socket buffer size used for responding to RPCs */
   public void setSocketSendBufSize(int size) { this.socketSendBufferSize = size; }
 

+ 16 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java

@@ -28,6 +28,9 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.util.ReflectionUtils;
 
 /** The class represents a cluster of computer with a tree hierarchical
  * network topology.
@@ -52,6 +55,19 @@ public class NetworkTopology {
       super(msg);
     }
   }
+  
+  /**
+   * Get an instance of NetworkTopology based on the value of the configuration
+   * parameter net.topology.impl.
+   * 
+   * @param conf the configuration to be used
+   * @return an instance of NetworkTopology
+   */
+  public static NetworkTopology getInstance(Configuration conf){
+    return ReflectionUtils.newInstance(
+        conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
+        NetworkTopology.class, NetworkTopology.class), conf);
+  }
 
   /** InnerNode represents a switch/router of a data center or rack.
    * Different from a leaf node, it has non-null children.
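
getInstance() makes the topology implementation pluggable through the key referenced above (CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY, i.e. net.topology.impl). A minimal sketch, assuming the default Hadoop configuration resources are on the classpath; with no override it simply returns the base NetworkTopology.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.net.NetworkTopology;

    public class TopologyFactoryDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // A custom subclass could be selected by setting net.topology.impl in conf.
        NetworkTopology topology = NetworkTopology.getInstance(conf);
        System.out.println("Topology class: " + topology.getClass().getName());
      }
    }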

+ 159 - 0
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslPlainServer.java

@@ -0,0 +1,159 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.security;
+
+import java.security.Provider;
+import java.util.Map;
+
+import javax.security.auth.callback.*;
+import javax.security.sasl.AuthorizeCallback;
+import javax.security.sasl.Sasl;
+import javax.security.sasl.SaslException;
+import javax.security.sasl.SaslServer;
+import javax.security.sasl.SaslServerFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public class SaslPlainServer implements SaslServer {
+  @SuppressWarnings("serial")
+  public static class SecurityProvider extends Provider {
+    public SecurityProvider() {
+      super("SaslPlainServer", 1.0, "SASL PLAIN Authentication Server");
+      put("SaslServerFactory.PLAIN",
+          SaslPlainServerFactory.class.getName());
+    }
+  }
+
+  public static class SaslPlainServerFactory implements SaslServerFactory {
+    @Override
+    public SaslServer createSaslServer(String mechanism, String protocol,
+        String serverName, Map<String,?> props, CallbackHandler cbh)
+            throws SaslException {
+      return "PLAIN".equals(mechanism) ? new SaslPlainServer(cbh) : null; 
+    }
+    @Override
+    public String[] getMechanismNames(Map<String,?> props){
+      return (props == null) || "false".equals(props.get(Sasl.POLICY_NOPLAINTEXT))
+          ? new String[]{"PLAIN"}
+          : new String[0];
+    }
+  }
+  
+  private CallbackHandler cbh;
+  private boolean completed;
+  private String authz;
+  
+  SaslPlainServer(CallbackHandler callback) {
+    this.cbh = callback;
+  }
+
+  @Override
+  public String getMechanismName() {
+    return "PLAIN";
+  }
+  
+  @Override
+  public byte[] evaluateResponse(byte[] response) throws SaslException {
+    if (completed) {
+      throw new IllegalStateException("PLAIN authentication has completed");
+    }
+    if (response == null) {
+      throw new IllegalArgumentException("Received null response");
+    }
+    try {
+      String payload;
+      try {
+        payload = new String(response, "UTF-8");
+      } catch (Exception e) {
+        throw new IllegalArgumentException("Received corrupt response", e);
+      }
+      // [ authz, authn, password ]
+      String[] parts = payload.split("\u0000", 3);
+      if (parts.length != 3) {
+        throw new IllegalArgumentException("Received corrupt response");
+      }
+      if (parts[0].isEmpty()) { // authz = authn
+        parts[0] = parts[1];
+      }
+      
+      NameCallback nc = new NameCallback("SASL PLAIN");
+      nc.setName(parts[1]);
+      PasswordCallback pc = new PasswordCallback("SASL PLAIN", false);
+      pc.setPassword(parts[2].toCharArray());
+      AuthorizeCallback ac = new AuthorizeCallback(parts[1], parts[0]);
+      cbh.handle(new Callback[]{nc, pc, ac});      
+      if (ac.isAuthorized()) {
+        authz = ac.getAuthorizedID();
+      }
+    } catch (Exception e) {
+      throw new SaslException("PLAIN auth failed: " + e.getMessage());
+    } finally {
+      completed = true;
+    }
+    return null;
+  }
+
+  private void throwIfNotComplete() {
+    if (!completed) {
+      throw new IllegalStateException("PLAIN authentication not completed");
+    }
+  }
+  
+  @Override
+  public boolean isComplete() {
+    return completed;
+  }
+
+  @Override
+  public String getAuthorizationID() {
+    throwIfNotComplete();
+    return authz;
+  }
+  
+  @Override
+  public Object getNegotiatedProperty(String propName) {
+    throwIfNotComplete();      
+    return Sasl.QOP.equals(propName) ? "auth" : null;
+  }
+  
+  @Override
+  public byte[] wrap(byte[] outgoing, int offset, int len)
+      throws SaslException {
+    throwIfNotComplete();
+    throw new IllegalStateException(
+        "PLAIN supports neither integrity nor privacy");      
+  }
+  
+  @Override
+  public byte[] unwrap(byte[] incoming, int offset, int len)
+      throws SaslException {
+    throwIfNotComplete();
+    throw new IllegalStateException(
+        "PLAIN supports neither integrity nor privacy");      
+  }
+  
+  @Override
+  public void dispose() throws SaslException {
+    cbh = null;
+    authz = null;
+  }
+}
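
evaluateResponse() expects the standard PLAIN payload of three NUL-separated fields, authzid \0 authcid \0 password, and an empty authzid defaults to the authcid. The sketch below only illustrates that message layout and the same defaulting rule; the user name and password are made up.

    public class PlainPayloadDemo {
      public static void main(String[] args) throws Exception {
        String authz = "";        // empty => server substitutes the authcid
        String authc = "alice";
        String password = "secret";
        byte[] response =
            (authz + "\u0000" + authc + "\u0000" + password).getBytes("UTF-8");

        String[] parts = new String(response, "UTF-8").split("\u0000", 3);
        if (parts[0].isEmpty()) {
          parts[0] = parts[1];    // same defaulting rule as the server code above
        }
        System.out.println("authz=" + parts[0] + " authc=" + parts[1]);
      }
    }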

+ 6 - 22
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java

@@ -145,15 +145,13 @@ public class SaslRpcClient {
       byte[] saslToken = new byte[0];
       if (saslClient.hasInitialResponse())
         saslToken = saslClient.evaluateChallenge(saslToken);
-      if (saslToken != null) {
+      while (saslToken != null) {
         outStream.writeInt(saslToken.length);
         outStream.write(saslToken, 0, saslToken.length);
         outStream.flush();
         if (LOG.isDebugEnabled())
           LOG.debug("Have sent token of size " + saslToken.length
               + " from initSASLContext.");
-      }
-      if (!saslClient.isComplete()) {
         readStatus(inStream);
         int len = inStream.readInt();
         if (len == SaslRpcServer.SWITCH_TO_SIMPLE_AUTH) {
@@ -161,32 +159,18 @@ public class SaslRpcClient {
             LOG.debug("Server asks us to fall back to simple auth.");
           saslClient.dispose();
           return false;
+        } else if ((len == 0) && saslClient.isComplete()) {
+          break;
         }
         saslToken = new byte[len];
         if (LOG.isDebugEnabled())
           LOG.debug("Will read input token of size " + saslToken.length
               + " for processing by initSASLContext");
         inStream.readFully(saslToken);
-      }
-
-      while (!saslClient.isComplete()) {
         saslToken = saslClient.evaluateChallenge(saslToken);
-        if (saslToken != null) {
-          if (LOG.isDebugEnabled())
-            LOG.debug("Will send token of size " + saslToken.length
-                + " from initSASLContext.");
-          outStream.writeInt(saslToken.length);
-          outStream.write(saslToken, 0, saslToken.length);
-          outStream.flush();
-        }
-        if (!saslClient.isComplete()) {
-          readStatus(inStream);
-          saslToken = new byte[inStream.readInt()];
-          if (LOG.isDebugEnabled())
-            LOG.debug("Will read input token of size " + saslToken.length
-                + " for processing by initSASLContext");
-          inStream.readFully(saslToken);
-        }
+      }
+      if (!saslClient.isComplete()) { // shouldn't happen
+        throw new SaslException("Internal negotiation error");
       }
       if (LOG.isDebugEnabled()) {
         LOG.debug("SASL client context established. Negotiated QoP: "

+ 4 - 1
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java

@@ -23,6 +23,7 @@ import java.io.DataInput;
 import java.io.DataInputStream;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.security.Security;
 import java.util.Map;
 import java.util.TreeMap;
 
@@ -89,6 +90,7 @@ public class SaslRpcServer {
     
     SASL_PROPS.put(Sasl.QOP, saslQOP.getSaslQop());
     SASL_PROPS.put(Sasl.SERVER_AUTH, "true");
+    Security.addProvider(new SaslPlainServer.SecurityProvider());
   }
   
   static String encodeIdentifier(byte[] identifier) {
@@ -138,7 +140,8 @@ public class SaslRpcServer {
   public static enum AuthMethod {
     SIMPLE((byte) 80, ""),
     KERBEROS((byte) 81, "GSSAPI"),
-    DIGEST((byte) 82, "DIGEST-MD5");
+    DIGEST((byte) 82, "DIGEST-MD5"),
+    PLAIN((byte) 83, "PLAIN");
 
     /** The code for this method. */
     public final byte code;
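
Registering SaslPlainServer.SecurityProvider in the static initializer lets the standard javax.security.sasl lookup resolve the new PLAIN mechanism (wire code 83 above). A sketch of that lookup outside the RPC layer, assuming the provider class introduced earlier in this change; the protocol/server names and the accept-everything callback handler are illustrative only.

    import java.security.Security;
    import javax.security.auth.callback.Callback;
    import javax.security.auth.callback.CallbackHandler;
    import javax.security.sasl.AuthorizeCallback;
    import javax.security.sasl.Sasl;
    import javax.security.sasl.SaslServer;
    import org.apache.hadoop.security.SaslPlainServer;

    public class PlainServerLookupDemo {
      public static void main(String[] args) throws Exception {
        Security.addProvider(new SaslPlainServer.SecurityProvider());
        CallbackHandler cbh = new CallbackHandler() {
          @Override
          public void handle(Callback[] callbacks) {
            for (Callback cb : callbacks) {
              if (cb instanceof AuthorizeCallback) {
                ((AuthorizeCallback) cb).setAuthorized(true); // demo only: accept all
              }
            }
          }
        };
        SaslServer server =
            Sasl.createSaslServer("PLAIN", "demo-protocol", "localhost", null, cbh);
        server.evaluateResponse("\u0000alice\u0000secret".getBytes("UTF-8"));
        System.out.println("complete=" + server.isComplete()
            + " authz=" + server.getAuthorizationID());
      }
    }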

+ 57 - 33
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java

@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.security;
 
-import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION;
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN;
 import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT;
 
@@ -133,7 +132,7 @@ public class UserGroupInformation {
       }
       Principal user = null;
       // if we are using kerberos, try it out
-      if (useKerberos) {
+      if (isAuthenticationMethodEnabled(AuthenticationMethod.KERBEROS)) {
         user = getCanonicalUser(KerberosPrincipal.class);
         if (LOG.isDebugEnabled()) {
           LOG.debug("using kerberos user:"+user);
@@ -191,8 +190,8 @@ public class UserGroupInformation {
   static UgiMetrics metrics = UgiMetrics.create();
   /** Are the static variables that depend on configuration initialized? */
   private static boolean isInitialized = false;
-  /** Should we use Kerberos configuration? */
-  private static boolean useKerberos;
+  /** The auth method to use */
+  private static AuthenticationMethod authenticationMethod;
   /** Server-side groups fetching service */
   private static Groups groups;
   /** Min time (in seconds) before relogin for Kerberos */
@@ -237,19 +236,7 @@ public class UserGroupInformation {
    * @param conf the configuration to use
    */
   private static synchronized void initUGI(Configuration conf) {
-    AuthenticationMethod auth = SecurityUtil.getAuthenticationMethod(conf);
-    switch (auth) {
-      case SIMPLE:
-        useKerberos = false;
-        break;
-      case KERBEROS:
-        useKerberos = true;
-        break;
-      default:
-        throw new IllegalArgumentException("Invalid attribute value for " +
-                                           HADOOP_SECURITY_AUTHENTICATION + 
-                                           " of " + auth);
-    }
+    authenticationMethod = SecurityUtil.getAuthenticationMethod(conf);
     try {
         kerberosMinSecondsBeforeRelogin = 1000L * conf.getLong(
                 HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN,
@@ -287,8 +274,14 @@ public class UserGroupInformation {
    * @return true if UGI is working in a secure environment
    */
   public static boolean isSecurityEnabled() {
+    return !isAuthenticationMethodEnabled(AuthenticationMethod.SIMPLE);
+  }
+  
+  @InterfaceAudience.Private
+  @InterfaceStability.Evolving
+  private static boolean isAuthenticationMethodEnabled(AuthenticationMethod method) {
     ensureInitialized();
-    return useKerberos;
+    return (authenticationMethod == method);
   }
   
   /**
@@ -584,7 +577,7 @@ public class UserGroupInformation {
   @InterfaceStability.Evolving
   public static UserGroupInformation getUGIFromTicketCache(
             String ticketCache, String user) throws IOException {
-    if (!isSecurityEnabled()) {
+    if (!isAuthenticationMethodEnabled(AuthenticationMethod.KERBEROS)) {
       return getBestUGI(null, user);
     }
     try {
@@ -637,19 +630,12 @@ public class UserGroupInformation {
   public synchronized 
   static UserGroupInformation getLoginUser() throws IOException {
     if (loginUser == null) {
+      ensureInitialized();
       try {
         Subject subject = new Subject();
-        LoginContext login;
-        AuthenticationMethod authenticationMethod;
-        if (isSecurityEnabled()) {
-          authenticationMethod = AuthenticationMethod.KERBEROS;
-          login = newLoginContext(HadoopConfiguration.USER_KERBEROS_CONFIG_NAME,
-              subject, new HadoopConfiguration());
-        } else {
-          authenticationMethod = AuthenticationMethod.SIMPLE;
-          login = newLoginContext(HadoopConfiguration.SIMPLE_CONFIG_NAME, 
-              subject, new HadoopConfiguration());
-        }
+        LoginContext login =
+            newLoginContext(authenticationMethod.getLoginAppName(), 
+                            subject, new HadoopConfiguration());
         login.login();
         loginUser = new UserGroupInformation(subject);
         loginUser.setLogin(login);
@@ -674,6 +660,14 @@ public class UserGroupInformation {
     return loginUser;
   }
 
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
+  synchronized static void setLoginUser(UserGroupInformation ugi) {
+    // if this is to become stable, should probably logout the currently
+    // logged in ugi if it's different
+    loginUser = ugi;
+  }
+  
   /**
    * Is this user logged in from a keytab file?
    * @return true if the credentials are from a keytab file.
@@ -1026,22 +1020,38 @@ public class UserGroupInformation {
   public static enum AuthenticationMethod {
     // currently we support only one auth per method, but eventually a 
     // subtype is needed to differentiate, ex. if digest is token or ldap
-    SIMPLE(AuthMethod.SIMPLE),
-    KERBEROS(AuthMethod.KERBEROS),
+    SIMPLE(AuthMethod.SIMPLE,
+        HadoopConfiguration.SIMPLE_CONFIG_NAME),
+    KERBEROS(AuthMethod.KERBEROS,
+        HadoopConfiguration.USER_KERBEROS_CONFIG_NAME),
     TOKEN(AuthMethod.DIGEST),
     CERTIFICATE(null),
     KERBEROS_SSL(null),
     PROXY(null);
     
     private final AuthMethod authMethod;
+    private final String loginAppName;
+    
     private AuthenticationMethod(AuthMethod authMethod) {
+      this(authMethod, null);
+    }
+    private AuthenticationMethod(AuthMethod authMethod, String loginAppName) {
       this.authMethod = authMethod;
+      this.loginAppName = loginAppName;
     }
     
     public AuthMethod getAuthMethod() {
       return authMethod;
     }
     
+    String getLoginAppName() {
+      if (loginAppName == null) {
+        throw new UnsupportedOperationException(
+            this + " login authentication is not supported");
+      }
+      return loginAppName;
+    }
+    
     public static AuthenticationMethod valueOf(AuthMethod authMethod) {
       for (AuthenticationMethod value : values()) {
         if (value.getAuthMethod() == authMethod) {
@@ -1333,7 +1343,21 @@ public class UserGroupInformation {
   public synchronized AuthenticationMethod getAuthenticationMethod() {
     return user.getAuthenticationMethod();
   }
-  
+
+  /**
+   * Get the authentication method from the real user's subject.  If there
+   * is no real user, return the given user's authentication method.
+   * 
+   * @return AuthenticationMethod in the subject, null if not present.
+   */
+  public synchronized AuthenticationMethod getRealAuthenticationMethod() {
+    UserGroupInformation ugi = getRealUser();
+    if (ugi == null) {
+      ugi = this;
+    }
+    return ugi.getAuthenticationMethod();
+  }
+
   /**
    * Returns the authentication method of a ugi. If the authentication method is
    * PROXY, returns the authentication method of the real user.
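
UserGroupInformation now derives its mode directly from hadoop.security.authentication, each AuthenticationMethod carries the JAAS login configuration it maps to, and getRealAuthenticationMethod() lets proxied calls report the real user's method (which the Client change above relies on). A small sketch, assuming createRemoteUser(), createProxyUser() and setAuthenticationMethod() behave as in this branch; the user names are illustrative.

    import org.apache.hadoop.security.UserGroupInformation;
    import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;

    public class RealAuthMethodDemo {
      public static void main(String[] args) throws Exception {
        UserGroupInformation real = UserGroupInformation.createRemoteUser("alice");
        real.setAuthenticationMethod(AuthenticationMethod.KERBEROS);

        UserGroupInformation proxy = UserGroupInformation.createProxyUser("bob", real);
        proxy.setAuthenticationMethod(AuthenticationMethod.PROXY);

        System.out.println(proxy.getAuthenticationMethod());     // PROXY
        System.out.println(proxy.getRealAuthenticationMethod()); // KERBEROS
      }
    }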

+ 139 - 65
hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c

@@ -72,16 +72,27 @@ static int workaround_non_threadsafe_calls(JNIEnv *env, jclass clazz) {
 static void stat_init(JNIEnv *env, jclass nativeio_class) {
   // Init Stat
   jclass clazz = (*env)->FindClass(env, "org/apache/hadoop/io/nativeio/NativeIO$Stat");
-  PASS_EXCEPTIONS(env);
+  if (!clazz) {
+    return; // exception has been raised
+  }
   stat_clazz = (*env)->NewGlobalRef(env, clazz);
+  if (!stat_clazz) {
+    return; // exception has been raised
+  }
   stat_ctor = (*env)->GetMethodID(env, stat_clazz, "<init>",
-    "(Ljava/lang/String;Ljava/lang/String;I)V");
-  
+    "(III)V");
+  if (!stat_ctor) {
+    return; // exception has been raised
+  }
   jclass obj_class = (*env)->FindClass(env, "java/lang/Object");
-  assert(obj_class != NULL);
+  if (!obj_class) {
+    return; // exception has been raised
+  }
   jmethodID  obj_ctor = (*env)->GetMethodID(env, obj_class,
     "<init>", "()V");
-  assert(obj_ctor != NULL);
+  if (!obj_ctor) {
+    return; // exception has been raised
+  }
 
   if (workaround_non_threadsafe_calls(env, nativeio_class)) {
     pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor);
@@ -158,8 +169,6 @@ Java_org_apache_hadoop_io_nativeio_NativeIO_fstat(
   JNIEnv *env, jclass clazz, jobject fd_object)
 {
   jobject ret = NULL;
-  char *pw_buf = NULL;
-  int pw_lock_locked = 0;
 
   int fd = fd_get(env, fd_object);
   PASS_EXCEPTIONS_GOTO(env, cleanup);
@@ -171,71 +180,14 @@ Java_org_apache_hadoop_io_nativeio_NativeIO_fstat(
     goto cleanup;
   }
 
-  size_t pw_buflen = get_pw_buflen();
-  if ((pw_buf = malloc(pw_buflen)) == NULL) {
-    THROW(env, "java/lang/OutOfMemoryError", "Couldn't allocate memory for pw buffer");
-    goto cleanup;
-  }
-
-  if (pw_lock_object != NULL) {
-    if ((*env)->MonitorEnter(env, pw_lock_object) != JNI_OK) {
-      goto cleanup;
-    }
-    pw_lock_locked = 1;
-  }
-
-  // Grab username
-  struct passwd pwd, *pwdp;
-  while ((rc = getpwuid_r(s.st_uid, &pwd, pw_buf, pw_buflen, &pwdp)) != 0) {
-    if (rc != ERANGE) {
-      throw_ioe(env, rc);
-      goto cleanup;
-    }
-    free(pw_buf);
-    pw_buflen *= 2;
-    if ((pw_buf = malloc(pw_buflen)) == NULL) {
-      THROW(env, "java/lang/OutOfMemoryError", "Couldn't allocate memory for pw buffer");
-      goto cleanup;
-    }
-  }
-  assert(pwdp == &pwd);
-
-  jstring jstr_username = (*env)->NewStringUTF(env, pwd.pw_name);
-  if (jstr_username == NULL) goto cleanup;
-
-  // Grab group
-  struct group grp, *grpp;
-  while ((rc = getgrgid_r(s.st_gid, &grp, pw_buf, pw_buflen, &grpp)) != 0) {
-    if (rc != ERANGE) {
-      throw_ioe(env, rc);
-      goto cleanup;
-    }
-    free(pw_buf);
-    pw_buflen *= 2;
-    if ((pw_buf = malloc(pw_buflen)) == NULL) {
-      THROW(env, "java/lang/OutOfMemoryError", "Couldn't allocate memory for pw buffer");
-      goto cleanup;
-    }
-  }
-  assert(grpp == &grp);
-
-  jstring jstr_groupname = (*env)->NewStringUTF(env, grp.gr_name);
-  PASS_EXCEPTIONS_GOTO(env, cleanup);
-
   // Construct result
   ret = (*env)->NewObject(env, stat_clazz, stat_ctor,
-    jstr_username, jstr_groupname, s.st_mode);
+    (jint)s.st_uid, (jint)s.st_gid, (jint)s.st_mode);
 
 cleanup:
-  if (pw_buf != NULL) free(pw_buf);
-  if (pw_lock_locked) {
-    (*env)->MonitorExit(env, pw_lock_object);
-  }
   return ret;
 }
 
-
-
 /**
  * public static native void posix_fadvise(
  *   FileDescriptor fd, long offset, long len, int flags);
@@ -385,6 +337,128 @@ Java_org_apache_hadoop_io_nativeio_NativeIO_chmod(
   (*env)->ReleaseStringUTFChars(env, j_path, path);
 }
 
+/*
+ * static native String getUserName(int uid);
+ */
+JNIEXPORT jstring JNICALL 
+Java_org_apache_hadoop_io_nativeio_NativeIO_getUserName(JNIEnv *env, 
+jclass clazz, jint uid)
+{
+  int pw_lock_locked = 0;
+  if (pw_lock_object != NULL) {
+    if ((*env)->MonitorEnter(env, pw_lock_object) != JNI_OK) {
+      goto cleanup;
+    }
+    pw_lock_locked = 1;
+  }
+
+  char *pw_buf = NULL;
+  int rc;
+  size_t pw_buflen = get_pw_buflen();
+  if ((pw_buf = malloc(pw_buflen)) == NULL) {
+    THROW(env, "java/lang/OutOfMemoryError", "Couldn't allocate memory for pw buffer");
+    goto cleanup;
+  }
+
+  // Grab username
+  struct passwd pwd, *pwdp;
+  while ((rc = getpwuid_r((uid_t)uid, &pwd, pw_buf, pw_buflen, &pwdp)) != 0) {
+    if (rc != ERANGE) {
+      throw_ioe(env, rc);
+      goto cleanup;
+    }
+    free(pw_buf);
+    pw_buflen *= 2;
+    if ((pw_buf = malloc(pw_buflen)) == NULL) {
+      THROW(env, "java/lang/OutOfMemoryError", "Couldn't allocate memory for pw buffer");
+      goto cleanup;
+    }
+  }
+  if (pwdp == NULL) {
+    char msg[80];
+    snprintf(msg, sizeof(msg), "uid not found: %d", uid);
+    THROW(env, "java/io/IOException", msg);
+    goto cleanup;
+  }
+  if (pwdp != &pwd) {
+    char msg[80];
+    snprintf(msg, sizeof(msg), "pwd pointer inconsistent with reference. uid: %d", uid);
+    THROW(env, "java/lang/IllegalStateException", msg);
+    goto cleanup;
+  }
+
+  jstring jstr_username = (*env)->NewStringUTF(env, pwd.pw_name);
+
+cleanup:
+  if (pw_lock_locked) {
+    (*env)->MonitorExit(env, pw_lock_object);
+  }
+  if (pw_buf != NULL) free(pw_buf);
+  return jstr_username;
+}
+
+/*
+ * static native String getGroupName(int gid);
+ */
+JNIEXPORT jstring JNICALL 
+Java_org_apache_hadoop_io_nativeio_NativeIO_getGroupName(JNIEnv *env, 
+jclass clazz, jint gid)
+{
+  int pw_lock_locked = 0;
+ 
+  if (pw_lock_object != NULL) {
+    if ((*env)->MonitorEnter(env, pw_lock_object) != JNI_OK) {
+      goto cleanup;
+    }
+    pw_lock_locked = 1;
+  }
+  
+  char *pw_buf = NULL;
+  int rc;
+  size_t pw_buflen = get_pw_buflen();
+  if ((pw_buf = malloc(pw_buflen)) == NULL) {
+    THROW(env, "java/lang/OutOfMemoryError", "Couldn't allocate memory for pw buffer");
+    goto cleanup;
+  }
+  
+  // Grab group
+  struct group grp, *grpp;
+  while ((rc = getgrgid_r((uid_t)gid, &grp, pw_buf, pw_buflen, &grpp)) != 0) {
+    if (rc != ERANGE) {
+      throw_ioe(env, rc);
+      goto cleanup;
+    }
+    free(pw_buf);
+    pw_buflen *= 2;
+    if ((pw_buf = malloc(pw_buflen)) == NULL) {
+      THROW(env, "java/lang/OutOfMemoryError", "Couldn't allocate memory for pw buffer");
+      goto cleanup;
+    }
+  }
+  if (grpp == NULL) {
+    char msg[80];
+    snprintf(msg, sizeof(msg), "gid not found: %d", gid);
+    THROW(env, "java/io/IOException", msg);
+    goto cleanup;
+  }
+  if (grpp != &grp) {
+    char msg[80];
+    snprintf(msg, sizeof(msg), "pwd pointer inconsistent with reference. gid: %d", gid);
+    THROW(env, "java/lang/IllegalStateException", msg);
+    goto cleanup;
+  }
+
+  jstring jstr_groupname = (*env)->NewStringUTF(env, grp.gr_name);
+  PASS_EXCEPTIONS_GOTO(env, cleanup);
+  
+cleanup:
+  if (pw_lock_locked) {
+    (*env)->MonitorExit(env, pw_lock_object);
+  }
+  if (pw_buf != NULL) free(pw_buf);
+  return jstr_groupname;
+}
+
 
 /*
  * Throw a java.IO.IOException, generating the message from errno.

+ 11 - 0
hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

@@ -214,6 +214,17 @@
   </description>
 </property>
 
+
+<property>
+    <name>hadoop.security.uid.cache.secs</name>
+    <value>14400</value>
+    <description>
+        This is the config controlling the validity of the entries in the cache
+        containing the userId to userName and groupId to groupName used by
+        NativeIO getFstat().
+    </description>
+</property>
+
 <property>
   <name>hadoop.rpc.protection</name>
   <value>authentication</value>
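
The new hadoop.security.uid.cache.secs property (default 14400 seconds, i.e. four hours) bounds how long NativeIO keeps uid/gid-to-name mappings. Clusters would normally override it in core-site.xml; the fragment below just reads the key programmatically, assuming the constants referenced from NativeIO above resolve to this property name and default.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.CommonConfigurationKeys;

    public class UidCacheTimeoutDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        long secs = conf.getLong(
            CommonConfigurationKeys.HADOOP_SECURITY_UID_NAME_CACHE_TIMEOUT_KEY,
            CommonConfigurationKeys.HADOOP_SECURITY_UID_NAME_CACHE_TIMEOUT_DEFAULT);
        System.out.println("uid/gid name cache TTL: " + secs + " seconds");
      }
    }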

+ 0 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/CLIMiniCluster.apt.vm → hadoop-common-project/hadoop-common/src/site/apt/CLIMiniCluster.apt.vm


+ 0 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ClusterSetup.apt.vm → hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm


+ 0 - 0
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/SingleCluster.apt.vm → hadoop-common-project/hadoop-common/src/site/apt/SingleCluster.apt.vm


+ 8 - 27
hadoop-mapreduce-project/src/docs/changes/ChangesSimpleStyle.css → hadoop-common-project/hadoop-common/src/site/resources/css/site.css

@@ -14,36 +14,17 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-
-body {
-  font-family: Courier New, monospace;
-  font-size: 10pt;
-}
-
-h1 {
-  font-family: Courier New, monospace;
-  font-size: 10pt;
-}
-
-h2 {
-  font-family: Courier New, monospace;
-  font-size: 10pt; 
-}
-
-h3 {
-  font-family: Courier New, monospace;
-  font-size: 10pt; 
+#banner {
+  height: 93px;
+  background: none;
 }
 
-a:link {
-  color: blue;
+#bannerLeft img {
+  margin-left: 30px;
+  margin-top: 10px;
 }
 
-a:visited {
-  color: purple; 
+#bannerRight img {
+  margin: 17px;
 }
 
-li {
-  margin-top: 1em;
-  margin-bottom: 1em;
-}

+ 28 - 0
hadoop-common-project/hadoop-common/src/site/site.xml

@@ -0,0 +1,28 @@
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+<project name="Apache Hadoop ${project.version}">
+
+  <skin>
+    <groupId>org.apache.maven.skins</groupId>
+    <artifactId>maven-stylus-skin</artifactId>
+    <version>1.2</version>
+  </skin>
+
+  <body>
+    <links>
+      <item name="Apache Hadoop" href="http://hadoop.apache.org/"/>
+    </links>
+  </body>
+
+</project>

+ 23 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java

@@ -23,11 +23,13 @@ import java.io.IOException;
 
 import junit.framework.TestCase;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
 
 /**
  * <p>
@@ -43,7 +45,7 @@ import org.apache.hadoop.fs.Path;
  * </p>
  */
 public abstract class FileSystemContractBaseTest extends TestCase {
-  
+  protected final static String TEST_UMASK = "062";
   protected FileSystem fs;
   protected byte[] data = new byte[getBlockSize() * 2]; // two blocks of data
   {
@@ -151,7 +153,26 @@ public abstract class FileSystemContractBaseTest extends TestCase {
     assertFalse(fs.exists(testDeepSubDir));
     
   }
-  
+
+  public void testMkdirsWithUmask() throws Exception {
+    if (fs.getScheme().equals("s3") || fs.getScheme().equals("s3n")) {
+      // skip permission tests for S3FileSystem until HDFS-1333 is fixed.
+      return;
+    }
+    Configuration conf = fs.getConf();
+    String oldUmask = conf.get(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY);
+    try {
+      conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, TEST_UMASK);
+      final Path dir = new Path("/test/newDir");
+      assertTrue(fs.mkdirs(dir, new FsPermission((short)0777)));
+      FileStatus status = fs.getFileStatus(dir);
+      assertTrue(status.isDirectory());
+      assertEquals((short)0715, status.getPermission().toShort());
+    } finally {
+      conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, oldUmask);
+    }
+  }
+
   public void testGetFileStatusThrowsExceptionForNonExistentFile() 
     throws Exception {
     try {
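
testMkdirsWithUmask() sets the umask to 062 and requests 0777, so the expected mode is 0777 & ~062 = 0715, which is exactly what the assertion checks. The arithmetic in isolation, with the same illustrative values:

    public class UmaskMathDemo {
      public static void main(String[] args) {
        int requested = 0777;  // mode passed to mkdirs()
        int umask = 062;       // fs.permissions.umask-mode in the test
        int effective = requested & ~umask;
        System.out.println(Integer.toOctalString(effective)); // prints 715
      }
    }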

+ 150 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestDelegationTokenRenewer.java

@@ -0,0 +1,150 @@
+package org.apache.hadoop.fs;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.net.URI;
+
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.security.token.TokenIdentifier;
+import org.apache.hadoop.util.Progressable;
+
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDelegationTokenRenewer {
+  private static final int RENEW_CYCLE = 1000;
+  private static final int MAX_RENEWALS = 100;
+
+  @SuppressWarnings("rawtypes")
+  static class TestToken extends Token {
+    public volatile int renewCount = 0;
+    public volatile boolean cancelled = false;
+
+    @Override
+    public long renew(Configuration conf) {
+      if (renewCount == MAX_RENEWALS) {
+        Thread.currentThread().interrupt();
+      } else {
+        renewCount++;
+      }
+      return renewCount;
+    }
+
+    @Override
+    public void cancel(Configuration conf) {
+      cancelled = true;
+    }
+  }
+  
+  static class TestFileSystem extends FileSystem implements
+      DelegationTokenRenewer.Renewable {
+    private Configuration mockConf = mock(Configuration.class);;
+    private TestToken testToken = new TestToken();
+
+    @Override
+    public Configuration getConf() {
+      return mockConf;
+    }
+
+    @Override
+    public Token<?> getRenewToken() {
+      return testToken;
+    }
+
+    @Override
+    public URI getUri() {
+      return null;
+    }
+
+    @Override
+    public FSDataInputStream open(Path f, int bufferSize) throws IOException {
+      return null;
+    }
+
+    @Override
+    public FSDataOutputStream create(Path f, FsPermission permission,
+        boolean overwrite, int bufferSize, short replication, long blockSize,
+        Progressable progress) throws IOException {
+      return null;
+    }
+
+    @Override
+    public FSDataOutputStream append(Path f, int bufferSize,
+        Progressable progress) throws IOException {
+      return null;
+    }
+
+    @Override
+    public boolean rename(Path src, Path dst) throws IOException {
+      return false;
+    }
+
+    @Override
+    public boolean delete(Path f, boolean recursive) throws IOException {
+      return false;
+    }
+
+    @Override
+    public FileStatus[] listStatus(Path f) throws FileNotFoundException,
+        IOException {
+      return null;
+    }
+
+    @Override
+    public void setWorkingDirectory(Path new_dir) {
+    }
+
+    @Override
+    public Path getWorkingDirectory() {
+      return null;
+    }
+
+    @Override
+    public boolean mkdirs(Path f, FsPermission permission) throws IOException {
+      return false;
+    }
+
+    @Override
+    public FileStatus getFileStatus(Path f) throws IOException {
+      return null;
+    }
+
+    @Override
+    public <T extends TokenIdentifier> void setDelegationToken(Token<T> token) {
+      return;
+    }
+  }
+
+  private DelegationTokenRenewer renewer;
+
+  @Before
+  public void setup() {
+    DelegationTokenRenewer.renewCycle = RENEW_CYCLE;
+    renewer = DelegationTokenRenewer.getInstance();
+  }
+
+  @Test
+  public void testAddRemoveRenewAction() throws IOException,
+      InterruptedException {
+    TestFileSystem tfs = new TestFileSystem();
+    renewer.addRenewAction(tfs);
+
+    for (int i = 0; i < 60; i++) {
+      Thread.sleep(RENEW_CYCLE);
+      if (tfs.testToken.renewCount > 0) {
+        renewer.removeRenewAction(tfs);
+        break;
+      }
+    }
+
+    assertTrue("Token not renewed even after 1 minute",
+        (tfs.testToken.renewCount > 0));
+    assertTrue("Token not removed", (tfs.testToken.renewCount < MAX_RENEWALS));
+    assertTrue("Token not cancelled", tfs.testToken.cancelled);
+  }
+}
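
For orientation, a minimal usage sketch of the renewer API exercised above, reusing the TestFileSystem class from this test; it is an illustration only, not part of the change:

    // Java sketch; assumes the TestFileSystem defined in the test above.
    DelegationTokenRenewer renewer = DelegationTokenRenewer.getInstance();
    TestFileSystem fs = new TestFileSystem();
    renewer.addRenewAction(fs);     // schedules periodic renew() calls on the token
    // ... use the file system ...
    renewer.removeRenewAction(fs);  // stops renewal; per the test, the token is then cancelled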

+ 297 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystemBasics.java

@@ -0,0 +1,297 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * This test class checks basic operations with {@link HarFileSystem} including
+ * various initialization cases, getters, and modification methods.
+ * 
+ * NB: to run this test from an IDE, make sure the folder
+ * "hadoop-common-project/hadoop-common/src/main/resources/" is added as a
+ * source path. This allows the runtime to pick up the "core-default.xml" and
+ * "META-INF/services/..." resources from the classpath.
+ */
+public class TestHarFileSystemBasics {
+
+  private static final String ROOT_PATH = System.getProperty("test.build.data",
+      "build/test/data");
+  private static final Path rootPath = new Path(
+      new File(ROOT_PATH).getAbsolutePath() + "/localfs");
+  // NB: .har suffix is necessary
+  private static final Path harPath = new Path(rootPath, "path1/path2/my.har");
+
+  private FileSystem localFileSystem;
+  private HarFileSystem harFileSystem;
+  private Configuration conf;
+
+  /*
+   * Creates and returns a fully initialized HarFileSystem.
+   */
+  private HarFileSystem createHarFileSystem(final Configuration conf)
+      throws Exception {
+    localFileSystem = FileSystem.getLocal(conf);
+    localFileSystem.initialize(new URI("file:///"), conf);
+    localFileSystem.mkdirs(rootPath);
+    localFileSystem.mkdirs(harPath);
+    final Path indexPath = new Path(harPath, "_index");
+    final Path masterIndexPath = new Path(harPath, "_masterindex");
+    localFileSystem.createNewFile(indexPath);
+    assertTrue(localFileSystem.exists(indexPath));
+    localFileSystem.createNewFile(masterIndexPath);
+    assertTrue(localFileSystem.exists(masterIndexPath));
+
+    writeVersionToMasterIndexImpl(HarFileSystem.VERSION);
+
+    final HarFileSystem harFileSystem = new HarFileSystem(localFileSystem);
+    final URI uri = new URI("har://" + harPath.toString());
+    harFileSystem.initialize(uri, conf);
+    return harFileSystem;
+  }
+
+  private void writeVersionToMasterIndexImpl(int version) throws IOException {
+    final Path masterIndexPath = new Path(harPath, "_masterindex");
+    // write Har version into the master index:
+    final FSDataOutputStream fsdos = localFileSystem.create(masterIndexPath);
+    try {
+      String versionString = version + "\n";
+      fsdos.write(versionString.getBytes("UTF-8"));
+      fsdos.flush();
+    } finally {
+      fsdos.close();
+    }
+  }
+
+  @Before
+  public void before() throws Exception {
+    final File rootDirIoFile = new File(rootPath.toUri().getPath());
+    rootDirIoFile.mkdirs();
+    if (!rootDirIoFile.exists()) {
+      throw new IOException("Failed to create temp directory ["
+          + rootDirIoFile.getAbsolutePath() + "]");
+    }
+    // create Har to test:
+    conf = new Configuration();
+    harFileSystem = createHarFileSystem(conf);
+  }
+
+  @After
+  public void after() throws Exception {
+    // close Har FS:
+    final FileSystem harFS = harFileSystem;
+    if (harFS != null) {
+      harFS.close();
+      harFileSystem = null;
+    }
+    // cleanup: delete all the temporary files:
+    final File rootDirIoFile = new File(rootPath.toUri().getPath());
+    if (rootDirIoFile.exists()) {
+      FileUtil.fullyDelete(rootDirIoFile);
+    }
+    if (rootDirIoFile.exists()) {
+      throw new IOException("Failed to delete temp directory ["
+          + rootDirIoFile.getAbsolutePath() + "]");
+    }
+  }
+
+  // ======== Positive tests:
+
+  @Test
+  public void testPositiveHarFileSystemBasics() throws Exception {
+    // check Har version:
+    assertEquals(HarFileSystem.VERSION, harFileSystem.getHarVersion());
+
+    // check Har URI:
+    final URI harUri = harFileSystem.getUri();
+    assertEquals(harPath.toUri().getPath(), harUri.getPath());
+    assertEquals("har", harUri.getScheme());
+
+    // check Har home path:
+    final Path homePath = harFileSystem.getHomeDirectory();
+    assertEquals(harPath.toUri().getPath(), homePath.toUri().getPath());
+
+    // check working directory:
+    final Path workDirPath0 = harFileSystem.getWorkingDirectory();
+    assertEquals(homePath, workDirPath0);
+
+    // check that it's impossible to reset the working directory
+    // (#setWorkingDirectory should have no effect):
+    harFileSystem.setWorkingDirectory(new Path("/foo/bar"));
+    assertEquals(workDirPath0, harFileSystem.getWorkingDirectory());
+  }
+
+  @Test
+  public void testPositiveNewHarFsOnTheSameUnderlyingFs() throws Exception {
+    // Init 2nd har file system on the same underlying FS, so the
+    // metadata gets reused:
+    final HarFileSystem hfs = new HarFileSystem(localFileSystem);
+    final URI uri = new URI("har://" + harPath.toString());
+    hfs.initialize(uri, new Configuration());
+    // the metadata should be reused from cache:
+    assertTrue(hfs.getMetadata() == harFileSystem.getMetadata());
+  }
+
+  @Test
+  public void testPositiveInitWithoutUnderlyingFS() throws Exception {
+    // Init HarFS with no constructor arg, so that the underlying FS object
+    // is created on demand or retrieved from the cache in the #initialize() method.
+    final HarFileSystem hfs = new HarFileSystem();
+    final URI uri = new URI("har://" + harPath.toString());
+    hfs.initialize(uri, new Configuration());
+  }
+
+  // ========== Negative:
+
+  @Test
+  public void testNegativeInitWithoutIndex() throws Exception {
+    // delete the index file:
+    final Path indexPath = new Path(harPath, "_index");
+    localFileSystem.delete(indexPath, false);
+    // now init the HarFs:
+    final HarFileSystem hfs = new HarFileSystem(localFileSystem);
+    final URI uri = new URI("har://" + harPath.toString());
+    try {
+      hfs.initialize(uri, new Configuration());
+      Assert.fail("Exception expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+  }
+
+  @Test
+  public void testNegativeGetHarVersionOnNotInitializedFS() throws Exception {
+    final HarFileSystem hfs = new HarFileSystem(localFileSystem);
+    try {
+      int version = hfs.getHarVersion();
+      Assert.fail("Exception expected, but got a Har version " + version + ".");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+  }
+
+  @Test
+  public void testNegativeInitWithAnUnsupportedVersion() throws Exception {
+    // NB: we should wait at least 1 second so that the timestamp of the
+    // master index changes when it is rewritten, because Linux appears to
+    // track file modification times with 1-second accuracy:
+    Thread.sleep(1000);
+    // write an unsupported version:
+    writeVersionToMasterIndexImpl(7777);
+    // init the Har:
+    final HarFileSystem hfs = new HarFileSystem(localFileSystem);
+
+    // the metadata should *not* be reused from cache:
+    assertFalse(hfs.getMetadata() == harFileSystem.getMetadata());
+
+    final URI uri = new URI("har://" + harPath.toString());
+    try {
+      hfs.initialize(uri, new Configuration());
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+  }
+
+  @Test
+  public void testNegativeHarFsModifications() throws Exception {
+    // all the modification methods of HarFS must lead to IOE.
+    final Path fooPath = new Path(rootPath, "foo/bar");
+    localFileSystem.createNewFile(fooPath);
+    try {
+      harFileSystem.create(fooPath, new FsPermission("+rwx"), true, 1024,
+          (short) 88, 1024, null);
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+
+    try {
+      harFileSystem.setReplication(fooPath, (short) 55);
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+
+    try {
+      harFileSystem.delete(fooPath, true);
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+
+    try {
+      harFileSystem.mkdirs(fooPath, new FsPermission("+rwx"));
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+
+    final Path indexPath = new Path(harPath, "_index");
+    try {
+      harFileSystem.copyFromLocalFile(false, indexPath, fooPath);
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+
+    try {
+      harFileSystem.startLocalOutput(fooPath, indexPath);
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+
+    try {
+      harFileSystem.completeLocalOutput(fooPath, indexPath);
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+
+    try {
+      harFileSystem.setOwner(fooPath, "user", "group");
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+
+    try {
+      harFileSystem.setPermission(fooPath, new FsPermission("+x"));
+      Assert.fail("IOException expected.");
+    } catch (IOException ioe) {
+      // ok, expected.
+    }
+  }
+
+}
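
As a companion to the test, a sketch of how a client opens an existing archive the same way createHarFileSystem() above does; the /tmp path is illustrative and the archive directory is assumed to already contain the _index and _masterindex files:

    // Java sketch (org.apache.hadoop.conf.Configuration, org.apache.hadoop.fs.*, java.net.URI).
    Configuration conf = new Configuration();
    FileSystem local = FileSystem.getLocal(conf);
    HarFileSystem har = new HarFileSystem(local);
    har.initialize(new URI("har:///tmp/archives/my.har"), conf);   // .har suffix is required
    System.out.println("HAR version: " + har.getHarVersion());
    System.out.println("HAR home:    " + har.getHomeDirectory());
    har.close();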

+ 58 - 3
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java

@@ -22,6 +22,8 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.util.Shell;
@@ -32,6 +34,7 @@ import org.junit.runners.Parameterized.Parameters;
 import org.junit.Test;
 
 import static org.junit.Assert.*;
+import static org.junit.Assume.*;
 
 /** This test LocalDirAllocator works correctly;
  * Every test case uses different buffer dirs to
@@ -321,7 +324,7 @@ public class TestLocalDirAllocator {
    */
   @Test
   public void testNoSideEffects() throws IOException {
-    if (isWindows) return;
+    assumeTrue(!isWindows);
     String dir = buildBufferDir(ROOT, 0);
     try {
       conf.set(CONTEXT, dir);
@@ -343,8 +346,7 @@ public class TestLocalDirAllocator {
    */
   @Test
   public void testGetLocalPathToRead() throws IOException {
-    if (isWindows)
-      return;
+    assumeTrue(!isWindows);
     String dir = buildBufferDir(ROOT, 0);
     try {
       conf.set(CONTEXT, dir);
@@ -359,7 +361,60 @@ public class TestLocalDirAllocator {
                                                       BUFFER_DIR_ROOT));
       rmBufferDirs();
     }
+  }
 
+  /**
+   * Test that {@link LocalDirAllocator#getAllLocalPathsToRead(String, Configuration)} 
+   * returns correct filenames and the "file" scheme.
+   *
+   * @throws IOException
+   */
+  @Test
+  public void testGetAllLocalPathsToRead() throws IOException {
+    assumeTrue(!isWindows);
+    
+    String dir0 = buildBufferDir(ROOT, 0);
+    String dir1 = buildBufferDir(ROOT, 1);
+    try {
+      conf.set(CONTEXT, dir0 + "," + dir1);
+      assertTrue(localFs.mkdirs(new Path(dir0)));
+      assertTrue(localFs.mkdirs(new Path(dir1)));
+      
+      localFs.create(new Path(dir0 + Path.SEPARATOR + FILENAME));
+      localFs.create(new Path(dir1 + Path.SEPARATOR + FILENAME));
+
+      // check that both paths are returned as paths to read:
+      final Iterable<Path> pathIterable = dirAllocator.getAllLocalPathsToRead(FILENAME, conf);
+      int count = 0;
+      for (final Path p: pathIterable) {
+        count++;
+        assertEquals(FILENAME, p.getName());
+        assertEquals("file", p.getFileSystem(conf).getUri().getScheme());
+      }
+      assertEquals(2, count);
+
+      // test #next() when there are no more elements to iterate:
+      try {
+        Path p = pathIterable.iterator().next();
+        fail("NoSuchElementException must be thrown, but returned [" + p
+            + "] instead.");
+      } catch (NoSuchElementException nsee) {
+        // okay
+      }
+      
+      // test modification not allowed:
+      final Iterable<Path> pathIterable2 = dirAllocator.getAllLocalPathsToRead(FILENAME, conf);
+      final Iterator<Path> it = pathIterable2.iterator();
+      try {
+        it.remove();
+        fail("UnsupportedOperationException expected.");
+      } catch (UnsupportedOperationException uoe) {
+        // okay
+      }
+    } finally {
+      Shell.execCommand(new String[] { "chmod", "u+w", BUFFER_DIR_ROOT });
+      rmBufferDirs();
+    }
   }
   
   @Test
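
To make the new API concrete, a sketch of the getAllLocalPathsToRead() call pattern the added test exercises; the config key value and relative file name are illustrative:

    // Java sketch; LocalDirAllocator is keyed by a comma-separated list of local dirs.
    Configuration conf = new Configuration();
    conf.set("mapred.local.dir", "/data/1/tmp,/data/2/tmp");
    LocalDirAllocator alloc = new LocalDirAllocator("mapred.local.dir");
    for (Path p : alloc.getAllLocalPathsToRead("job_1/spill0.out", conf)) {
      // each returned path names an existing copy and uses the "file" scheme
      System.out.println(p);
    }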

+ 1 - 1
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestPathExceptions.java

@@ -23,7 +23,7 @@ import static org.junit.Assert.assertEquals;
 import java.io.IOException;
 
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.shell.PathExceptions.PathIOException;
+import org.apache.hadoop.fs.PathIOException;
 import org.junit.Test;
 
 public class TestPathExceptions {

+ 6 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFileSystemTestSetup.java

@@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileSystemTestHelper;
 import org.apache.hadoop.fs.FsConstants;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.viewfs.ConfigUtil;
+import org.apache.hadoop.util.Shell;
 import org.mortbay.log.Log;
 
 
@@ -123,8 +124,11 @@ public class ViewFileSystemTestSetup {
    * in the target file system.
    */
   static void linkUpFirstComponents(Configuration conf, String path, FileSystem fsTarget, String info) {
-    int indexOf2ndSlash = path.indexOf('/', 1);
-    String firstComponent = path.substring(0, indexOf2ndSlash);
+    int indexOfEnd = path.indexOf('/', 1);
+    if (Shell.WINDOWS) {
+      indexOfEnd = path.indexOf('/', indexOfEnd + 1);
+    }
+    String firstComponent = path.substring(0, indexOfEnd);
     URI linkTarget = fsTarget.makeQualified(new Path(firstComponent)).toUri();
     ConfigUtil.addLink(conf, firstComponent, linkTarget);
     Log.info("Added link for " + info + " " 

+ 6 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/ViewFsTestSetup.java

@@ -25,6 +25,7 @@ import org.apache.hadoop.fs.FileContextTestHelper;
 import org.apache.hadoop.fs.FsConstants;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.viewfs.ConfigUtil;
+import org.apache.hadoop.util.Shell;
 import org.mortbay.log.Log;
 
 
@@ -120,8 +121,11 @@ public class ViewFsTestSetup {
    */
   static void linkUpFirstComponents(Configuration conf, String path,
       FileContext fsTarget, String info) {
-    int indexOf2ndSlash = path.indexOf('/', 1);
-    String firstComponent = path.substring(0, indexOf2ndSlash);
+    int indexOfEnd = path.indexOf('/', 1);
+    if (Shell.WINDOWS) {
+      indexOfEnd = path.indexOf('/', indexOfEnd + 1);
+    }
+    String firstComponent = path.substring(0, indexOfEnd);
     URI linkTarget = fsTarget.makeQualified(new Path(firstComponent)).toUri();
     ConfigUtil.addLink(conf, firstComponent, linkTarget);
     Log.info("Added link for " + info + " " 

+ 13 - 0
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpServer.java

@@ -539,4 +539,17 @@ public class TestHttpServer extends HttpServerFunctionalTest {
     }
     return server;
   }
+
+  @Test
+  public void testNoCacheHeader() throws Exception {
+    URL url = new URL(baseUrl, "/echo?a=b&c=d");
+    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+    assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode());
+    assertEquals("no-cache", conn.getHeaderField("Cache-Control"));
+    assertEquals("no-cache", conn.getHeaderField("Pragma"));
+    assertNotNull(conn.getHeaderField("Expires"));
+    assertNotNull(conn.getHeaderField("Date"));
+    assertEquals(conn.getHeaderField("Expires"), conn.getHeaderField("Date"));
+  }
+
 }

+ 13 - 3
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIO.java

@@ -61,7 +61,7 @@ public class TestNativeIO {
   public void testFstat() throws Exception {
     FileOutputStream fos = new FileOutputStream(
       new File(TEST_DIR, "testfstat"));
-    NativeIO.Stat stat = NativeIO.fstat(fos.getFD());
+    NativeIO.Stat stat = NativeIO.getFstat(fos.getFD());
     fos.close();
     LOG.info("Stat: " + String.valueOf(stat));
 
@@ -93,7 +93,7 @@ public class TestNativeIO {
           long et = Time.now() + 5000;
           while (Time.now() < et) {
             try {
-              NativeIO.Stat stat = NativeIO.fstat(fos.getFD());
+              NativeIO.Stat stat = NativeIO.getFstat(fos.getFD());
               assertEquals(System.getProperty("user.name"), stat.getOwner());
               assertNotNull(stat.getGroup());
               assertTrue(!stat.getGroup().isEmpty());
@@ -125,7 +125,7 @@ public class TestNativeIO {
       new File(TEST_DIR, "testfstat2"));
     fos.close();
     try {
-      NativeIO.Stat stat = NativeIO.fstat(fos.getFD());
+      NativeIO.Stat stat = NativeIO.getFstat(fos.getFD());
     } catch (NativeIOException nioe) {
       LOG.info("Got expected exception", nioe);
       assertEquals(Errno.EBADF, nioe.getErrno());
@@ -283,4 +283,14 @@ public class TestNativeIO {
     assertEquals(expected, perms.toShort());
   }
 
+  @Test
+  public void testGetUserName() throws IOException {
+    assertFalse(NativeIO.getUserName(0).isEmpty());
+  }
+
+  @Test
+  public void testGetGroupName() throws IOException {
+    assertFalse(NativeIO.getGroupName(0).isEmpty());
+  }
+
 }

+ 160 - 14
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java

@@ -27,12 +27,13 @@ import java.io.IOException;
 import java.lang.annotation.Annotation;
 import java.net.InetSocketAddress;
 import java.security.PrivilegedExceptionAction;
+import java.security.Security;
 import java.util.Collection;
 import java.util.Set;
 import java.util.regex.Pattern;
 
-import javax.security.sasl.Sasl;
-
+import javax.security.auth.callback.*;
+import javax.security.sasl.*;
 import junit.framework.Assert;
 
 import org.apache.commons.logging.Log;
@@ -44,6 +45,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.ipc.Client.ConnectionId;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.*;
+import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
 import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
 import org.apache.hadoop.security.token.SecretManager;
 import org.apache.hadoop.security.token.Token;
@@ -53,7 +55,6 @@ import org.apache.hadoop.security.token.TokenSelector;
 import org.apache.hadoop.security.token.SecretManager.InvalidToken;
 
 import org.apache.log4j.Level;
-import org.apache.tools.ant.types.Assertions.EnabledAssertion;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -76,7 +77,8 @@ public class TestSaslRPC {
   @BeforeClass
   public static void setupKerb() {
     System.setProperty("java.security.krb5.kdc", "");
-    System.setProperty("java.security.krb5.realm", "NONE"); 
+    System.setProperty("java.security.krb5.realm", "NONE");
+    Security.addProvider(new SaslPlainServer.SecurityProvider());
   }    
 
   @Before
@@ -448,15 +450,132 @@ public class TestSaslRPC {
     System.out.println("Test is successful.");
   }
 
+  @Test
+  public void testSaslPlainServer() throws IOException {
+    runNegotiation(
+        new TestPlainCallbacks.Client("user", "pass"),
+        new TestPlainCallbacks.Server("user", "pass"));
+  }
+
+  @Test
+  public void testSaslPlainServerBadPassword() throws IOException {
+    SaslException e = null;
+    try {
+      runNegotiation(
+          new TestPlainCallbacks.Client("user", "pass1"),
+          new TestPlainCallbacks.Server("user", "pass2"));
+    } catch (SaslException se) {
+      e = se;
+    }
+    assertNotNull(e);
+    assertEquals("PLAIN auth failed: wrong password", e.getMessage());
+  }
+
+
+  private void runNegotiation(CallbackHandler clientCbh,
+                              CallbackHandler serverCbh)
+                                  throws SaslException {
+    String mechanism = AuthMethod.PLAIN.getMechanismName();
+
+    SaslClient saslClient = Sasl.createSaslClient(
+        new String[]{ mechanism }, null, null, null, null, clientCbh);
+    assertNotNull(saslClient);
+
+    SaslServer saslServer = Sasl.createSaslServer(
+        mechanism, null, "localhost", null, serverCbh);
+    assertNotNull("failed to find PLAIN server", saslServer);
+    
+    byte[] response = saslClient.evaluateChallenge(new byte[0]);
+    assertNotNull(response);
+    assertTrue(saslClient.isComplete());
+
+    response = saslServer.evaluateResponse(response);
+    assertNull(response);
+    assertTrue(saslServer.isComplete());
+    assertNotNull(saslServer.getAuthorizationID());
+  }
+  
+  static class TestPlainCallbacks {
+    public static class Client implements CallbackHandler {
+      String user = null;
+      String password = null;
+      
+      Client(String user, String password) {
+        this.user = user;
+        this.password = password;
+      }
+      
+      @Override
+      public void handle(Callback[] callbacks)
+          throws UnsupportedCallbackException {
+        for (Callback callback : callbacks) {
+          if (callback instanceof NameCallback) {
+            ((NameCallback) callback).setName(user);
+          } else if (callback instanceof PasswordCallback) {
+            ((PasswordCallback) callback).setPassword(password.toCharArray());
+          } else {
+            throw new UnsupportedCallbackException(callback,
+                "Unrecognized SASL PLAIN Callback");
+          }
+        }
+      }
+    }
+    
+    public static class Server implements CallbackHandler {
+      String user = null;
+      String password = null;
+      
+      Server(String user, String password) {
+        this.user = user;
+        this.password = password;
+      }
+      
+      @Override
+      public void handle(Callback[] callbacks)
+          throws UnsupportedCallbackException, SaslException {
+        NameCallback nc = null;
+        PasswordCallback pc = null;
+        AuthorizeCallback ac = null;
+        
+        for (Callback callback : callbacks) {
+          if (callback instanceof NameCallback) {
+            nc = (NameCallback)callback;
+            assertEquals(user, nc.getName());
+          } else if (callback instanceof PasswordCallback) {
+            pc = (PasswordCallback)callback;
+            if (!password.equals(new String(pc.getPassword()))) {
+              throw new IllegalArgumentException("wrong password");
+            }
+          } else if (callback instanceof AuthorizeCallback) {
+            ac = (AuthorizeCallback)callback;
+            assertEquals(user, ac.getAuthorizationID());
+            assertEquals(user, ac.getAuthenticationID());
+            ac.setAuthorized(true);
+            ac.setAuthorizedID(ac.getAuthenticationID());
+          } else {
+            throw new UnsupportedCallbackException(callback,
+                "Unsupported SASL PLAIN Callback");
+          }
+        }
+        assertNotNull(nc);
+        assertNotNull(pc);
+        assertNotNull(ac);
+      }
+    }
+  }
+  
   private static Pattern BadToken =
       Pattern.compile(".*DIGEST-MD5: digest response format violation.*");
   private static Pattern KrbFailed =
       Pattern.compile(".*Failed on local exception:.* " +
                       "Failed to specify server's Kerberos principal name.*");
-  private static Pattern Denied = 
-      Pattern.compile(".*Authorization .* is enabled .*");
-  private static Pattern NoDigest =
-      Pattern.compile(".*Server is not configured to do DIGEST auth.*");
+  private static Pattern Denied(AuthenticationMethod method) {
+    return Pattern.compile(".*RemoteException.*AccessControlException.*: "
+        + method.getAuthMethod() + " authentication is not enabled.*");
+  }
+  private static Pattern NoTokenAuth =
+      Pattern.compile(".*IllegalArgumentException: " +
+                      "TOKEN authentication requires a secret manager");
   
   /*
    *  simple server
@@ -488,13 +607,40 @@ public class TestSaslRPC {
     assertAuthEquals(SIMPLE, getAuthMethod(KERBEROS, SIMPLE, false));
   }
   
+  /*
+   *  token server
+   */
+  @Test
+  public void testTokenOnlyServer() throws Exception {
+    assertAuthEquals(Denied(SIMPLE), getAuthMethod(SIMPLE,   TOKEN));
+    assertAuthEquals(KrbFailed,      getAuthMethod(KERBEROS, TOKEN));
+  }
+
+  @Test
+  public void testTokenOnlyServerWithTokens() throws Exception {
+    assertAuthEquals(TOKEN, getAuthMethod(SIMPLE,   TOKEN, true));
+    assertAuthEquals(TOKEN, getAuthMethod(KERBEROS, TOKEN, true));
+    forceSecretManager = false;
+    assertAuthEquals(NoTokenAuth, getAuthMethod(SIMPLE,   TOKEN, true));
+    assertAuthEquals(NoTokenAuth, getAuthMethod(KERBEROS, TOKEN, true));
+  }
+
+  @Test
+  public void testTokenOnlyServerWithInvalidTokens() throws Exception {
+    assertAuthEquals(BadToken, getAuthMethod(SIMPLE,   TOKEN, false));
+    assertAuthEquals(BadToken, getAuthMethod(KERBEROS, TOKEN, false));
+    forceSecretManager = false;
+    assertAuthEquals(NoTokenAuth, getAuthMethod(SIMPLE,   TOKEN, false));
+    assertAuthEquals(NoTokenAuth, getAuthMethod(KERBEROS, TOKEN, false));
+  }
+
   /*
    * kerberos server
    */
   @Test
   public void testKerberosServer() throws Exception {
-    assertAuthEquals(Denied,    getAuthMethod(SIMPLE,   KERBEROS));
-    assertAuthEquals(KrbFailed, getAuthMethod(KERBEROS, KERBEROS));    
+    assertAuthEquals(Denied(SIMPLE), getAuthMethod(SIMPLE,   KERBEROS));
+    assertAuthEquals(KrbFailed,      getAuthMethod(KERBEROS, KERBEROS));    
   }
 
   @Test
@@ -504,8 +650,8 @@ public class TestSaslRPC {
     assertAuthEquals(TOKEN, getAuthMethod(KERBEROS, KERBEROS, true));
     // can't fallback to simple when using kerberos w/o tokens
     forceSecretManager = false;
-    assertAuthEquals(NoDigest, getAuthMethod(SIMPLE,   KERBEROS, true));
-    assertAuthEquals(NoDigest, getAuthMethod(KERBEROS, KERBEROS, true));
+    assertAuthEquals(Denied(TOKEN), getAuthMethod(SIMPLE,   KERBEROS, true));
+    assertAuthEquals(Denied(TOKEN), getAuthMethod(KERBEROS, KERBEROS, true));
   }
 
   @Test
@@ -513,8 +659,8 @@ public class TestSaslRPC {
     assertAuthEquals(BadToken, getAuthMethod(SIMPLE,   KERBEROS, false));
     assertAuthEquals(BadToken, getAuthMethod(KERBEROS, KERBEROS, false));
     forceSecretManager = false;
-    assertAuthEquals(NoDigest, getAuthMethod(SIMPLE,   KERBEROS, true));
-    assertAuthEquals(NoDigest, getAuthMethod(KERBEROS, KERBEROS, true));
+    assertAuthEquals(Denied(TOKEN), getAuthMethod(SIMPLE,   KERBEROS, false));
+    assertAuthEquals(Denied(TOKEN), getAuthMethod(KERBEROS, KERBEROS, false));
   }
 
 

+ 61 - 2
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestUserGroupInformation.java

@@ -71,16 +71,75 @@ public class TestUserGroupInformation {
   /** configure ugi */
   @BeforeClass
   public static void setup() {
+    javax.security.auth.login.Configuration.setConfiguration(
+        new DummyLoginConfiguration());
+  }
+  
+  @Before
+  public void setupUgi() {
     conf = new Configuration();
     conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL,
         "RULE:[2:$1@$0](.*@HADOOP.APACHE.ORG)s/@.*//" +
         "RULE:[1:$1@$0](.*@HADOOP.APACHE.ORG)s/@.*//"
         + "DEFAULT");
     UserGroupInformation.setConfiguration(conf);
-    javax.security.auth.login.Configuration.setConfiguration(
-        new DummyLoginConfiguration());
+    UserGroupInformation.setLoginUser(null);
   }
   
+  @After
+  public void resetUgi() {
+    UserGroupInformation.setLoginUser(null);
+  }
+
+  @Test
+  public void testSimpleLogin() throws IOException {
+    tryLoginAuthenticationMethod(AuthenticationMethod.SIMPLE, true);
+  }
+
+  @Test
+  public void testTokenLogin() throws IOException {
+    tryLoginAuthenticationMethod(AuthenticationMethod.TOKEN, false);
+  }
+  
+  @Test
+  public void testProxyLogin() throws IOException {
+    tryLoginAuthenticationMethod(AuthenticationMethod.PROXY, false);
+  }
+  
+  private void tryLoginAuthenticationMethod(AuthenticationMethod method,
+                                            boolean expectSuccess)
+                                                throws IOException {
+    SecurityUtil.setAuthenticationMethod(method, conf);
+    UserGroupInformation.setConfiguration(conf); // pick up changed auth       
+
+    UserGroupInformation ugi = null;
+    Exception ex = null;
+    try {
+      ugi = UserGroupInformation.getLoginUser();
+    } catch (Exception e) {
+      ex = e;
+    }
+    if (expectSuccess) {
+      assertNotNull(ugi);
+      assertEquals(method, ugi.getAuthenticationMethod());
+    } else {
+      assertNotNull(ex);
+      assertEquals(UnsupportedOperationException.class, ex.getClass());
+      assertEquals(method + " login authentication is not supported",
+                   ex.getMessage());
+    }
+  }
+  
+  @Test
+  public void testGetRealAuthenticationMethod() {
+    UserGroupInformation ugi = UserGroupInformation.createRemoteUser("user1");
+    ugi.setAuthenticationMethod(AuthenticationMethod.SIMPLE);
+    assertEquals(AuthenticationMethod.SIMPLE, ugi.getAuthenticationMethod());
+    assertEquals(AuthenticationMethod.SIMPLE, ugi.getRealAuthenticationMethod());
+    ugi = UserGroupInformation.createProxyUser("user2", ugi);
+    assertEquals(AuthenticationMethod.PROXY, ugi.getAuthenticationMethod());
+    assertEquals(AuthenticationMethod.SIMPLE, ugi.getRealAuthenticationMethod());
+  }
   /** Test login method */
   @Test
   public void testLogin() throws Exception {

+ 16 - 2
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/UserProvider.java

@@ -31,6 +31,7 @@ import javax.ws.rs.core.Context;
 import javax.ws.rs.ext.Provider;
 import java.lang.reflect.Type;
 import java.security.Principal;
+import java.text.MessageFormat;
 import java.util.regex.Pattern;
 
 @Provider
@@ -40,13 +41,26 @@ public class UserProvider extends AbstractHttpContextInjectable<Principal> imple
 
   public static final String USER_NAME_PARAM = "user.name";
 
-  public static final Pattern USER_PATTERN = Pattern.compile("[_a-zA-Z0-9]+");
+  public static final Pattern USER_PATTERN = Pattern.compile("^[A-Za-z_][A-Za-z0-9._-]*[$]?$");
 
-  private static class UserParam extends StringParam {
+  static class UserParam extends StringParam {
 
     public UserParam(String user) {
       super(USER_NAME_PARAM, user, USER_PATTERN);
     }
+
+    @Override
+    public String parseParam(String str) {
+      if (str != null) {
+        int len = str.length();
+        if (len < 1 || len > 31) {
+          throw new IllegalArgumentException(MessageFormat.format(
+            "Parameter [{0}], invalid value [{1}], it's length must be between 1 and 31",
+            getName(), str));
+        }
+      }
+      return super.parseParam(str);
+    }
   }
 
   @Override
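
To illustrate the tightened rule, a sketch of the validation the new UserParam performs: the USER_PATTERN regex above plus the 1..31 length check; the sample names are illustrative:

    // Java sketch using java.util.regex.Pattern.
    java.util.regex.Pattern p =
        java.util.regex.Pattern.compile("^[A-Za-z_][A-Za-z0-9._-]*[$]?$");
    for (String name : new String[] { "alice", "svc_user$", "1bad", "bad$name", "" }) {
      boolean ok = name.length() >= 1 && name.length() <= 31 && p.matcher(name).matches();
      System.out.println(name + " -> " + ok);  // true, true, false, false, false
    }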

+ 30 - 0
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/resources/css/site.css

@@ -0,0 +1,30 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+#banner {
+  height: 93px;
+  background: none;
+}
+
+#bannerLeft img {
+  margin-left: 30px;
+  margin-top: 10px;
+}
+
+#bannerRight img {
+  margin: 17px;
+}
+

+ 55 - 0
hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/wsrs/TestUserProvider.java

@@ -19,13 +19,18 @@
 package org.apache.hadoop.lib.wsrs;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 
 import java.security.Principal;
 
 import javax.ws.rs.core.MultivaluedMap;
 
+import org.apache.hadoop.test.TestException;
+import org.apache.hadoop.test.TestExceptionHelper;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.MethodRule;
 import org.mockito.Mockito;
 import org.slf4j.MDC;
 
@@ -35,6 +40,9 @@ import com.sun.jersey.core.spi.component.ComponentScope;
 
 public class TestUserProvider {
 
+  @Rule
+  public MethodRule exceptionHelper = new TestExceptionHelper();
+
   @Test
   @SuppressWarnings("unchecked")
   public void noUser() {
@@ -92,4 +100,51 @@ public class TestUserProvider {
     assertEquals(up.getInjectable(null, null, Principal.class), up);
     assertNull(up.getInjectable(null, null, String.class));
   }
+
+  @Test
+  @TestException(exception = IllegalArgumentException.class)
+  public void userNameEmpty() {
+    UserProvider.UserParam userParam = new UserProvider.UserParam("username");
+    userParam.parseParam("");
+  }
+
+  @Test
+  @TestException(exception = IllegalArgumentException.class)
+  public void userNameTooLong() {
+    UserProvider.UserParam userParam = new UserProvider.UserParam("username");
+    userParam.parseParam("a123456789012345678901234567890x");
+  }
+
+  @Test
+  @TestException(exception = IllegalArgumentException.class)
+  public void userNameInvalidStart() {
+    UserProvider.UserParam userParam = new UserProvider.UserParam("username");
+    userParam.parseParam("1x");
+  }
+
+  @Test
+  @TestException(exception = IllegalArgumentException.class)
+  public void userNameInvalidDollarSign() {
+    UserProvider.UserParam userParam = new UserProvider.UserParam("username");
+    userParam.parseParam("1$x");
+  }
+
+  @Test
+  public void userNameMinLength() {
+    UserProvider.UserParam userParam = new UserProvider.UserParam("username");
+    assertNotNull(userParam.parseParam("a"));
+  }
+
+  @Test
+  public void userNameMaxLength() {
+    UserProvider.UserParam userParam = new UserProvider.UserParam("username");
+    assertNotNull(userParam.parseParam("a123456789012345678901234567890"));
+  }
+
+  @Test
+  public void userNameValidDollarSign() {
+    UserProvider.UserParam userParam = new UserProvider.UserParam("username");
+    assertNotNull(userParam.parseParam("a$"));
+  }
+
 }

+ 106 - 3
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -17,6 +17,9 @@ Trunk (Unreleased)
     reliably storing HDFS edit logs. See dedicated section below for breakdown
     of subtasks.
 
+    HDFS-3495. Update Balancer to support new NetworkTopology with NodeGroup.
+    (Junping Du via szetszwo)
+
   IMPROVEMENTS
 
     HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants ->
@@ -159,6 +162,20 @@ Trunk (Unreleased)
 
     HDFS-4153. Add START_MSG/SHUTDOWN_MSG for JournalNode. (liang xie via atm)
 
+    HDFS-3935. Add JournalNode to the start/stop scripts (Andy Isaacson via todd)
+
+    HDFS-4206. Change the fields in INode and its subclasses to private.
+    (szetszwo)
+
+    HDFS-4215. Remove locking from addToParent(..) since it is used in image
+    loading, and add INode.isFile().  (szetszwo)
+
+    HDFS-4200. Reduce the size of synchronized sections in PacketResponder.
+    (suresh)
+
+    HDFS-4209. Clean up the addNode/addChild/addChildNoQuotaCheck methods in
+    FSDirectory and INodeDirectory. (szetszwo)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -245,8 +262,11 @@ Trunk (Unreleased)
     HDFS-4115. TestHDFSCLI.testAll fails one test due to number format.
     (Trevor Robinson via suresh)
 
-    HDFS-4106. BPServiceActor#lastHeartbeat, lastBlockReport and
-    lastDeletedReport should be volatile. (Jing Zhao via suresh)
+    HDFS-4165. Faulty sanity check in FsDirectory.unprotectedSetQuota.
+    (Binglin Chang via suresh)
+
+    HDFS-4105. The SPNEGO user for secondary namenode should use the web 
+    keytab. (Arpit Gupta via jitendra)
 
   BREAKDOWN OF HDFS-3077 SUBTASKS
 
@@ -458,6 +478,18 @@ Release 2.0.3-alpha - Unreleased
     HDFS-4046. Rename ChecksumTypeProto enum NULL since it is illegal in
     C/C++. (Binglin Chang via suresh)
 
+    HDFS-4048. Use ERROR instead of INFO for volume failure logs.
+    (Stephen Chu via eli)
+
+    HDFS-1322. Document umask in DistributedFileSystem#mkdirs javadocs.
+    (Colin Patrick McCabe via eli)
+
+    HDFS-4038. Override toString() for BookKeeperEditLogInputStream.
+    (Vinay via umamahesh)
+
+    HDFS-4214. OfflineEditsViewer should print out the offset at which it
+    encountered an error. (Colin Patrick McCabe via atm)
+
   OPTIMIZATIONS
 
   BUG FIXES
@@ -568,6 +600,49 @@ Release 2.0.3-alpha - Unreleased
 
     HDFS-3810. Implement format() for BKJM (Ivan Kelly via umamahesh)
 
+    HDFS-4162. Some malformed and unquoted HTML strings are returned from 
+    datanode web ui. (Darek Dagit via suresh)
+
+    HDFS-4164. fuse_dfs: add -lrt to the compiler command line on Linux.
+    (Colin Patrick McCabe via eli)
+
+    HDFS-3921. NN will prematurely consider blocks missing when entering active
+    state while still in safe mode. (atm)
+
+    HDFS-4106. BPServiceActor#lastHeartbeat, lastBlockReport and
+    lastDeletedReport should be volatile. (Jing Zhao via suresh)
+
+    HDFS-4139. fuse-dfs RO mode still allows file truncation.
+    (Colin Patrick McCabe via eli)    
+
+    HDFS-4104. dfs -test -d prints inappropriate error on nonexistent directory
+    (Andy Isaacson via daryn)
+
+    HDFS-3623. BKJM: zkLatchWaitTimeout hard coded to 6000. Make use of ZKSessionTimeout instead.
+    (umamahesh)
+
+    HDFS-4100. Fix all findbug security warings. (Liang Xie via eli)
+
+    HDFS-3507. DFS#isInSafeMode needs to execute only on Active NameNode.
+    (Vinay via atm)
+
+    HDFS-4156. Seeking to a negative position should throw an IOE.
+    (Eli Reisman via eli)
+
+    HDFS-4171. WebHDFS and HttpFs should accept only valid Unix user 
+    names. (tucu)
+
+    HDFS-4178. Shell scripts should not close stderr (Andy Isaacson via daryn)
+
+    HDFS-4179. BackupNode: allow reads, fix checkpointing, safeMode. (shv)
+
+    HDFS-4216. Do not ignore QuotaExceededException when adding symlinks.
+    (szetszwo)
+
+    HDFS-4242. Map.Entry is incorrectly used in LeaseManager since the behavior
+    of it is undefined after the iteration or modifications of the map.
+    (szetszwo)
+
 Release 2.0.2-alpha - 2012-09-07 
 
   INCOMPATIBLE CHANGES
@@ -1945,10 +2020,25 @@ Release 2.0.0-alpha - 05-23-2012
     
     HDFS-3039. Address findbugs and javadoc warnings on branch. (todd via atm)
 
+Release 0.23.6 - UNRELEASED
+
+  INCOMPATIBLE CHANGES
+
+  NEW FEATURES
+
+  IMPROVEMENTS
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
 Release 0.23.5 - UNRELEASED
 
   INCOMPATIBLE CHANGES
 
+    HDFS-4080. Add a separate logger for block state change logs to enable turning
+    off those logs. (Kihwal Lee via suresh)
+
   NEW FEATURES
 
   IMPROVEMENTS
@@ -1957,6 +2047,11 @@ Release 0.23.5 - UNRELEASED
 
     HDFS-4075. Reduce recommissioning overhead (Kihwal Lee via daryn)
 
+    HDFS-3990.  NN's health report has severe performance problems (daryn)
+
+    HDFS-4181.  LeaseManager tries to double remove and prints extra messages
+    (Kihwal Lee via daryn)
+
   BUG FIXES
 
     HDFS-3829. TestHftpURLTimeouts fails intermittently with JDK7  (Trevor
@@ -1971,7 +2066,15 @@ Release 0.23.5 - UNRELEASED
     HDFS-4090. getFileChecksum() result incompatible when called against
     zero-byte files. (Kihwal Lee via daryn)
 
-Release 0.23.4 - UNRELEASED
+    HDFS-4172. namenode does not URI-encode parameters when building URI for
+    datanode request (Derek Dagit via bobby)
+
+    HDFS-4182. SecondaryNameNode leaks NameCache entries (bobby)
+
+    HDFS-4186. logSync() is called with the write lock held while releasing
+    lease (Kihwal Lee via daryn)
+
+Release 0.23.4
 
   INCOMPATIBLE CHANGES
 

+ 8 - 2
hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java

@@ -129,8 +129,9 @@ class BookKeeperEditLogInputStream extends EditLogInputStream {
   
   @Override
   public String getName() {
-    return String.format("BookKeeper[%s,first=%d,last=%d]", 
-        lh.toString(), firstTxId, lastTxId);
+    return String.format(
+        "BookKeeperLedger[ledgerId=%d,firstTxId=%d,lastTxId=%d]", lh.getId(),
+        firstTxId, lastTxId);
   }
 
   @Override
@@ -157,6 +158,11 @@ class BookKeeperEditLogInputStream extends EditLogInputStream {
     }
   }
 
+  @Override
+  public String toString() {
+    return ("BookKeeperEditLogInputStream {" + this.getName() + "}");
+  }
+
   /**
    * Input stream implementation which can be used by 
    * FSEditLogOp.Reader

+ 10 - 3
hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java

@@ -180,9 +180,16 @@ public class BookKeeperJournalManager implements JournalManager {
 
     try {
       zkConnectLatch = new CountDownLatch(1);
-      zkc = new ZooKeeper(zkConnect, conf.getInt(BKJM_ZK_SESSION_TIMEOUT,
-          BKJM_ZK_SESSION_TIMEOUT_DEFAULT), new ZkConnectionWatcher());
-      if (!zkConnectLatch.await(6000, TimeUnit.MILLISECONDS)) {
+      int bkjmZKSessionTimeout = conf.getInt(BKJM_ZK_SESSION_TIMEOUT,
+          BKJM_ZK_SESSION_TIMEOUT_DEFAULT);
+      zkc = new ZooKeeper(zkConnect, bkjmZKSessionTimeout,
+          new ZkConnectionWatcher());
+      // Configured zk session timeout + some extra grace period (here
+      // BKJM_ZK_SESSION_TIMEOUT_DEFAULT used as grace period)
+      int zkConnectionLatchTimeout = bkjmZKSessionTimeout
+          + BKJM_ZK_SESSION_TIMEOUT_DEFAULT;
+      if (!zkConnectLatch
+          .await(zkConnectionLatchTimeout, TimeUnit.MILLISECONDS)) {
         throw new IOException("Error connecting to zookeeper");
       }
 

+ 16 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.sh

@@ -74,7 +74,7 @@ fi
 #---------------------------------------------------------
 # secondary namenodes (if any)
 
-SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>&-)
+SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
 
 if [ -n "$SECONDARY_NAMENODES" ]; then
   echo "Starting secondary namenodes [$SECONDARY_NAMENODES]"
@@ -85,6 +85,21 @@ if [ -n "$SECONDARY_NAMENODES" ]; then
       --script "$bin/hdfs" start secondarynamenode
 fi
 
+#---------------------------------------------------------
+# quorumjournal nodes (if any)
+
+SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>/dev/null)
+
+case "$SHARED_EDITS_DIR" in
+qjournal://*)
+  JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
+  echo "Starting journal nodes [$JOURNAL_NODES]"
+  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
+      --config "$HADOOP_CONF_DIR" \
+      --hostnames "$JOURNAL_NODES" \
+      --script "$bin/hdfs" start journalnode ;;
+esac
+
 #---------------------------------------------------------
 # ZK Failover controllers, if auto-HA is enabled
 AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)
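
For readers unfamiliar with the URI format, a rough Java equivalent of the sed expression above, shown only to clarify what the script extracts; the qjournal URI is illustrative:

    // Turns "qjournal://host1:8485;host2:8485/cluster" into "host1 host2".
    String sharedEditsDir = "qjournal://nn1.example.com:8485;nn2.example.com:8485/mycluster";
    String journalNodes = sharedEditsDir
        .replaceAll("qjournal://([^/]*)/.*", "$1")  // keep the authority part
        .replaceAll(";", " ")                       // one entry per host
        .replaceAll(":[0-9]*", "");                 // drop ports
    System.out.println(journalNodes);               // nn1.example.com nn2.example.com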

+ 16 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.sh

@@ -50,7 +50,7 @@ fi
 #---------------------------------------------------------
 # secondary namenodes (if any)
 
-SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>&-)
+SECONDARY_NAMENODES=$($HADOOP_PREFIX/bin/hdfs getconf -secondarynamenodes 2>/dev/null)
 
 if [ -n "$SECONDARY_NAMENODES" ]; then
   echo "Stopping secondary namenodes [$SECONDARY_NAMENODES]"
@@ -61,6 +61,21 @@ if [ -n "$SECONDARY_NAMENODES" ]; then
       --script "$bin/hdfs" stop secondarynamenode
 fi
 
+#---------------------------------------------------------
+# quorumjournal nodes (if any)
+
+SHARED_EDITS_DIR=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.namenode.shared.edits.dir 2>/dev/null)
+
+case "$SHARED_EDITS_DIR" in
+qjournal://*)
+  JOURNAL_NODES=$(echo "$SHARED_EDITS_DIR" | sed 's,qjournal://\([^/]*\)/.*,\1,g; s/;/ /g; s/:[0-9]*//g')
+  echo "Stopping journal nodes [$JOURNAL_NODES]"
+  "$HADOOP_PREFIX/sbin/hadoop-daemons.sh" \
+      --config "$HADOOP_CONF_DIR" \
+      --hostnames "$JOURNAL_NODES" \
+      --script "$bin/hdfs" stop journalnode ;;
+esac
+
 #---------------------------------------------------------
 # ZK Failover controllers, if auto-HA is enabled
 AUTOHA_ENABLED=$($HADOOP_PREFIX/bin/hdfs getconf -confKey dfs.ha.automatic-failover.enabled)

+ 17 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java

@@ -1874,10 +1874,25 @@ public class DFSClient implements java.io.Closeable {
   /**
    * Enter, leave or get safe mode.
    * 
-   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction)
+   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction,boolean)
    */
   public boolean setSafeMode(SafeModeAction action) throws IOException {
-    return namenode.setSafeMode(action);
+    return setSafeMode(action, false);
+  }
+  
+  /**
+   * Enter, leave or get safe mode.
+   * 
+   * @param action
+   *          One of SafeModeAction.SAFEMODE_GET, SafeModeAction.SAFEMODE_ENTER
+   *          and SafeModeAction.SAFEMODE_LEAVE
+   * @param isChecked
+   *          If true, check only the active NameNode's safemode status;
+   *          otherwise check the first NameNode's status.
+   * @see ClientProtocol#setSafeMode(HdfsConstants.SafeModeAction,boolean)
+   */
+  public boolean setSafeMode(SafeModeAction action, boolean isChecked) throws IOException {
+    return namenode.setSafeMode(action, isChecked);    
   }
 
   /**

+ 3 - 0
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java

@@ -1076,6 +1076,9 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable
     if (targetPos > getFileLength()) {
       throw new IOException("Cannot seek after EOF");
     }
+    if (targetPos < 0) {
+      throw new IOException("Cannot seek to negative offset");
+    }
     if (closed) {
       throw new IOException("Stream is closed!");
     }

+ 9 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java

@@ -233,7 +233,7 @@ public class DFSUtil {
   /**
    * Given a list of path components returns a path as a UTF8 String
    */
-  public static String byteArray2String(byte[][] pathComponents) {
+  public static String byteArray2PathString(byte[][] pathComponents) {
     if (pathComponents.length == 0)
       return "";
     if (pathComponents.length == 1 && pathComponents[0].length == 0) {
@@ -254,6 +254,14 @@ public class DFSUtil {
     return null;
   }
 
+  /** Convert an object representing a path to a string. */
+  public static String path2String(final Object path) {
+    return path == null? null
+        : path instanceof String? (String)path
+        : path instanceof byte[][]? byteArray2PathString((byte[][])path)
+        : path.toString();
+  }
+
   /**
    * Splits the array of bytes into array of arrays of bytes
    * on byte separator

+ 44 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java

@@ -509,14 +509,32 @@ public class DistributedFileSystem extends FileSystem {
   }
   
   /**
-   * Create a directory with given name and permission, only when
-   * parent directory exists.
+   * Create a directory, only when the parent directories exist.
+   *
+   * See {@link FsPermission#applyUMask(FsPermission)} for details of how
+   * the permission is applied.
+   *
+   * @param f           The path to create
+   * @param permission  The permission.  See FsPermission#applyUMask for 
+   *                    details about how this is used to calculate the
+   *                    effective permission.
    */
   public boolean mkdir(Path f, FsPermission permission) throws IOException {
     statistics.incrementWriteOps(1);
     return dfs.mkdirs(getPathName(f), permission, false);
   }
 
+  /**
+   * Create a directory and its parent directories.
+   *
+   * See {@link FsPermission#applyUMask(FsPermission)} for details of how
+   * the permission is applied.
+   *
+   * @param f           The path to create
+   * @param permission  The permission.  See FsPermission#applyUMask for 
+   *                    details about how this is used to calculate the
+   *                    effective permission.
+   */
   @Override
   public boolean mkdirs(Path f, FsPermission permission) throws IOException {
     statistics.incrementWriteOps(1);
@@ -609,11 +627,27 @@ public class DistributedFileSystem extends FileSystem {
    * Enter, leave or get safe mode.
    *  
    * @see org.apache.hadoop.hdfs.protocol.ClientProtocol#setSafeMode(
-   *    HdfsConstants.SafeModeAction)
+   *    HdfsConstants.SafeModeAction,boolean)
    */
   public boolean setSafeMode(HdfsConstants.SafeModeAction action) 
   throws IOException {
-    return dfs.setSafeMode(action);
+    return setSafeMode(action, false);
+  }
+
+  /**
+   * Enter, leave or get safe mode.
+   * 
+   * @param action
+   *          One of SafeModeAction.SAFEMODE_ENTER, SafeModeAction.SAFEMODE_LEAVE
+   *          and SafeModeAction.SAFEMODE_GET
+   * @param isChecked
+   *          If true, check only the active NameNode's safemode status;
+   *          otherwise check the first NameNode's status.
+   * @see org.apache.hadoop.hdfs.protocol.ClientProtocol#setSafeMode(SafeModeAction, boolean)
+   */
+  public boolean setSafeMode(HdfsConstants.SafeModeAction action,
+      boolean isChecked) throws IOException {
+    return dfs.setSafeMode(action, isChecked);
   }
 
   /**
@@ -860,12 +894,14 @@ public class DistributedFileSystem extends FileSystem {
   }
 
   /**
-   * Utility function that returns if the NameNode is in safemode or not.
-   *
+   * Utility function that returns whether the NameNode is in safemode. In HA
+   * mode, this API returns only the active NameNode's safemode status.
+   * 
    * @return true if NameNode is in safemode, false otherwise.
-   * @throws IOException when there is an issue communicating with the NameNode
+   * @throws IOException
+   *           when there is an issue communicating with the NameNode
    */
   public boolean isInSafeMode() throws IOException {
-    return setSafeMode(SafeModeAction.SAFEMODE_GET);
+    return setSafeMode(SafeModeAction.SAFEMODE_GET, true);
   }
 }

+ 19 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HftpFileSystem.java

@@ -82,12 +82,8 @@ import org.xml.sax.helpers.XMLReaderFactory;
 @InterfaceStability.Evolving
 public class HftpFileSystem extends FileSystem
     implements DelegationTokenRenewer.Renewable {
-  private static final DelegationTokenRenewer<HftpFileSystem> dtRenewer
-      = new DelegationTokenRenewer<HftpFileSystem>(HftpFileSystem.class);
-  
   static {
     HttpURLConnection.setFollowRedirects(true);
-    dtRenewer.start();
   }
 
   public static final Text TOKEN_KIND = new Text("HFTP delegation");
@@ -106,6 +102,16 @@ public class HftpFileSystem extends FileSystem
   private static final HftpDelegationTokenSelector hftpTokenSelector =
       new HftpDelegationTokenSelector();
 
+  private DelegationTokenRenewer dtRenewer = null;
+
+  private synchronized void addRenewAction(final HftpFileSystem hftpFs) {
+    if (dtRenewer == null) {
+      dtRenewer = DelegationTokenRenewer.getInstance();
+    }
+
+    dtRenewer.addRenewAction(hftpFs);
+  }
+
   public static final SimpleDateFormat getDateFormat() {
     final SimpleDateFormat df = new SimpleDateFormat(HFTP_DATE_FORMAT);
     df.setTimeZone(TimeZone.getTimeZone(HFTP_TIMEZONE));
@@ -202,7 +208,7 @@ public class HftpFileSystem extends FileSystem
     if (token != null) {
       setDelegationToken(token);
       if (createdToken) {
-        dtRenewer.addRenewAction(this);
+        addRenewAction(this);
         LOG.debug("Created new DT for " + token.getService());
       } else {
         LOG.debug("Found existing DT for " + token.getService());
@@ -395,6 +401,14 @@ public class HftpFileSystem extends FileSystem
     return new FSDataInputStream(new RangeHeaderInputStream(u));
   }
 
+  @Override
+  public void close() throws IOException {
+    super.close();
+    if (dtRenewer != null) {
+      dtRenewer.removeRenewAction(this); // blocks
+    }
+  }
+
   /** Class to parse and store a listing reply from the server. */
   class LsParser extends DefaultHandler {
 

+ 8 - 5
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java

@@ -610,7 +610,7 @@ public interface ClientProtocol {
    * <p>
    * Safe mode is entered automatically at name node startup.
    * Safe mode can also be entered manually using
-   * {@link #setSafeMode(HdfsConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_GET)}.
+   * {@link #setSafeMode(HdfsConstants.SafeModeAction,boolean) setSafeMode(SafeModeAction.SAFEMODE_ENTER,false)}.
    * <p>
    * At startup the name node accepts data node reports collecting
    * information about block locations.
@@ -626,11 +626,11 @@ public interface ClientProtocol {
    * Then the name node leaves safe mode.
    * <p>
    * If safe mode is turned on manually using
-   * {@link #setSafeMode(HdfsConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_ENTER)}
+   * {@link #setSafeMode(HdfsConstants.SafeModeAction,boolean) setSafeMode(SafeModeAction.SAFEMODE_ENTER,false)}
    * then the name node stays in safe mode until it is manually turned off
-   * using {@link #setSafeMode(HdfsConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_LEAVE)}.
+   * using {@link #setSafeMode(HdfsConstants.SafeModeAction,boolean) setSafeMode(SafeModeAction.SAFEMODE_LEAVE,false)}.
    * Current state of the name node can be verified using
-   * {@link #setSafeMode(HdfsConstants.SafeModeAction) setSafeMode(SafeModeAction.SAFEMODE_GET)}
+   * {@link #setSafeMode(HdfsConstants.SafeModeAction,boolean) setSafeMode(SafeModeAction.SAFEMODE_GET,false)}
    * <h4>Configuration parameters:</h4>
    * <tt>dfs.safemode.threshold.pct</tt> is the threshold parameter.<br>
    * <tt>dfs.safemode.extension</tt> is the safe mode extension parameter.<br>
@@ -648,12 +648,15 @@ public interface ClientProtocol {
    * @param action  <ul> <li>0 leave safe mode;</li>
    *                <li>1 enter safe mode;</li>
    *                <li>2 get safe mode state.</li></ul>
+   * @param isChecked If true, the action is applied only on the active NameNode.
+   * 
    * @return <ul><li>0 if the safe mode is OFF or</li> 
    *         <li>1 if the safe mode is ON.</li></ul>
    *                   
    * @throws IOException
    */
-  public boolean setSafeMode(HdfsConstants.SafeModeAction action) 
+  @Idempotent
+  public boolean setSafeMode(HdfsConstants.SafeModeAction action, boolean isChecked) 
       throws IOException;
 
   /**

+ 15 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeID.java

@@ -38,7 +38,8 @@ public class DatanodeID implements Comparable<DatanodeID> {
   public static final DatanodeID[] EMPTY_ARRAY = {};
 
   private String ipAddr;     // IP address
-  private String hostName;   // hostname
+  private String hostName;   // hostname claimed by datanode
+  private String peerHostName; // hostname from the actual connection
   private String storageID;  // unique per cluster storageID
   private int xferPort;      // data streaming port
   private int infoPort;      // info server port
@@ -51,6 +52,7 @@ public class DatanodeID implements Comparable<DatanodeID> {
         from.getXferPort(),
         from.getInfoPort(),
         from.getIpcPort());
+    this.peerHostName = from.getPeerHostName();
   }
   
   /**
@@ -76,6 +78,10 @@ public class DatanodeID implements Comparable<DatanodeID> {
     this.ipAddr = ipAddr;
   }
 
+  public void setPeerHostName(String peerHostName) {
+    this.peerHostName = peerHostName;
+  }
+  
   public void setStorageID(String storageID) {
     this.storageID = storageID;
   }
@@ -94,6 +100,13 @@ public class DatanodeID implements Comparable<DatanodeID> {
     return hostName;
   }
 
+  /**
+   * @return hostname from the actual connection 
+   */
+  public String getPeerHostName() {
+    return peerHostName;
+  }
+  
   /**
    * @return IP:xferPort string
    */
@@ -202,6 +215,7 @@ public class DatanodeID implements Comparable<DatanodeID> {
   public void updateRegInfo(DatanodeID nodeReg) {
     ipAddr = nodeReg.getIpAddr();
     hostName = nodeReg.getHostName();
+    peerHostName = nodeReg.getPeerHostName();
     xferPort = nodeReg.getXferPort();
     infoPort = nodeReg.getInfoPort();
     ipcPort = nodeReg.getIpcPort();

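DatanodeID now tracks two hostnames: the name the datanode claims for itself and the name resolved from the registration connection (peerHostName), which is propagated by the copy constructor and updateRegInfo(). A small illustrative stand-in (not the real DatanodeID, whose constructors are not reproduced here) showing why keeping both is useful:

    // Stand-in illustrating the two-hostname bookkeeping added to DatanodeID;
    // the field and accessor names mirror the patch, the class itself does not.
    class NodeIdentity {
      private final String ipAddr;
      private final String hostName;   // hostname claimed by the datanode itself
      private String peerHostName;     // hostname resolved from the actual connection

      NodeIdentity(String ipAddr, String hostName) {
        this.ipAddr = ipAddr;
        this.hostName = hostName;
      }

      void setPeerHostName(String peerHostName) { this.peerHostName = peerHostName; }

      String getIpAddr()       { return ipAddr; }
      String getHostName()     { return hostName; }
      String getPeerHostName() { return peerHostName; }  // null until registration

      /** Illustrative helper (not in the patch): prefer the name seen on the wire. */
      String effectiveHostName() {
        return peerHostName != null ? peerHostName : hostName;
      }
    }

Keeping the connection-derived name lets later host checks match what the NameNode actually observed, even if the datanode misreports its own hostname.
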
+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java

@@ -518,7 +518,8 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements
   public SetSafeModeResponseProto setSafeMode(RpcController controller,
       SetSafeModeRequestProto req) throws ServiceException {
     try {
-      boolean result = server.setSafeMode(PBHelper.convert(req.getAction()));
+      boolean result = server.setSafeMode(PBHelper.convert(req.getAction()),
+          req.getChecked());
       return SetSafeModeResponseProto.newBuilder().setResult(result).build();
     } catch (IOException e) {
       throw new ServiceException(e);

+ 3 - 3
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java

@@ -507,9 +507,9 @@ public class ClientNamenodeProtocolTranslatorPB implements
   }
 
   @Override
-  public boolean setSafeMode(SafeModeAction action) throws IOException {
-    SetSafeModeRequestProto req = SetSafeModeRequestProto.newBuilder().
-        setAction(PBHelper.convert(action)).build();
+  public boolean setSafeMode(SafeModeAction action, boolean isChecked) throws IOException {
+    SetSafeModeRequestProto req = SetSafeModeRequestProto.newBuilder()
+        .setAction(PBHelper.convert(action)).setChecked(isChecked).build();
     try {
       return rpcProxy.setSafeMode(null, req).getResult();
     } catch (ServiceException e) {

+ 4 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/GetJournalEditServlet.java

@@ -31,6 +31,7 @@ import javax.servlet.http.HttpServlet;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
 
+import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -139,8 +140,9 @@ public class GetJournalEditServlet extends HttpServlet {
       HttpServletRequest request, HttpServletResponse response)
       throws IOException {
     String myStorageInfoString = storage.toColonSeparatedString();
-    String theirStorageInfoString = request.getParameter(STORAGEINFO_PARAM);
-    
+    String theirStorageInfoString = StringEscapeUtils.escapeHtml(
+        request.getParameter(STORAGEINFO_PARAM));
+
     if (theirStorageInfoString != null
         && !myStorageInfoString.equals(theirStorageInfoString)) {
       String msg = "This node has storage info '" + myStorageInfoString

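The servlet change above escapes the client-supplied STORAGEINFO_PARAM before it can be echoed back in an error message, closing a reflected-markup hole. A minimal illustration of the same pattern with commons-lang; the servlet plumbing is omitted and the message text only approximates the one in the hunk:

    import org.apache.commons.lang.StringEscapeUtils;

    class EscapeStorageInfoParam {
      /** Escape an untrusted request parameter before embedding it in a message. */
      static String storageMismatchMessage(String myStorageInfo, String rawParam) {
        String theirStorageInfo = StringEscapeUtils.escapeHtml(rawParam);
        return "This node has storage info '" + myStorageInfo
            + "' but the request carried '" + theirStorageInfo + "'";
      }

      public static void main(String[] args) {
        // <script> comes back as &lt;script&gt;, so it can no longer render as markup
        System.out.println(storageMismatchMessage("ns:1234",
            "<script>alert(1)</script>"));
      }
    }
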
+ 185 - 57
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java

@@ -168,7 +168,7 @@ import org.apache.hadoop.util.ToolRunner;
  * <ol>
  * <li>The cluster is balanced. Exiting
  * <li>No block can be moved. Exiting...
- * <li>No block has been moved for 3 iterations. Exiting...
+ * <li>No block has been moved for 5 iterations. Exiting...
  * <li>Received an IO exception: failure reason. Exiting...
  * <li>Another balancer is running. Exiting...
  * </ol>
@@ -222,7 +222,7 @@ public class Balancer {
   private Map<String, BalancerDatanode> datanodes
                  = new HashMap<String, BalancerDatanode>();
   
-  private NetworkTopology cluster = new NetworkTopology();
+  private NetworkTopology cluster;
   
   final static private int MOVER_THREAD_POOL_SIZE = 1000;
   final private ExecutorService moverExecutor = 
@@ -249,7 +249,7 @@ public class Balancer {
      * Return true if a block and its proxy are chosen; false otherwise
      */
     private boolean chooseBlockAndProxy() {
-      // iterate all source's blocks until find a good one    
+      // iterate all source's blocks until find a good one
       for (Iterator<BalancerBlock> blocks=
         source.getBlockIterator(); blocks.hasNext();) {
         if (markMovedIfGoodBlock(blocks.next())) {
@@ -293,22 +293,35 @@ public class Balancer {
      * @return true if a proxy is found; otherwise false
      */
     private boolean chooseProxySource() {
-      // check if there is replica which is on the same rack with the target
+      final DatanodeInfo targetDN = target.getDatanode();
+      boolean find = false;
       for (BalancerDatanode loc : block.getLocations()) {
-        if (cluster.isOnSameRack(loc.getDatanode(), target.getDatanode())) {
-          if (loc.addPendingBlock(this)) {
-            proxySource = loc;
+        // check if there is replica which is on the same rack with the target
+        if (cluster.isOnSameRack(loc.getDatanode(), targetDN) && addTo(loc)) {
+          find = true;
+          // if cluster is not nodegroup aware or the proxy is on the same 
+          // nodegroup as the target, then we have already found the nearest proxy
+          if (!cluster.isNodeGroupAware() 
+              || cluster.isOnSameNodeGroup(loc.getDatanode(), targetDN)) {
             return true;
           }
         }
-      }
-      // find out a non-busy replica
-      for (BalancerDatanode loc : block.getLocations()) {
-        if (loc.addPendingBlock(this)) {
-          proxySource = loc;
-          return true;
+        
+        if (!find) {
+          // find a non-busy replica outside the target's rack
+          find = addTo(loc);
         }
       }
+      
+      return find;
+    }
+    
+    // add a BalancerDatanode as proxy source for specific block movement
+    private boolean addTo(BalancerDatanode bdn) {
+      if (bdn.addPendingBlock(this)) {
+        proxySource = bdn;
+        return true;
+      }
       return false;
     }
     
@@ -686,7 +699,7 @@ public class Balancer {
         NodeTask task = tasks.next();
         BalancerDatanode target = task.getDatanode();
         PendingBlockMove pendingBlock = new PendingBlockMove();
-        if ( target.addPendingBlock(pendingBlock) ) { 
+        if (target.addPendingBlock(pendingBlock)) { 
           // target is not busy, so do a tentative block allocation
           pendingBlock.source = this;
           pendingBlock.target = target;
@@ -787,9 +800,10 @@ public class Balancer {
    */
   private static void checkReplicationPolicyCompatibility(Configuration conf
       ) throws UnsupportedActionException {
-    if (BlockPlacementPolicy.getInstance(conf, null, null).getClass() != 
-        BlockPlacementPolicyDefault.class) {
-      throw new UnsupportedActionException("Balancer without BlockPlacementPolicyDefault");
+    if (!(BlockPlacementPolicy.getInstance(conf, null, null) instanceof 
+        BlockPlacementPolicyDefault)) {
+      throw new UnsupportedActionException(
+          "Balancer without BlockPlacementPolicyDefault");
     }
   }
 
@@ -804,6 +818,7 @@ public class Balancer {
     this.threshold = p.threshold;
     this.policy = p.policy;
     this.nnc = theblockpool;
+    cluster = NetworkTopology.getInstance(conf);
   }
   
   /* Shuffle datanode array */
@@ -914,9 +929,15 @@ public class Balancer {
    * Return total number of bytes to move in this iteration
    */
   private long chooseNodes() {
-    // Match nodes on the same rack first
+    // First, match nodes on the same node group if cluster has nodegroup
+    // awareness
+    if (cluster.isNodeGroupAware()) {
+      chooseNodesOnSameNodeGroup();
+    }
+    
+    // Then, match nodes on the same rack
     chooseNodes(true);
-    // Then match nodes on different racks
+    // At last, match nodes on different racks
     chooseNodes(false);
     
     assert (datanodes.size() >= sources.size()+targets.size())
@@ -931,6 +952,102 @@ public class Balancer {
     }
     return bytesToMove;
   }
+  
+  /**
+   * Decide all <source, target> pairs where source and target are 
+   * on the same NodeGroup
+   */
+  private void chooseNodesOnSameNodeGroup() {
+
+    /* first step: match each overUtilized datanode (source) to
+     * one or more underUtilized datanodes within same NodeGroup(targets).
+     */
+    chooseOnSameNodeGroup(overUtilizedDatanodes, underUtilizedDatanodes);
+
+    /* match each remaining overutilized datanode (source) to below average 
+     * utilized datanodes within the same NodeGroup(targets).
+     * Note only overutilized datanodes that haven't had that max bytes to move
+     * satisfied in step 1 are selected
+     */
+    chooseOnSameNodeGroup(overUtilizedDatanodes, belowAvgUtilizedDatanodes);
+
+    /* match each remaining underutilized datanode to above average utilized 
+     * datanodes within the same NodeGroup.
+     * Note only underutilized datanodes that have not had that max bytes to
+     * move satisfied in step 1 are selected.
+     */
+    chooseOnSameNodeGroup(underUtilizedDatanodes, aboveAvgUtilizedDatanodes);
+  }
+  
+  /**
+   * Match two sets of nodes within the same NodeGroup: one set should be source
+   * nodes (utilization > Avg) and the other should be destination nodes 
+   * (utilization < Avg).
+   * @param datanodes
+   * @param candidates
+   */
+  private <D extends BalancerDatanode, C extends BalancerDatanode> void 
+      chooseOnSameNodeGroup(Collection<D> datanodes, Collection<C> candidates) {
+    for (Iterator<D> i = datanodes.iterator(); i.hasNext();) {
+      final D datanode = i.next();
+      for(; chooseOnSameNodeGroup(datanode, candidates.iterator()); );
+      if (!datanode.isMoveQuotaFull()) {
+        i.remove();
+      }
+    }
+  }
+  
+  /**
+   * Match one datanode with a set of candidate nodes within the same NodeGroup.
+   */
+  private <T extends BalancerDatanode> boolean chooseOnSameNodeGroup(
+      BalancerDatanode dn, Iterator<T> candidates) {
+    final T chosen = chooseCandidateOnSameNodeGroup(dn, candidates);
+    if (chosen == null) {
+      return false;
+    }
+    if (dn instanceof Source) {
+      matchSourceWithTargetToMove((Source)dn, chosen);
+    } else {
+      matchSourceWithTargetToMove((Source)chosen, dn);
+    }
+    if (!chosen.isMoveQuotaFull()) {
+      candidates.remove();
+    }
+    return true;
+  }
+  
+  private void matchSourceWithTargetToMove(
+      Source source, BalancerDatanode target) {
+    long size = Math.min(source.availableSizeToMove(), target.availableSizeToMove());
+    NodeTask nodeTask = new NodeTask(target, size);
+    source.addNodeTask(nodeTask);
+    target.incScheduledSize(nodeTask.getSize());
+    sources.add(source);
+    targets.add(target);
+    LOG.info("Decided to move "+StringUtils.byteDesc(size)+" bytes from "
+        +source.datanode.getName() + " to " + target.datanode.getName());
+  }
+  
+  /** choose a datanode from <code>candidates</code> within the same NodeGroup 
+   * of <code>dn</code>.
+   */
+  private <T extends BalancerDatanode> T chooseCandidateOnSameNodeGroup(
+      BalancerDatanode dn, Iterator<T> candidates) {
+    if (dn.isMoveQuotaFull()) {
+      for(; candidates.hasNext(); ) {
+        final T c = candidates.next();
+        if (!c.isMoveQuotaFull()) {
+          candidates.remove();
+          continue;
+        }
+        if (cluster.isOnSameNodeGroup(dn.getDatanode(), c.getDatanode())) {
+          return c;
+        }
+      }
+    }
+    return null;
+  }
 
   /* if onRack is true, decide all <source, target> pairs
    * where source and target are on the same rack; Otherwise
@@ -941,33 +1058,33 @@ public class Balancer {
     /* first step: match each overUtilized datanode (source) to
      * one or more underUtilized datanodes (targets).
      */
-    chooseTargets(underUtilizedDatanodes.iterator(), onRack);
+    chooseTargets(underUtilizedDatanodes, onRack);
     
     /* match each remaining overutilized datanode (source) to 
      * below average utilized datanodes (targets).
      * Note only overutilized datanodes that haven't had that max bytes to move
      * satisfied in step 1 are selected
      */
-    chooseTargets(belowAvgUtilizedDatanodes.iterator(), onRack);
+    chooseTargets(belowAvgUtilizedDatanodes, onRack);
 
-    /* match each remaining underutilized datanode to 
-     * above average utilized datanodes.
+    /* match each remaining underutilized datanode (target) to 
+     * above average utilized datanodes (source).
      * Note only underutilized datanodes that have not had that max bytes to
      * move satisfied in step 1 are selected.
      */
-    chooseSources(aboveAvgUtilizedDatanodes.iterator(), onRack);
+    chooseSources(aboveAvgUtilizedDatanodes, onRack);
   }
    
   /* choose targets from the target candidate list for each over utilized
    * source datanode. OnRackTarget determines if the chosen target 
    * should be on the same rack as the source
    */
-  private void chooseTargets(  
-      Iterator<BalancerDatanode> targetCandidates, boolean onRackTarget ) {
+  private void chooseTargets(
+      Collection<BalancerDatanode> targetCandidates, boolean onRackTarget ) {
     for (Iterator<Source> srcIterator = overUtilizedDatanodes.iterator();
         srcIterator.hasNext();) {
       Source source = srcIterator.next();
-      while (chooseTarget(source, targetCandidates, onRackTarget)) {
+      while (chooseTarget(source, targetCandidates.iterator(), onRackTarget)) {
       }
       if (!source.isMoveQuotaFull()) {
         srcIterator.remove();
@@ -981,11 +1098,11 @@ public class Balancer {
    * should be on the same rack as the target
    */
   private void chooseSources(
-      Iterator<Source> sourceCandidates, boolean onRackSource) {
+      Collection<Source> sourceCandidates, boolean onRackSource) {
     for (Iterator<BalancerDatanode> targetIterator = 
       underUtilizedDatanodes.iterator(); targetIterator.hasNext();) {
       BalancerDatanode target = targetIterator.next();
-      while (chooseSource(target, sourceCandidates, onRackSource)) {
+      while (chooseSource(target, sourceCandidates.iterator(), onRackSource)) {
       }
       if (!target.isMoveQuotaFull()) {
         targetIterator.remove();
@@ -1025,23 +1142,15 @@ public class Balancer {
     }
     if (foundTarget) {
       assert(target != null):"Choose a null target";
-      long size = Math.min(source.availableSizeToMove(),
-          target.availableSizeToMove());
-      NodeTask nodeTask = new NodeTask(target, size);
-      source.addNodeTask(nodeTask);
-      target.incScheduledSize(nodeTask.getSize());
-      sources.add(source);
-      targets.add(target);
+      matchSourceWithTargetToMove(source, target);
       if (!target.isMoveQuotaFull()) {
         targetCandidates.remove();
       }
-      LOG.info("Decided to move "+StringUtils.byteDesc(size)+" bytes from "
-          +source.datanode + " to " + target.datanode);
       return true;
     }
     return false;
   }
-  
+
   /* For the given target, choose sources from the source candidate list.
    * OnRackSource determines if the chosen source 
    * should be on the same rack as the target
@@ -1073,18 +1182,10 @@ public class Balancer {
     }
     if (foundSource) {
       assert(source != null):"Choose a null source";
-      long size = Math.min(source.availableSizeToMove(),
-          target.availableSizeToMove());
-      NodeTask nodeTask = new NodeTask(target, size);
-      source.addNodeTask(nodeTask);
-      target.incScheduledSize(nodeTask.getSize());
-      sources.add(source);
-      targets.add(target);
+      matchSourceWithTargetToMove(source, target);
       if ( !source.isMoveQuotaFull()) {
-        sourceCandidates.remove();
-      }
-      LOG.info("Decided to move "+StringUtils.byteDesc(size)+" bytes from "
-          +source.datanode + " to " + target.datanode);
+        sourceCandidates.remove();
+      }
       return true;
     }
     return false;
@@ -1226,6 +1327,10 @@ public class Balancer {
     if (block.isLocatedOnDatanode(target)) {
       return false;
     }
+    if (cluster.isNodeGroupAware() && 
+        isOnSameNodeGroupWithReplicas(target, block, source)) {
+      return false;
+    }
 
     boolean goodBlock = false;
     if (cluster.isOnSameRack(source.getDatanode(), target.getDatanode())) {
@@ -1257,10 +1362,32 @@ public class Balancer {
     }
     return goodBlock;
   }
-  
+
+  /**
+   * Check if any replica of the block (other than the source) is on the same
+   * node group as the target. If so, the target is not a good candidate for 
+   * placing this block replica, since we do not want two replicas in the same 
+   * node group after balancing.
+   * @param target targetDataNode
+   * @param block dataBlock
+   * @param source sourceDataNode
+   * @return true if any replica (other than the source) is on the same node
+   * group as the target
+   */
+  private boolean isOnSameNodeGroupWithReplicas(BalancerDatanode target,
+      BalancerBlock block, Source source) {
+    for (BalancerDatanode loc : block.locations) {
+      if (loc != source && 
+          cluster.isOnSameNodeGroup(loc.getDatanode(), target.getDatanode())) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   /* reset all fields in a balancer preparing for the next iteration */
-  private void resetData() {
-    this.cluster = new NetworkTopology();
+  private void resetData(Configuration conf) {
+    this.cluster = NetworkTopology.getInstance(conf);
     this.overUtilizedDatanodes.clear();
     this.aboveAvgUtilizedDatanodes.clear();
     this.belowAvgUtilizedDatanodes.clear();
@@ -1331,7 +1458,8 @@ public class Balancer {
   }
 
   /** Run an iteration for all datanodes. */
-  private ReturnStatus run(int iteration, Formatter formatter) {
+  private ReturnStatus run(int iteration, Formatter formatter,
+      Configuration conf) {
     try {
       /* get all live datanodes of a cluster and their disk usage
        * decide the number of bytes need to be moved
@@ -1385,7 +1513,7 @@ public class Balancer {
       }
 
       // clean all lists
-      resetData();
+      resetData(conf);
       return ReturnStatus.IN_PROGRESS;
     } catch (IllegalArgumentException e) {
       System.out.println(e + ".  Exiting ...");
@@ -1433,7 +1561,7 @@ public class Balancer {
         Collections.shuffle(connectors);
         for(NameNodeConnector nnc : connectors) {
           final Balancer b = new Balancer(nnc, p, conf);
-          final ReturnStatus r = b.run(iteration, formatter);
+          final ReturnStatus r = b.run(iteration, formatter, conf);
           if (r == ReturnStatus.IN_PROGRESS) {
             done = false;
           } else if (r != ReturnStatus.SUCCESS) {
@@ -1527,7 +1655,7 @@ public class Balancer {
       if (args != null) {
         try {
           for(int i = 0; i < args.length; i++) {
-            checkArgument(args.length >= 2, "args = " + Arrays.toString(args));           
+            checkArgument(args.length >= 2, "args = " + Arrays.toString(args));
             if ("-threshold".equalsIgnoreCase(args[i])) {
               i++;
               try {

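The Balancer changes above add node-group awareness: pairing now happens in three passes (same node group when the topology supports it, then same rack, then across racks), and isOnSameNodeGroupWithReplicas() rejects a target whose node group already holds another replica of the block. A compact sketch of that guard, with made-up stand-in types (Node instead of BalancerDatanode/Source):

    import java.util.List;

    // Illustrative stand-in types; the real code works on BalancerDatanode/Source.
    class NodeGroupGuardSketch {
      static class Node {
        final String name;
        final String nodeGroup;
        Node(String name, String nodeGroup) {
          this.name = name;
          this.nodeGroup = nodeGroup;
        }
      }

      /** Mirrors isOnSameNodeGroupWithReplicas(): true means "do not move here". */
      static boolean hasReplicaInTargetNodeGroup(List<Node> replicaLocations,
                                                 Node source, Node target) {
        for (Node loc : replicaLocations) {
          if (loc != source && loc.nodeGroup.equals(target.nodeGroup)) {
            return true;   // another replica already sits in the target's node group
          }
        }
        return false;
      }
    }

The three-pass ordering simply tries the most local pairings first, mirroring the existing rack-first behaviour one level down.
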
+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoUnderConstruction.java

@@ -233,7 +233,7 @@ public class BlockInfoUnderConstruction extends BlockInfo {
     setBlockUCState(BlockUCState.UNDER_RECOVERY);
     blockRecoveryId = recoveryId;
     if (replicas.size() == 0) {
-      NameNode.stateChangeLog.warn("BLOCK*"
+      NameNode.blockStateChangeLog.warn("BLOCK*"
         + " BlockInfoUnderConstruction.initLeaseRecovery:"
         + " No blocks found, lease removed.");
     }
@@ -245,7 +245,7 @@ public class BlockInfoUnderConstruction extends BlockInfo {
         primaryNodeIndex = j;
         DatanodeDescriptor primary = replicas.get(j).getExpectedLocation(); 
         primary.addBlockToBeRecovered(this);
-        NameNode.stateChangeLog.info("BLOCK* " + this
+        NameNode.blockStateChangeLog.info("BLOCK* " + this
           + " recovery started, primary=" + primary);
         return;
       }

+ 44 - 43
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

@@ -83,6 +83,7 @@ import com.google.common.collect.Sets;
 public class BlockManager {
 
   static final Log LOG = LogFactory.getLog(BlockManager.class);
+  static final Log blockLog = NameNode.blockStateChangeLog;
 
   /** Default load factor of map */
   public static final float DEFAULT_MAP_LOAD_FACTOR = 0.75f;
@@ -872,7 +873,7 @@ public class BlockManager {
       final long size) throws UnregisteredNodeException {
     final DatanodeDescriptor node = getDatanodeManager().getDatanode(datanode);
     if (node == null) {
-      NameNode.stateChangeLog.warn("BLOCK* getBlocks: "
+      blockLog.warn("BLOCK* getBlocks: "
           + "Asking for blocks from an unrecorded node " + datanode);
       throw new HadoopIllegalArgumentException(
           "Datanode " + datanode + " not found.");
@@ -950,7 +951,7 @@ public class BlockManager {
       datanodes.append(node).append(" ");
     }
     if (datanodes.length() != 0) {
-      NameNode.stateChangeLog.info("BLOCK* addToInvalidates: " + b + " "
+      blockLog.info("BLOCK* addToInvalidates: " + b + " "
           + datanodes);
     }
   }
@@ -971,7 +972,7 @@ public class BlockManager {
       // ignore the request for now. This could happen when BlockScanner
       // thread of Datanode reports bad block before Block reports are sent
       // by the Datanode on startup
-      NameNode.stateChangeLog.info("BLOCK* findAndMarkBlockAsCorrupt: "
+      blockLog.info("BLOCK* findAndMarkBlockAsCorrupt: "
           + blk + " not found");
       return;
     }
@@ -988,7 +989,7 @@ public class BlockManager {
 
     BlockCollection bc = b.corrupted.getBlockCollection();
     if (bc == null) {
-      NameNode.stateChangeLog.info("BLOCK markBlockAsCorrupt: " + b
+      blockLog.info("BLOCK markBlockAsCorrupt: " + b
           + " cannot be marked as corrupt as it does not belong to any file");
       addToInvalidates(b.corrupted, node);
       return;
@@ -1013,7 +1014,7 @@ public class BlockManager {
    */
   private void invalidateBlock(BlockToMarkCorrupt b, DatanodeInfo dn
       ) throws IOException {
-    NameNode.stateChangeLog.info("BLOCK* invalidateBlock: " + b + " on " + dn);
+    blockLog.info("BLOCK* invalidateBlock: " + b + " on " + dn);
     DatanodeDescriptor node = getDatanodeManager().getDatanode(dn);
     if (node == null) {
       throw new IOException("Cannot invalidate " + b
@@ -1023,7 +1024,7 @@ public class BlockManager {
     // Check how many copies we have of the block
     NumberReplicas nr = countNodes(b.stored);
     if (nr.replicasOnStaleNodes() > 0) {
-      NameNode.stateChangeLog.info("BLOCK* invalidateBlocks: postponing " +
+      blockLog.info("BLOCK* invalidateBlocks: postponing " +
           "invalidation of " + b + " on " + dn + " because " +
           nr.replicasOnStaleNodes() + " replica(s) are located on nodes " +
           "with potentially out-of-date block reports");
@@ -1033,12 +1034,12 @@ public class BlockManager {
       // If we have at least one copy on a live node, then we can delete it.
       addToInvalidates(b.corrupted, dn);
       removeStoredBlock(b.stored, node);
-      if(NameNode.stateChangeLog.isDebugEnabled()) {
-        NameNode.stateChangeLog.debug("BLOCK* invalidateBlocks: "
+      if(blockLog.isDebugEnabled()) {
+        blockLog.debug("BLOCK* invalidateBlocks: "
             + b + " on " + dn + " listed for deletion.");
       }
     } else {
-      NameNode.stateChangeLog.info("BLOCK* invalidateBlocks: " + b
+      blockLog.info("BLOCK* invalidateBlocks: " + b
           + " on " + dn + " is the only copy and was not deleted");
     }
   }
@@ -1160,7 +1161,7 @@ public class BlockManager {
                    (blockHasEnoughRacks(block)) ) {
                 neededReplications.remove(block, priority); // remove from neededReplications
                 neededReplications.decrementReplicationIndex(priority);
-                NameNode.stateChangeLog.info("BLOCK* Removing " + block
+                blockLog.info("BLOCK* Removing " + block
                     + " from neededReplications as it has enough replicas");
                 continue;
               }
@@ -1235,7 +1236,7 @@ public class BlockManager {
               neededReplications.remove(block, priority); // remove from neededReplications
               neededReplications.decrementReplicationIndex(priority);
               rw.targets = null;
-              NameNode.stateChangeLog.info("BLOCK* Removing " + block
+              blockLog.info("BLOCK* Removing " + block
                   + " from neededReplications as it has enough replicas");
               continue;
             }
@@ -1261,8 +1262,8 @@ public class BlockManager {
           // The reason we use 'pending' is so we can retry
           // replications that fail after an appropriate amount of time.
           pendingReplications.increment(block, targets.length);
-          if(NameNode.stateChangeLog.isDebugEnabled()) {
-            NameNode.stateChangeLog.debug(
+          if(blockLog.isDebugEnabled()) {
+            blockLog.debug(
                 "BLOCK* block " + block
                 + " is moved from neededReplications to pendingReplications");
           }
@@ -1278,7 +1279,7 @@ public class BlockManager {
       namesystem.writeUnlock();
     }
 
-    if (NameNode.stateChangeLog.isInfoEnabled()) {
+    if (blockLog.isInfoEnabled()) {
       // log which blocks have been scheduled for replication
       for(ReplicationWork rw : work){
         DatanodeDescriptor[] targets = rw.targets;
@@ -1288,13 +1289,13 @@ public class BlockManager {
             targetList.append(' ');
             targetList.append(targets[k]);
           }
-          NameNode.stateChangeLog.info("BLOCK* ask " + rw.srcNode
+          blockLog.info("BLOCK* ask " + rw.srcNode
               + " to replicate " + rw.block + " to " + targetList);
         }
       }
     }
-    if(NameNode.stateChangeLog.isDebugEnabled()) {
-        NameNode.stateChangeLog.debug(
+    if(blockLog.isDebugEnabled()) {
+        blockLog.debug(
           "BLOCK* neededReplications = " + neededReplications.size()
           + " pendingReplications = " + pendingReplications.size());
     }
@@ -1504,7 +1505,7 @@ public class BlockManager {
       // To minimize startup time, we discard any second (or later) block reports
       // that we receive while still in startup phase.
       if (namesystem.isInStartupSafeMode() && !node.isFirstBlockReport()) {
-        NameNode.stateChangeLog.info("BLOCK* processReport: "
+        blockLog.info("BLOCK* processReport: "
             + "discarded non-initial block report from " + nodeID
             + " because namenode still in startup phase");
         return;
@@ -1536,7 +1537,7 @@ public class BlockManager {
 
     // Log the block report processing stats from Namenode perspective
     NameNode.getNameNodeMetrics().addBlockReport((int) (endTime - startTime));
-    NameNode.stateChangeLog.info("BLOCK* processReport: from "
+    blockLog.info("BLOCK* processReport: from "
         + nodeID + ", blocks: " + newReport.getNumberOfBlocks()
         + ", processing time: " + (endTime - startTime) + " msecs");
   }
@@ -1596,7 +1597,7 @@ public class BlockManager {
       addStoredBlock(b, node, null, true);
     }
     for (Block b : toInvalidate) {
-      NameNode.stateChangeLog.info("BLOCK* processReport: "
+      blockLog.info("BLOCK* processReport: "
           + b + " on " + node + " size " + b.getNumBytes()
           + " does not belong to any file");
       addToInvalidates(b, node);
@@ -2034,7 +2035,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
     }
     if (storedBlock == null || storedBlock.getBlockCollection() == null) {
       // If this block does not belong to anyfile, then we are done.
-      NameNode.stateChangeLog.info("BLOCK* addStoredBlock: " + block + " on "
+      blockLog.info("BLOCK* addStoredBlock: " + block + " on "
           + node + " size " + block.getNumBytes()
           + " but it does not belong to any file");
       // we could add this block to invalidate set of this datanode.
@@ -2056,7 +2057,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
       }
     } else {
       curReplicaDelta = 0;
-      NameNode.stateChangeLog.warn("BLOCK* addStoredBlock: "
+      blockLog.warn("BLOCK* addStoredBlock: "
           + "Redundant addStoredBlock request received for " + storedBlock
           + " on " + node + " size " + storedBlock.getNumBytes());
     }
@@ -2115,7 +2116,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
   }
 
   private void logAddStoredBlock(BlockInfo storedBlock, DatanodeDescriptor node) {
-    if (!NameNode.stateChangeLog.isInfoEnabled()) {
+    if (!blockLog.isInfoEnabled()) {
       return;
     }
     
@@ -2126,7 +2127,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
     storedBlock.appendStringTo(sb);
     sb.append(" size " )
       .append(storedBlock.getNumBytes());
-    NameNode.stateChangeLog.info(sb);
+    blockLog.info(sb);
   }
   /**
    * Invalidate corrupt replicas.
@@ -2153,7 +2154,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
       try {
         invalidateBlock(new BlockToMarkCorrupt(blk, null), node);
       } catch (IOException e) {
-        NameNode.stateChangeLog.info("invalidateCorruptReplicas "
+        blockLog.info("invalidateCorruptReplicas "
             + "error in deleting bad block " + blk + " on " + node, e);
         gotException = true;
       }
@@ -2391,7 +2392,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
       // upon giving instructions to the namenode.
       //
       addToInvalidates(b, cur);
-      NameNode.stateChangeLog.info("BLOCK* chooseExcessReplicates: "
+      blockLog.info("BLOCK* chooseExcessReplicates: "
                 +"("+cur+", "+b+") is added to invalidated blocks set");
     }
   }
@@ -2405,8 +2406,8 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
     }
     if (excessBlocks.add(block)) {
       excessBlocksCount++;
-      if(NameNode.stateChangeLog.isDebugEnabled()) {
-        NameNode.stateChangeLog.debug("BLOCK* addToExcessReplicate:"
+      if(blockLog.isDebugEnabled()) {
+        blockLog.debug("BLOCK* addToExcessReplicate:"
             + " (" + dn + ", " + block
             + ") is added to excessReplicateMap");
       }
@@ -2418,15 +2419,15 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
    * removed block is still valid.
    */
   public void removeStoredBlock(Block block, DatanodeDescriptor node) {
-    if(NameNode.stateChangeLog.isDebugEnabled()) {
-      NameNode.stateChangeLog.debug("BLOCK* removeStoredBlock: "
+    if(blockLog.isDebugEnabled()) {
+      blockLog.debug("BLOCK* removeStoredBlock: "
           + block + " from " + node);
     }
     assert (namesystem.hasWriteLock());
     {
       if (!blocksMap.removeNode(block, node)) {
-        if(NameNode.stateChangeLog.isDebugEnabled()) {
-          NameNode.stateChangeLog.debug("BLOCK* removeStoredBlock: "
+        if(blockLog.isDebugEnabled()) {
+          blockLog.debug("BLOCK* removeStoredBlock: "
               + block + " has already been removed from node " + node);
         }
         return;
@@ -2453,8 +2454,8 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
       if (excessBlocks != null) {
         if (excessBlocks.remove(block)) {
           excessBlocksCount--;
-          if(NameNode.stateChangeLog.isDebugEnabled()) {
-            NameNode.stateChangeLog.debug("BLOCK* removeStoredBlock: "
+          if(blockLog.isDebugEnabled()) {
+            blockLog.debug("BLOCK* removeStoredBlock: "
                 + block + " is removed from excessBlocks");
           }
           if (excessBlocks.size() == 0) {
@@ -2497,7 +2498,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
     if (delHint != null && delHint.length() != 0) {
       delHintNode = datanodeManager.getDatanode(delHint);
       if (delHintNode == null) {
-        NameNode.stateChangeLog.warn("BLOCK* blockReceived: " + block
+        blockLog.warn("BLOCK* blockReceived: " + block
             + " is expected to be removed from an unrecorded node " + delHint);
       }
     }
@@ -2532,7 +2533,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
       addStoredBlock(b, node, delHintNode, true);
     }
     for (Block b : toInvalidate) {
-      NameNode.stateChangeLog.info("BLOCK* addBlock: block "
+      blockLog.info("BLOCK* addBlock: block "
           + b + " on " + node + " size " + b.getNumBytes()
           + " does not belong to any file");
       addToInvalidates(b, node);
@@ -2558,7 +2559,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
     try {
       final DatanodeDescriptor node = datanodeManager.getDatanode(nodeID);
       if (node == null || !node.isAlive) {
-        NameNode.stateChangeLog
+        blockLog
             .warn("BLOCK* processIncrementalBlockReport"
                 + " is received from dead or unregistered node "
                 + nodeID);
@@ -2585,19 +2586,19 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
           String msg = 
             "Unknown block status code reported by " + nodeID +
             ": " + rdbi;
-          NameNode.stateChangeLog.warn(msg);
+          blockLog.warn(msg);
           assert false : msg; // if assertions are enabled, throw.
           break;
         }
-        if (NameNode.stateChangeLog.isDebugEnabled()) {
-          NameNode.stateChangeLog.debug("BLOCK* block "
+        if (blockLog.isDebugEnabled()) {
+          blockLog.debug("BLOCK* block "
               + (rdbi.getStatus()) + ": " + rdbi.getBlock()
               + " is received from " + nodeID);
         }
       }
     } finally {
       namesystem.writeUnlock();
-      NameNode.stateChangeLog
+      blockLog
           .debug("*BLOCK* NameNode.processIncrementalBlockReport: " + "from "
               + nodeID
               +  " receiving: " + receiving + ", "
@@ -2890,8 +2891,8 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
     } finally {
       namesystem.writeUnlock();
     }
-    if (NameNode.stateChangeLog.isInfoEnabled()) {
-      NameNode.stateChangeLog.info("BLOCK* " + getClass().getSimpleName()
+    if (blockLog.isInfoEnabled()) {
+      blockLog.info("BLOCK* " + getClass().getSimpleName()
           + ": ask " + dn + " to delete " + toInvalidate);
     }
     return toInvalidate.size();

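Across these files the BLOCK* messages move from NameNode.stateChangeLog to a dedicated NameNode.blockStateChangeLog, so block-level chatter can be tuned separately from namespace state changes. A sketch of the pattern; the underlying logger name is not visible in the hunks shown here, so "BlockStateChange" below is an assumption:

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;

    // Sketch of routing block-state messages to their own logger. The logger
    // name "BlockStateChange" is an assumption; only the field name
    // blockStateChangeLog appears in the hunks above.
    class SplitLoggerSketch {
      static final Log stateChangeLog      = LogFactory.getLog("org.example.StateChange");
      static final Log blockStateChangeLog = LogFactory.getLog("BlockStateChange");

      void onBlockStored(String block, String node) {
        if (blockStateChangeLog.isDebugEnabled()) {   // guard the string concatenation
          blockStateChangeLog.debug("BLOCK* addStoredBlock: " + block + " on " + node);
        }
      }
    }

With log4j, such a logger can then be quieted with a single log4j.logger.<name>=WARN line while the rest of the state-change log stays at INFO.
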
+ 2 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CorruptReplicasMap.java

@@ -63,13 +63,13 @@ public class CorruptReplicasMap{
     
     if (!nodes.contains(dn)) {
       nodes.add(dn);
-      NameNode.stateChangeLog.info("BLOCK NameSystem.addToCorruptReplicasMap: "+
+      NameNode.blockStateChangeLog.info("BLOCK NameSystem.addToCorruptReplicasMap: "+
                                    blk.getBlockName() +
                                    " added as corrupt on " + dn +
                                    " by " + Server.getRemoteIp() +
                                    reasonText);
     } else {
-      NameNode.stateChangeLog.info("BLOCK NameSystem.addToCorruptReplicasMap: "+
+      NameNode.blockStateChangeLog.info("BLOCK NameSystem.addToCorruptReplicasMap: "+
                                    "duplicate requested for " + 
                                    blk.getBlockName() + " to add as corrupt " +
                                    "on " + dn +

+ 40 - 44
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java

@@ -39,7 +39,6 @@ import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
@@ -151,10 +150,7 @@ public class DatanodeManager {
     this.namesystem = namesystem;
     this.blockManager = blockManager;
     
-    Class<? extends NetworkTopology> networkTopologyClass =
-        conf.getClass(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
-            NetworkTopology.class, NetworkTopology.class);
-    networktopology = ReflectionUtils.newInstance(networkTopologyClass, conf);
+    networktopology = NetworkTopology.getInstance(conf);
 
     this.heartbeatManager = new HeartbeatManager(namesystem, blockManager, conf);
 
@@ -540,28 +536,16 @@ public class DatanodeManager {
   private static boolean checkInList(final DatanodeID node,
       final Set<String> hostsList,
       final boolean isExcludeList) {
-    final InetAddress iaddr;
-
-    try {
-      iaddr = InetAddress.getByName(node.getIpAddr());
-    } catch (UnknownHostException e) {
-      LOG.warn("Unknown IP: " + node.getIpAddr(), e);
-      return isExcludeList;
-    }
-
     // if include list is empty, host is in include list
     if ( (!isExcludeList) && (hostsList.isEmpty()) ){
       return true;
     }
-    return // compare ipaddress(:port)
-    (hostsList.contains(iaddr.getHostAddress().toString()))
-        || (hostsList.contains(iaddr.getHostAddress().toString() + ":"
-            + node.getXferPort()))
-        // compare hostname(:port)
-        || (hostsList.contains(iaddr.getHostName()))
-        || (hostsList.contains(iaddr.getHostName() + ":" + node.getXferPort()))
-        || ((node instanceof DatanodeInfo) && hostsList
-            .contains(((DatanodeInfo) node).getHostName()));
+    for (String name : getNodeNamesForHostFiltering(node)) {
+      if (hostsList.contains(name)) {
+        return true;
+      }
+    }
+    return false;
   }
 
   /**
@@ -644,16 +628,20 @@ public class DatanodeManager {
    */
   public void registerDatanode(DatanodeRegistration nodeReg)
       throws DisallowedDatanodeException {
-    String dnAddress = Server.getRemoteAddress();
-    if (dnAddress == null) {
-      // Mostly called inside an RPC.
-      // But if not, use address passed by the data-node.
-      dnAddress = nodeReg.getIpAddr();
+    InetAddress dnAddress = Server.getRemoteIp();
+    if (dnAddress != null) {
+      // Mostly called inside an RPC, update ip and peer hostname
+      String hostname = dnAddress.getHostName();
+      String ip = dnAddress.getHostAddress();
+      if (hostname.equals(ip)) {
+        LOG.warn("Unresolved datanode registration from " + ip);
+        throw new DisallowedDatanodeException(nodeReg);
+      }
+      // update node registration with the ip and hostname from rpc request
+      nodeReg.setIpAddr(ip);
+      nodeReg.setPeerHostName(hostname);
     }
 
-    // Update the IP to the address of the RPC request that is
-    // registering this datanode.
-    nodeReg.setIpAddr(dnAddress);
     nodeReg.setExportedKeys(blockManager.getBlockKeys());
 
     // Checks if the node is not on the hosts list.  If it is not, then
@@ -1033,19 +1021,8 @@ public class DatanodeManager {
         if ( (isDead && listDeadNodes) || (!isDead && listLiveNodes) ) {
           nodes.add(dn);
         }
-        // Remove any nodes we know about from the map
-        try {
-          InetAddress inet = InetAddress.getByName(dn.getIpAddr());
-          // compare hostname(:port)
-          mustList.remove(inet.getHostName());
-          mustList.remove(inet.getHostName()+":"+dn.getXferPort());
-          // compare ipaddress(:port)
-          mustList.remove(inet.getHostAddress().toString());
-          mustList.remove(inet.getHostAddress().toString()+ ":" +dn.getXferPort());
-        } catch (UnknownHostException e) {
-          mustList.remove(dn.getName());
-          mustList.remove(dn.getIpAddr());
-          LOG.warn(e);
+        for (String name : getNodeNamesForHostFiltering(dn)) {
+          mustList.remove(name);
         }
       }
     }
@@ -1066,6 +1043,25 @@ public class DatanodeManager {
     return nodes;
   }
   
+  private static List<String> getNodeNamesForHostFiltering(DatanodeID node) {
+    String ip = node.getIpAddr();
+    String regHostName = node.getHostName();
+    int xferPort = node.getXferPort();
+    
+    List<String> names = new ArrayList<String>(); 
+    names.add(ip);
+    names.add(ip + ":" + xferPort);
+    names.add(regHostName);
+    names.add(regHostName + ":" + xferPort);
+
+    String peerHostName = node.getPeerHostName();
+    if (peerHostName != null) {
+      names.add(peerHostName);
+      names.add(peerHostName + ":" + xferPort);
+    }
+    return names;
+  }
+  
   private void setDatanodeDead(DatanodeDescriptor node) {
     node.setLastUpdate(0);
   }

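DatanodeManager's include/exclude matching no longer performs a reverse DNS lookup per check; getNodeNamesForHostFiltering() builds every identity the node is known by (IP, registered hostname, peer hostname, each with and without the transfer port) and tests them against the hosts list. A trimmed-down version of the same idea, with the DatanodeID replaced by plain parameters:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Set;

    // Trimmed-down sketch of the new host filtering; parameter names are illustrative.
    class HostFilterSketch {
      /** Every name a node may appear under in an include/exclude file. */
      static List<String> namesFor(String ip, String hostName,
                                   String peerHostName, int xferPort) {
        List<String> names = new ArrayList<String>();
        names.add(ip);
        names.add(ip + ":" + xferPort);
        names.add(hostName);
        names.add(hostName + ":" + xferPort);
        if (peerHostName != null) {              // only known after an RPC registration
          names.add(peerHostName);
          names.add(peerHostName + ":" + xferPort);
        }
        return names;
      }

      static boolean inList(Set<String> hostsList, String ip, String hostName,
                            String peerHostName, int xferPort) {
        for (String name : namesFor(ip, hostName, peerHostName, xferPort)) {
          if (hostsList.contains(name)) {
            return true;
          }
        }
        return false;
      }
    }
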
+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java

@@ -86,7 +86,7 @@ class InvalidateBlocks {
     if (set.add(block)) {
       numBlocks++;
       if (log) {
-        NameNode.stateChangeLog.info("BLOCK* " + getClass().getSimpleName()
+        NameNode.blockStateChangeLog.info("BLOCK* " + getClass().getSimpleName()
             + ": add " + block + " to " + datanode);
       }
     }

+ 8 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/UnderReplicatedBlocks.java

@@ -190,8 +190,8 @@ class UnderReplicatedBlocks implements Iterable<Block> {
     int priLevel = getPriority(block, curReplicas, decomissionedReplicas,
                                expectedReplicas);
     if(priorityQueues.get(priLevel).add(block)) {
-      if(NameNode.stateChangeLog.isDebugEnabled()) {
-        NameNode.stateChangeLog.debug(
+      if(NameNode.blockStateChangeLog.isDebugEnabled()) {
+        NameNode.blockStateChangeLog.debug(
           "BLOCK* NameSystem.UnderReplicationBlock.add:"
           + block
           + " has only " + curReplicas
@@ -233,8 +233,8 @@ class UnderReplicatedBlocks implements Iterable<Block> {
   boolean remove(Block block, int priLevel) {
     if(priLevel >= 0 && priLevel < LEVEL 
         && priorityQueues.get(priLevel).remove(block)) {
-      if(NameNode.stateChangeLog.isDebugEnabled()) {
-        NameNode.stateChangeLog.debug(
+      if(NameNode.blockStateChangeLog.isDebugEnabled()) {
+        NameNode.blockStateChangeLog.debug(
           "BLOCK* NameSystem.UnderReplicationBlock.remove: "
           + "Removing block " + block
           + " from priority queue "+ priLevel);
@@ -245,8 +245,8 @@ class UnderReplicatedBlocks implements Iterable<Block> {
       // not found in the queue for the given priority level.
       for (int i = 0; i < LEVEL; i++) {
         if (priorityQueues.get(i).remove(block)) {
-          if(NameNode.stateChangeLog.isDebugEnabled()) {
-            NameNode.stateChangeLog.debug(
+          if(NameNode.blockStateChangeLog.isDebugEnabled()) {
+            NameNode.blockStateChangeLog.debug(
               "BLOCK* NameSystem.UnderReplicationBlock.remove: "
               + "Removing block " + block
               + " from priority queue "+ i);
@@ -296,8 +296,8 @@ class UnderReplicatedBlocks implements Iterable<Block> {
       remove(block, oldPri);
     }
     if(priorityQueues.get(curPri).add(block)) {
-      if(NameNode.stateChangeLog.isDebugEnabled()) {
-        NameNode.stateChangeLog.debug(
+      if(NameNode.blockStateChangeLog.isDebugEnabled()) {
+        NameNode.blockStateChangeLog.debug(
           "BLOCK* NameSystem.UnderReplicationBlock.update:"
           + block
           + " has only "+ curReplicas

+ 8 - 8
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java

@@ -408,15 +408,15 @@ public class JspHelper {
         if (!parts[i].equals("")) {
           tempPath.append(parts[i]);
           out.print("<a href=\"browseDirectory.jsp" + "?dir="
-              + tempPath.toString() + "&namenodeInfoPort=" + namenodeInfoPort
+              + HtmlQuoting.quoteHtmlChars(tempPath.toString()) + "&namenodeInfoPort=" + namenodeInfoPort
               + getDelegationTokenUrlParam(tokenString)
               + getUrlParam(NAMENODE_ADDRESS, nnAddress));
-          out.print("\">" + parts[i] + "</a>" + Path.SEPARATOR);
+          out.print("\">" + HtmlQuoting.quoteHtmlChars(parts[i]) + "</a>" + Path.SEPARATOR);
           tempPath.append(Path.SEPARATOR);
         }
       }
       if(parts.length > 0) {
-        out.print(parts[parts.length-1]);
+        out.print(HtmlQuoting.quoteHtmlChars(parts[parts.length-1]));
       }
     }
     catch (UnsupportedEncodingException ex) {
@@ -431,16 +431,16 @@ public class JspHelper {
                                    String nnAddress) throws IOException {
     out.print("<form action=\"browseDirectory.jsp\" method=\"get\" name=\"goto\">");
     out.print("Goto : ");
-    out.print("<input name=\"dir\" type=\"text\" width=\"50\" id\"dir\" value=\""+ file+"\">");
-    out.print("<input name=\"go\" type=\"submit\" value=\"go\">");
+    out.print("<input name=\"dir\" type=\"text\" width=\"50\" id=\"dir\" value=\""+ HtmlQuoting.quoteHtmlChars(file)+"\"/>");
+    out.print("<input name=\"go\" type=\"submit\" value=\"go\"/>");
     out.print("<input name=\"namenodeInfoPort\" type=\"hidden\" "
-        + "value=\"" + namenodeInfoPort  + "\">");
+        + "value=\"" + namenodeInfoPort  + "\"/>");
     if (UserGroupInformation.isSecurityEnabled()) {
       out.print("<input name=\"" + DELEGATION_PARAMETER_NAME
-          + "\" type=\"hidden\" value=\"" + tokenString + "\">");
+          + "\" type=\"hidden\" value=\"" + tokenString + "\"/>");
     }
     out.print("<input name=\""+ NAMENODE_ADDRESS +"\" type=\"hidden\" "
-        + "value=\"" + nnAddress  + "\">");
+        + "value=\"" + nnAddress  + "\"/>");
     out.print("</form>");
   }
   

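JspHelper now passes each user-controlled value (path components and the Goto form's file value) through HtmlQuoting.quoteHtmlChars() before printing it, and the generated inputs were made well-formed (id="dir", self-closing tags). A minimal sketch of the output-escaping pattern; quote() below is a hypothetical stand-in, since only the call sites of HtmlQuoting are visible in the hunk:

    import java.io.PrintWriter;
    import java.io.StringWriter;

    // quote() is a hypothetical stand-in for HtmlQuoting.quoteHtmlChars().
    class JspEscapeSketch {
      static String quote(String s) {
        if (s == null) {
          return null;
        }
        return s.replace("&", "&amp;").replace("<", "&lt;")
                .replace(">", "&gt;").replace("\"", "&quot;");
      }

      /** Build the "Goto" form with the user-supplied path escaped on output. */
      static String gotoForm(String file) {
        StringWriter sw = new StringWriter();
        PrintWriter out = new PrintWriter(sw);
        out.print("<form action=\"browseDirectory.jsp\" method=\"get\" name=\"goto\">");
        out.print("<input name=\"dir\" type=\"text\" id=\"dir\" value=\""
            + quote(file) + "\"/>");
        out.print("<input name=\"go\" type=\"submit\" value=\"go\"/>");
        out.print("</form>");
        out.flush();
        return sw.toString();
      }
    }
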
+ 4 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java

@@ -433,7 +433,7 @@ public abstract class Storage extends StorageInfo {
         if (!root.exists()) {
           // storage directory does not exist
           if (startOpt != StartupOption.FORMAT) {
-            LOG.info("Storage directory " + rootPath + " does not exist");
+            LOG.warn("Storage directory " + rootPath + " does not exist");
             return StorageState.NON_EXISTENT;
           }
           LOG.info(rootPath + " does not exist. Creating ...");
@@ -442,15 +442,15 @@ public abstract class Storage extends StorageInfo {
         }
         // or is inaccessible
         if (!root.isDirectory()) {
-          LOG.info(rootPath + "is not a directory");
+          LOG.warn(rootPath + "is not a directory");
           return StorageState.NON_EXISTENT;
         }
         if (!root.canWrite()) {
-          LOG.info("Cannot access storage directory " + rootPath);
+          LOG.warn("Cannot access storage directory " + rootPath);
           return StorageState.NON_EXISTENT;
         }
       } catch(SecurityException ex) {
-        LOG.info("Cannot access storage directory " + rootPath, ex);
+        LOG.warn("Cannot access storage directory " + rootPath, ex);
         return StorageState.NON_EXISTENT;
       }
 

+ 175 - 152
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java

@@ -638,10 +638,7 @@ class BlockReceiver implements Closeable {
         responder.start(); // start thread to processes responses
       }
 
-      /* 
-       * Receive until the last packet.
-       */
-      while (receivePacket() >= 0) {}
+      while (receivePacket() >= 0) { /* Receive until the last packet */ }
 
       // wait for all outstanding packet responses. And then
       // indicate responder to gracefully shutdown.
@@ -724,7 +721,7 @@ class BlockReceiver implements Closeable {
   static private long checksum2long(byte[] checksum) {
     long crc = 0L;
     for(int i=0; i<checksum.length; i++) {
-      crc |= (0xffL&(long)checksum[i])<<((checksum.length-i-1)*8);
+      crc |= (0xffL&checksum[i])<<((checksum.length-i-1)*8);
     }
     return crc;
   }
@@ -783,24 +780,23 @@ class BlockReceiver implements Closeable {
     NON_PIPELINE, LAST_IN_PIPELINE, HAS_DOWNSTREAM_IN_PIPELINE
   }
   
+  private static Status[] MIRROR_ERROR_STATUS = {Status.SUCCESS, Status.ERROR};
+  
   /**
    * Processes responses from downstream datanodes in the pipeline
    * and sends back replies to the originator.
    */
   class PacketResponder implements Runnable, Closeable {   
-
-    /** queue for packets waiting for ack */
+    /** queue for packets waiting for ack - synchronization using monitor lock */
     private final LinkedList<Packet> ackQueue = new LinkedList<Packet>(); 
     /** the thread that spawns this responder */
     private final Thread receiverThread = Thread.currentThread();
-    /** is this responder running? */
+    /** is this responder running? - synchronization using monitor lock */
     private volatile boolean running = true;
-
     /** input from the next downstream datanode */
     private final DataInputStream downstreamIn;
     /** output to upstream datanode/client */
     private final DataOutputStream upstreamOut;
-
     /** The type of this responder */
     private final PacketResponderType type;
     /** for log and error messages */
@@ -812,8 +808,7 @@ class BlockReceiver implements Closeable {
     }
 
     PacketResponder(final DataOutputStream upstreamOut,
-        final DataInputStream downstreamIn,
-        final DatanodeInfo[] downstreams) {
+        final DataInputStream downstreamIn, final DatanodeInfo[] downstreams) {
       this.downstreamIn = downstreamIn;
       this.upstreamOut = upstreamOut;
 
@@ -830,31 +825,49 @@ class BlockReceiver implements Closeable {
       this.myString = b.toString();
     }
 
+    private boolean isRunning() {
+      return running && datanode.shouldRun;
+    }
+    
     /**
     * enqueue the seqno that is still to be acked by the downstream datanode.
      * @param seqno
      * @param lastPacketInBlock
      * @param offsetInBlock
      */
-    synchronized void enqueue(final long seqno,
-        final boolean lastPacketInBlock, final long offsetInBlock) {
-      if (running) {
-        final Packet p = new Packet(seqno, lastPacketInBlock, offsetInBlock,
-            System.nanoTime());
-        if(LOG.isDebugEnabled()) {
-          LOG.debug(myString + ": enqueue " + p);
+    void enqueue(final long seqno, final boolean lastPacketInBlock,
+        final long offsetInBlock) {
+      final Packet p = new Packet(seqno, lastPacketInBlock, offsetInBlock,
+          System.nanoTime());
+      if(LOG.isDebugEnabled()) {
+        LOG.debug(myString + ": enqueue " + p);
+      }
+      synchronized(this) {
+        if (running) {
+          ackQueue.addLast(p);
+          notifyAll();
         }
-        ackQueue.addLast(p);
-        notifyAll();
       }
     }
+    
+    /** Wait for a packet with given {@code seqno} to be enqueued to ackQueue */
+    synchronized Packet waitForAckHead(long seqno) throws InterruptedException {
+      while (isRunning() && ackQueue.size() == 0) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug(myString + ": seqno=" + seqno +
+                    " waiting for local datanode to finish write.");
+        }
+        wait();
+      }
+      return isRunning() ? ackQueue.getFirst() : null;
+    }
 
     /**
      * wait for all pending packets to be acked. Then shutdown thread.
      */
     @Override
     public synchronized void close() {
-      while (running && ackQueue.size() != 0 && datanode.shouldRun) {
+      while (isRunning() && ackQueue.size() != 0) {
         try {
           wait();
         } catch (InterruptedException e) {
@@ -877,147 +890,97 @@ class BlockReceiver implements Closeable {
     public void run() {
       boolean lastPacketInBlock = false;
       final long startTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
-      while (running && datanode.shouldRun && !lastPacketInBlock) {
-
+      while (isRunning() && !lastPacketInBlock) {
         long totalAckTimeNanos = 0;
         boolean isInterrupted = false;
         try {
-            Packet pkt = null;
-            long expected = -2;
-            PipelineAck ack = new PipelineAck();
-            long seqno = PipelineAck.UNKOWN_SEQNO;
-            long ackRecvNanoTime = 0;
-            try {
-              if (type != PacketResponderType.LAST_IN_PIPELINE
-                  && !mirrorError) {
-                // read an ack from downstream datanode
-                ack.readFields(downstreamIn);
-                ackRecvNanoTime = System.nanoTime();
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug(myString + " got " + ack);
-                }
-                seqno = ack.getSeqno();
+          Packet pkt = null;
+          long expected = -2;
+          PipelineAck ack = new PipelineAck();
+          long seqno = PipelineAck.UNKOWN_SEQNO;
+          long ackRecvNanoTime = 0;
+          try {
+            if (type != PacketResponderType.LAST_IN_PIPELINE && !mirrorError) {
+              // read an ack from downstream datanode
+              ack.readFields(downstreamIn);
+              ackRecvNanoTime = System.nanoTime();
+              if (LOG.isDebugEnabled()) {
+                LOG.debug(myString + " got " + ack);
               }
-              if (seqno != PipelineAck.UNKOWN_SEQNO
-                  || type == PacketResponderType.LAST_IN_PIPELINE) {
-                synchronized (this) {
-                  while (running && datanode.shouldRun && ackQueue.size() == 0) {
-                    if (LOG.isDebugEnabled()) {
-                      LOG.debug(myString + ": seqno=" + seqno +
-                                " waiting for local datanode to finish write.");
-                    }
-                    wait();
-                  }
-                  if (!running || !datanode.shouldRun) {
-                    break;
-                  }
-                  pkt = ackQueue.getFirst();
-                  expected = pkt.seqno;
-                  if (type == PacketResponderType.HAS_DOWNSTREAM_IN_PIPELINE
-                      && seqno != expected) {
-                    throw new IOException(myString + "seqno: expected="
-                        + expected + ", received=" + seqno);
-                  }
-                  if (type == PacketResponderType.HAS_DOWNSTREAM_IN_PIPELINE) {
-                    // The total ack time includes the ack times of downstream nodes.
-                    // The value is 0 if this responder doesn't have a downstream
-                    // DN in the pipeline.
-                    totalAckTimeNanos = ackRecvNanoTime - pkt.ackEnqueueNanoTime;
-                    // Report the elapsed time from ack send to ack receive minus
-                    // the downstream ack time.
-                    long ackTimeNanos = totalAckTimeNanos - ack.getDownstreamAckTimeNanos();
-                    if (ackTimeNanos < 0) {
-                      if (LOG.isDebugEnabled()) {
-                        LOG.debug("Calculated invalid ack time: " + ackTimeNanos + "ns.");
-                      }
-                    } else {
-                      datanode.metrics.addPacketAckRoundTripTimeNanos(ackTimeNanos);
-                    }
+              seqno = ack.getSeqno();
+            }
+            if (seqno != PipelineAck.UNKOWN_SEQNO
+                || type == PacketResponderType.LAST_IN_PIPELINE) {
+              pkt = waitForAckHead(seqno);
+              if (!isRunning()) {
+                break;
+              }
+              expected = pkt.seqno;
+              if (type == PacketResponderType.HAS_DOWNSTREAM_IN_PIPELINE
+                  && seqno != expected) {
+                throw new IOException(myString + "seqno: expected=" + expected
+                    + ", received=" + seqno);
+              }
+              if (type == PacketResponderType.HAS_DOWNSTREAM_IN_PIPELINE) {
+                // The total ack time includes the ack times of downstream
+                // nodes.
+                // The value is 0 if this responder doesn't have a downstream
+                // DN in the pipeline.
+                totalAckTimeNanos = ackRecvNanoTime - pkt.ackEnqueueNanoTime;
+                // Report the elapsed time from ack send to ack receive minus
+                // the downstream ack time.
+                long ackTimeNanos = totalAckTimeNanos
+                    - ack.getDownstreamAckTimeNanos();
+                if (ackTimeNanos < 0) {
+                  if (LOG.isDebugEnabled()) {
+                    LOG.debug("Calculated invalid ack time: " + ackTimeNanos
+                        + "ns.");
                   }
-                  lastPacketInBlock = pkt.lastPacketInBlock;
+                } else {
+                  datanode.metrics.addPacketAckRoundTripTimeNanos(ackTimeNanos);
                 }
               }
-            } catch (InterruptedException ine) {
+              lastPacketInBlock = pkt.lastPacketInBlock;
+            }
+          } catch (InterruptedException ine) {
+            isInterrupted = true;
+          } catch (IOException ioe) {
+            if (Thread.interrupted()) {
               isInterrupted = true;
-            } catch (IOException ioe) {
-              if (Thread.interrupted()) {
-                isInterrupted = true;
-              } else {
-                // continue to run even if can not read from mirror
-                // notify client of the error
-                // and wait for the client to shut down the pipeline
-                mirrorError = true;
-                LOG.info(myString, ioe);
-              }
+            } else {
+              // continue to run even if can not read from mirror
+              // notify client of the error
+              // and wait for the client to shut down the pipeline
+              mirrorError = true;
+              LOG.info(myString, ioe);
             }
+          }
 
-            if (Thread.interrupted() || isInterrupted) {
-              /* The receiver thread cancelled this thread. 
-               * We could also check any other status updates from the 
-               * receiver thread (e.g. if it is ok to write to replyOut). 
-               * It is prudent to not send any more status back to the client
-               * because this datanode has a problem. The upstream datanode
-               * will detect that this datanode is bad, and rightly so.
-               */
-              LOG.info(myString + ": Thread is interrupted.");
-              running = false;
-              continue;
-            }
-            
-            // If this is the last packet in block, then close block
-            // file and finalize the block before responding success
-            if (lastPacketInBlock) {
-              BlockReceiver.this.close();
-              final long endTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime() : 0;
-              block.setNumBytes(replicaInfo.getNumBytes());
-              datanode.data.finalizeBlock(block);
-              datanode.closeBlock(block, DataNode.EMPTY_DEL_HINT);
-              if (ClientTraceLog.isInfoEnabled() && isClient) {
-                long offset = 0;
-                DatanodeRegistration dnR = 
-                  datanode.getDNRegistrationForBP(block.getBlockPoolId());
-                ClientTraceLog.info(String.format(DN_CLIENTTRACE_FORMAT,
-                      inAddr, myAddr, block.getNumBytes(),
-                      "HDFS_WRITE", clientname, offset,
-                      dnR.getStorageID(), block, endTime-startTime));
-              } else {
-                LOG.info("Received " + block + " size "
-                    + block.getNumBytes() + " from " + inAddr);
-              }
-            }
+          if (Thread.interrupted() || isInterrupted) {
+            /*
+             * The receiver thread cancelled this thread. We could also check
+             * any other status updates from the receiver thread (e.g. if it is
+             * ok to write to replyOut). It is prudent to not send any more
+             * status back to the client because this datanode has a problem.
+             * The upstream datanode will detect that this datanode is bad, and
+             * rightly so.
+             */
+            LOG.info(myString + ": Thread is interrupted.");
+            running = false;
+            continue;
+          }
 
-            // construct my ack message
-            Status[] replies = null;
-            if (mirrorError) { // ack read error
-              replies = new Status[2];
-              replies[0] = Status.SUCCESS;
-              replies[1] = Status.ERROR;
-            } else {
-              short ackLen = type == PacketResponderType.LAST_IN_PIPELINE? 0
-                  : ack.getNumOfReplies();
-              replies = new Status[1+ackLen];
-              replies[0] = Status.SUCCESS;
-              for (int i=0; i<ackLen; i++) {
-                replies[i+1] = ack.getReply(i);
-              }
-            }
-            PipelineAck replyAck = new PipelineAck(expected, replies, totalAckTimeNanos);
-            
-            if (replyAck.isSuccess() && 
-                 pkt.offsetInBlock > replicaInfo.getBytesAcked())
-                replicaInfo.setBytesAcked(pkt.offsetInBlock);
-
-            // send my ack back to upstream datanode
-            replyAck.write(upstreamOut);
-            upstreamOut.flush();
-            if (LOG.isDebugEnabled()) {
-              LOG.debug(myString + ", replyAck=" + replyAck);
-            }
-            if (pkt != null) {
-              // remove the packet from the ack queue
-              removeAckHead();
-              // update bytes acked
-            }
+          if (lastPacketInBlock) {
+            // Finalize the block and close the block file
+            finalizeBlock(startTime);
+          }
+
+          sendAckUpstream(ack, expected, totalAckTimeNanos,
+              (pkt != null ? pkt.offsetInBlock : 0));
+          if (pkt != null) {
+            // remove the packet from the ack queue
+            removeAckHead();
+          }
         } catch (IOException e) {
           LOG.warn("IOException in BlockReceiver.run(): ", e);
           if (running) {
@@ -1043,6 +1006,66 @@ class BlockReceiver implements Closeable {
       LOG.info(myString + " terminating");
     }
     
+    /**
+     * Finalize the block and close the block file
+     * @param startTime time when BlockReceiver started receiving the block
+     */
+    private void finalizeBlock(long startTime) throws IOException {
+      BlockReceiver.this.close();
+      final long endTime = ClientTraceLog.isInfoEnabled() ? System.nanoTime()
+          : 0;
+      block.setNumBytes(replicaInfo.getNumBytes());
+      datanode.data.finalizeBlock(block);
+      datanode.closeBlock(block, DataNode.EMPTY_DEL_HINT);
+      if (ClientTraceLog.isInfoEnabled() && isClient) {
+        long offset = 0;
+        DatanodeRegistration dnR = datanode.getDNRegistrationForBP(block
+            .getBlockPoolId());
+        ClientTraceLog.info(String.format(DN_CLIENTTRACE_FORMAT, inAddr,
+            myAddr, block.getNumBytes(), "HDFS_WRITE", clientname, offset,
+            dnR.getStorageID(), block, endTime - startTime));
+      } else {
+        LOG.info("Received " + block + " size " + block.getNumBytes()
+            + " from " + inAddr);
+      }
+    }
+    
+    /**
+     * @param ack Ack received from downstream
+     * @param seqno sequence number of ack to be sent upstream
+     * @param totalAckTimeNanos total ack time including all the downstream
+     *          nodes
+     * @param offsetInBlock offset in block for the data in packet
+     */
+    private void sendAckUpstream(PipelineAck ack, long seqno,
+        long totalAckTimeNanos, long offsetInBlock) throws IOException {
+      Status[] replies = null;
+      if (mirrorError) { // ack read error
+        replies = MIRROR_ERROR_STATUS;
+      } else {
+        short ackLen = type == PacketResponderType.LAST_IN_PIPELINE ? 0 : ack
+            .getNumOfReplies();
+        replies = new Status[1 + ackLen];
+        replies[0] = Status.SUCCESS;
+        for (int i = 0; i < ackLen; i++) {
+          replies[i + 1] = ack.getReply(i);
+        }
+      }
+      PipelineAck replyAck = new PipelineAck(seqno, replies,
+          totalAckTimeNanos);
+      if (replyAck.isSuccess()
+          && offsetInBlock > replicaInfo.getBytesAcked()) {
+        replicaInfo.setBytesAcked(offsetInBlock);
+      }
+
+      // send my ack back to upstream datanode
+      replyAck.write(upstreamOut);
+      upstreamOut.flush();
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(myString + ", replyAck=" + replyAck);
+      }
+    }
+    
     /**
      * Remove a packet from the head of the ack queue
      * 

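The refactoring above folds the ack construction into sendAckUpstream(). A minimal, self-contained sketch of that reply-array logic follows; the Status enum and the downstream reply array are stand-ins for the real PipelineAck/Status types, not the HDFS classes themselves:

    import java.util.Arrays;

    // Standalone sketch of the reply-array logic in sendAckUpstream() above.
    public class AckReplySketch {
      enum Status { SUCCESS, ERROR }

      // Mirrors the MIRROR_ERROR_STATUS constant referenced in the hunk:
      // this node succeeded, but its downstream mirror could not be read.
      static final Status[] MIRROR_ERROR_STATUS = { Status.SUCCESS, Status.ERROR };

      static Status[] buildReplies(boolean mirrorError, boolean lastInPipeline,
          Status[] downstreamReplies) {
        if (mirrorError) {
          return MIRROR_ERROR_STATUS;
        }
        int ackLen = lastInPipeline ? 0 : downstreamReplies.length;
        Status[] replies = new Status[1 + ackLen];
        replies[0] = Status.SUCCESS;             // this datanode's own status
        for (int i = 0; i < ackLen; i++) {
          replies[i + 1] = downstreamReplies[i]; // statuses forwarded from downstream
        }
        return replies;
      }

      public static void main(String[] args) {
        Status[] downstream = { Status.SUCCESS, Status.SUCCESS };
        System.out.println(Arrays.toString(buildReplies(false, false, downstream)));
        System.out.println(Arrays.toString(buildReplies(true, false, downstream)));
      }
    }
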
+ 6 - 4
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java

@@ -43,6 +43,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
 import org.apache.hadoop.hdfs.server.common.JspHelper;
+import org.apache.hadoop.http.HtmlQuoting;
 import org.apache.hadoop.http.HttpConfig;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -119,7 +120,7 @@ public class DatanodeJspHelper {
     String target = dir;
     final HdfsFileStatus targetStatus = dfs.getFileInfo(target);
     if (targetStatus == null) { // not exists
-      out.print("<h3>File or directory : " + target + " does not exist</h3>");
+      out.print("<h3>File or directory : " + StringEscapeUtils.escapeHtml(target) + " does not exist</h3>");
       JspHelper.printGotoForm(out, namenodeInfoPort, tokenString, target,
           nnAddr);
     } else {
@@ -203,7 +204,7 @@ public class DatanodeJspHelper {
               + JspHelper.getDelegationTokenUrlParam(tokenString)
               + JspHelper.getUrlParam(JspHelper.NAMENODE_ADDRESS, nnAddr);
             cols[0] = "<a href=\"" + datanodeUrl + "\">"
-              + localFileName + "</a>";
+              + HtmlQuoting.quoteHtmlChars(localFileName) + "</a>";
             cols[5] = lsDateFormat.format(new Date((files[i]
               .getModificationTime())));
             cols[6] = files[i].getPermission().toString();
@@ -258,7 +259,8 @@ public class DatanodeJspHelper {
     int namenodeInfoPort = -1;
     if (namenodeInfoPortStr != null)
       namenodeInfoPort = Integer.parseInt(namenodeInfoPortStr);
-    final String nnAddr = req.getParameter(JspHelper.NAMENODE_ADDRESS);
+    final String nnAddr = StringEscapeUtils.escapeHtml(
+        req.getParameter(JspHelper.NAMENODE_ADDRESS));
     if (nnAddr == null){
       out.print(JspHelper.NAMENODE_ADDRESS + " url param is null");
       return;
@@ -636,7 +638,7 @@ public class DatanodeJspHelper {
     UserGroupInformation ugi = JspHelper.getUGI(req, conf);
 
     String namenodeInfoPortStr = req.getParameter("namenodeInfoPort");
-    String nnAddr = req.getParameter(JspHelper.NAMENODE_ADDRESS);
+    String nnAddr = StringEscapeUtils.escapeHtml(req.getParameter(JspHelper.NAMENODE_ADDRESS));
     int namenodeInfoPort = -1;
     if (namenodeInfoPortStr != null)
       namenodeInfoPort = Integer.parseInt(namenodeInfoPortStr);

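The changes above route every request-derived string through an HTML escaper before it is echoed into the page. A simplified stand-in for StringEscapeUtils.escapeHtml / HtmlQuoting.quoteHtmlChars, showing why a crafted path or NAMENODE_ADDRESS parameter can no longer inject markup:

    // Minimal illustration of the escaping applied above; this helper is a
    // simplified stand-in, not the Commons Lang or Hadoop implementation.
    public class HtmlEscapeSketch {
      static String escapeHtml(String s) {
        if (s == null) {
          return null;                     // keep the caller's null check working
        }
        StringBuilder sb = new StringBuilder(s.length());
        for (char c : s.toCharArray()) {
          switch (c) {
            case '<':  sb.append("&lt;");   break;
            case '>':  sb.append("&gt;");   break;
            case '&':  sb.append("&amp;");  break;
            case '"':  sb.append("&quot;"); break;
            default:   sb.append(c);
          }
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        String target = "/user/<script>alert(1)</script>";
        System.out.println("<h3>File or directory : " + escapeHtml(target)
            + " does not exist</h3>");
      }
    }
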
+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java

@@ -137,7 +137,7 @@ class FsVolumeList {
     if (removedVols != null && removedVols.size() > 0) {
       // Replace volume list
       volumes = Collections.unmodifiableList(volumeList);
-      FsDatasetImpl.LOG.info("Completed checkDirs. Removed " + removedVols.size()
+      FsDatasetImpl.LOG.warn("Completed checkDirs. Removed " + removedVols.size()
           + " volumes. Current volumes: " + this);
     }
 

+ 8 - 2
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java

@@ -69,6 +69,8 @@ public class BackupNode extends NameNode {
   private static final String BN_HTTP_ADDRESS_NAME_KEY = DFSConfigKeys.DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY;
   private static final String BN_HTTP_ADDRESS_DEFAULT = DFSConfigKeys.DFS_NAMENODE_BACKUP_HTTP_ADDRESS_DEFAULT;
   private static final String BN_SERVICE_RPC_ADDRESS_KEY = DFSConfigKeys.DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY;
+  private static final float  BN_SAFEMODE_THRESHOLD_PCT_DEFAULT = 1.5f;
+  private static final int    BN_SAFEMODE_EXTENSION_DEFAULT = Integer.MAX_VALUE;
 
   /** Name-node proxy */
   NamenodeProtocol namenode;
@@ -127,6 +129,10 @@ public class BackupNode extends NameNode {
 
   @Override // NameNode
   protected void loadNamesystem(Configuration conf) throws IOException {
+    conf.setFloat(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY,
+                                BN_SAFEMODE_THRESHOLD_PCT_DEFAULT);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY,
+                                BN_SAFEMODE_EXTENSION_DEFAULT);
     BackupImage bnImage = new BackupImage(conf);
     this.namesystem = new FSNamesystem(conf, bnImage);
     bnImage.setNamesystem(namesystem);
@@ -423,9 +429,9 @@ public class BackupNode extends NameNode {
         return;
       }
       if (OperationCategory.JOURNAL != op &&
-          !(OperationCategory.READ == op && allowStaleStandbyReads)) {
+          !(OperationCategory.READ == op && !isRole(NamenodeRole.CHECKPOINT))) {
         String msg = "Operation category " + op
-            + " is not supported at the BackupNode";
+            + " is not supported at " + getRole();
         throw new StandbyException(msg);
       }
     }

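The two constants above pin the BackupNode's safe-mode threshold at 1.5 and its extension at Integer.MAX_VALUE before the namesystem is loaded. Assuming the standard safe-mode check (safe blocks >= threshold * total blocks), a threshold above 1.0 can never be satisfied, so the backup namespace stays read-only; a toy check illustrating that:

    // Why 1.5f and Integer.MAX_VALUE keep the BackupNode permanently in safe
    // mode, under the assumption stated above; this is not the FSNamesystem code.
    public class BackupSafemodeSketch {
      static boolean thresholdReached(long safeBlocks, long totalBlocks,
          float thresholdPct) {
        long needed = (long) (thresholdPct * totalBlocks);
        return safeBlocks >= needed;
      }

      public static void main(String[] args) {
        long total = 1_000_000;
        System.out.println(thresholdReached(total, total, 0.999f)); // true: a NameNode leaves safe mode
        System.out.println(thresholdReached(total, total, 1.5f));   // false: the BackupNode never does
      }
    }
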
+ 17 - 13
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java

@@ -206,6 +206,7 @@ class Checkpointer extends Daemon {
     RemoteEditLogManifest manifest =
       getRemoteNamenodeProxy().getEditLogManifest(bnImage.getLastAppliedTxId() + 1);
 
+    boolean needReloadImage = false;
     if (!manifest.getLogs().isEmpty()) {
       RemoteEditLog firstRemoteLog = manifest.getLogs().get(0);
       // we don't have enough logs to roll forward using only logs. Need
@@ -218,13 +219,10 @@ class Checkpointer extends Daemon {
             bnStorage, true);
         bnImage.saveDigestAndRenameCheckpointImage(
             sig.mostRecentCheckpointTxId, downloadedHash);
-        
-        LOG.info("Loading image with txid " + sig.mostRecentCheckpointTxId);
-        File file = bnStorage.findImageFile(sig.mostRecentCheckpointTxId);
-        bnImage.reloadFromImageFile(file, backupNode.getNamesystem());
+        lastApplied = sig.mostRecentCheckpointTxId;
+        needReloadImage = true;
       }
-      
-      lastApplied = bnImage.getLastAppliedTxId();
+
       if (firstRemoteLog.getStartTxId() > lastApplied + 1) {
         throw new IOException("No logs to roll forward from " + lastApplied);
       }
@@ -234,7 +232,12 @@ class Checkpointer extends Daemon {
         TransferFsImage.downloadEditsToStorage(
             backupNode.nnHttpAddress, log, bnStorage);
       }
-  
+
+      if(needReloadImage) {
+        LOG.info("Loading image with txid " + sig.mostRecentCheckpointTxId);
+        File file = bnStorage.findImageFile(sig.mostRecentCheckpointTxId);
+        bnImage.reloadFromImageFile(file, backupNode.getNamesystem());
+      }
       rollForwardByApplyingLogs(manifest, bnImage, backupNode.getNamesystem());
     }
     
@@ -243,8 +246,9 @@ class Checkpointer extends Daemon {
     backupNode.namesystem.writeLock();
     try {
       backupNode.namesystem.dir.setReady();
-      backupNode.namesystem.setBlockTotal();
-      
+      if(backupNode.namesystem.getBlocksTotal() > 0) {
+        backupNode.namesystem.setBlockTotal();
+      }
       bnImage.saveFSImageInAllDirs(backupNode.getNamesystem(), txid);
       bnStorage.writeAll();
     } finally {
@@ -284,12 +288,12 @@ class Checkpointer extends Daemon {
   
     List<EditLogInputStream> editsStreams = Lists.newArrayList();    
     for (RemoteEditLog log : manifest.getLogs()) {
-      File f = dstStorage.findFinalizedEditsFile(
-          log.getStartTxId(), log.getEndTxId());
-      if (log.getStartTxId() > dstImage.getLastAppliedTxId()) {
+      if (log.getEndTxId() > dstImage.getLastAppliedTxId()) {
+        File f = dstStorage.findFinalizedEditsFile(
+            log.getStartTxId(), log.getEndTxId());
         editsStreams.add(new EditLogFileInputStream(f, log.getStartTxId(), 
                                                     log.getEndTxId(), true));
-       }
+      }
     }
     LOG.info("Checkpointer about to load edits from " +
         editsStreams.size() + " stream(s).");

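The Checkpointer change above selects edit segments by their end transaction id rather than their start, so a segment that straddles the last applied txid is still replayed. A small sketch of that selection, with RemoteLog standing in for RemoteEditLog:

    import java.util.ArrayList;
    import java.util.List;

    public class EditLogSelectionSketch {
      static class RemoteLog {
        final long startTxId, endTxId;
        RemoteLog(long s, long e) { startTxId = s; endTxId = e; }
        public String toString() { return "[" + startTxId + "," + endTxId + "]"; }
      }

      // Keep any finalized segment that still contains unapplied transactions,
      // i.e. whose endTxId is beyond the last applied txid.
      static List<RemoteLog> logsToApply(List<RemoteLog> manifest, long lastApplied) {
        List<RemoteLog> result = new ArrayList<>();
        for (RemoteLog log : manifest) {
          if (log.endTxId > lastApplied) {
            result.add(log);
          }
        }
        return result;
      }

      public static void main(String[] args) {
        List<RemoteLog> manifest = new ArrayList<>();
        manifest.add(new RemoteLog(1, 100));
        manifest.add(new RemoteLog(101, 200));
        manifest.add(new RemoteLog(201, 300));
        // With lastApplied = 150, the 101-200 segment still holds txns 151-200,
        // so it is replayed even though it starts before lastApplied.
        System.out.println(logsToApply(manifest, 150)); // [[101,200], [201,300]]
      }
    }
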
+ 122 - 189
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java

@@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.Options.Rename;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIsNotDirectoryException;
 import org.apache.hadoop.fs.UnresolvedLinkException;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -61,6 +62,7 @@ import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.INodeDirectory.INodesInPath;
 import org.apache.hadoop.hdfs.util.ByteArray;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 
 /*************************************************
@@ -73,12 +75,15 @@ import com.google.common.base.Preconditions;
  * 
  *************************************************/
 public class FSDirectory implements Closeable {
+  private static INodeDirectoryWithQuota createRoot(FSNamesystem namesystem) {
+    return new INodeDirectoryWithQuota(INodeDirectory.ROOT_NAME,
+        namesystem.createFsOwnerPermissions(new FsPermission((short)0755)));
+  }
 
   INodeDirectoryWithQuota rootDir;
   FSImage fsImage;  
   private final FSNamesystem namesystem;
   private volatile boolean ready = false;
-  private static final long UNKNOWN_DISK_SPACE = -1;
   private final int maxComponentLength;
   private final int maxDirItems;
   private final int lsLimit;  // max list limit
@@ -121,9 +126,7 @@ public class FSDirectory implements Closeable {
   FSDirectory(FSImage fsImage, FSNamesystem ns, Configuration conf) {
     this.dirLock = new ReentrantReadWriteLock(true); // fair
     this.cond = dirLock.writeLock().newCondition();
-    rootDir = new INodeDirectoryWithQuota(INodeDirectory.ROOT_NAME,
-        ns.createFsOwnerPermissions(new FsPermission((short)0755)),
-        Long.MAX_VALUE, UNKNOWN_DISK_SPACE);
+    rootDir = createRoot(ns);
     this.fsImage = fsImage;
     int configuredLimit = conf.getInt(
         DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT);
@@ -175,6 +178,12 @@ public class FSDirectory implements Closeable {
       writeUnlock();
     }
   }
+  
+  //This is for testing purposes only
+  @VisibleForTesting
+  boolean isReady() {
+    return ready;
+  }
 
   // exposed for unit tests
   protected void setReady(boolean flag) {
@@ -248,13 +257,14 @@ public class FSDirectory implements Closeable {
                                  permissions,replication,
                                  preferredBlockSize, modTime, clientName, 
                                  clientMachine, clientNode);
+    boolean added = false;
     writeLock();
     try {
-      newNode = addNode(path, newNode, UNKNOWN_DISK_SPACE);
+      added = addINode(path, newNode);
     } finally {
       writeUnlock();
     }
-    if (newNode == null) {
+    if (!added) {
       NameNode.stateChangeLog.info("DIR* addFile: failed to add " + path);
       return null;
     }
@@ -274,7 +284,7 @@ public class FSDirectory implements Closeable {
                             boolean underConstruction,
                             String clientName,
                             String clientMachine) {
-    INode newNode;
+    final INode newNode;
     assert hasWriteLock();
     if (underConstruction) {
       newNode = new INodeFileUnderConstruction(
@@ -287,45 +297,17 @@ public class FSDirectory implements Closeable {
     }
 
     try {
-      newNode = addNode(path, newNode, UNKNOWN_DISK_SPACE);
+      if (addINode(path, newNode)) {
+        return newNode;
+      }
     } catch (IOException e) {
       if(NameNode.stateChangeLog.isDebugEnabled()) {
         NameNode.stateChangeLog.debug(
             "DIR* FSDirectory.unprotectedAddFile: exception when add " + path
                 + " to the file system", e);
       }
-      return null;
     }
-    return newNode;
-  }
-
-  INodeDirectory addToParent(byte[] src, INodeDirectory parentINode,
-      INode newNode, boolean propagateModTime) {
-    // NOTE: This does not update space counts for parents
-    INodeDirectory newParent = null;
-    writeLock();
-    try {
-      try {
-        newParent = rootDir.addToParent(src, newNode, parentINode,
-                                        propagateModTime);
-        cacheName(newNode);
-      } catch (FileNotFoundException e) {
-        return null;
-      }
-      if(newParent == null)
-        return null;
-      if(!newNode.isDirectory() && !newNode.isSymlink()) {
-        // Add file->block mapping
-        INodeFile newF = (INodeFile)newNode;
-        BlockInfo[] blocks = newF.getBlocks();
-        for (int i = 0; i < blocks.length; i++) {
-          newF.setBlock(i, getBlockManager().addBlockCollection(blocks[i], newF));
-        }
-      }
-    } finally {
-      writeUnlock();
-    }
-    return newParent;
+    return null;
   }
 
   /**
@@ -533,7 +515,7 @@ public class FSDirectory implements Closeable {
       return true;
     }
     if (srcInode.isSymlink() && 
-        dst.equals(((INodeSymlink)srcInode).getLinkValue())) {
+        dst.equals(((INodeSymlink)srcInode).getSymlinkString())) {
       throw new FileAlreadyExistsException(
           "Cannot rename symlink "+src+" to its target "+dst);
     }
@@ -567,12 +549,12 @@ public class FSDirectory implements Closeable {
     // Ensure dst has quota to accommodate rename
     verifyQuotaForRename(srcInodes, dstInodes);
     
-    INode dstChild = null;
+    boolean added = false;
     INode srcChild = null;
     String srcChildName = null;
     try {
       // remove src
-      srcChild = removeChild(srcInodesInPath, srcInodes.length-1);
+      srcChild = removeLastINode(srcInodesInPath);
       if (srcChild == null) {
         NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
             + "failed to rename " + src + " to " + dst
@@ -583,9 +565,8 @@ public class FSDirectory implements Closeable {
       srcChild.setLocalName(dstComponents[dstInodes.length-1]);
       
       // add src to the destination
-      dstChild = addChildNoQuotaCheck(dstInodesInPath, dstInodes.length-1,
-          srcChild, UNKNOWN_DISK_SPACE);
-      if (dstChild != null) {
+      added = addLastINodeNoQuotaCheck(dstInodesInPath, srcChild);
+      if (added) {
         srcChild = null;
         if (NameNode.stateChangeLog.isDebugEnabled()) {
           NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedRenameTo: " 
@@ -597,11 +578,10 @@ public class FSDirectory implements Closeable {
         return true;
       }
     } finally {
-      if (dstChild == null && srcChild != null) {
+      if (!added && srcChild != null) {
         // put it back
         srcChild.setLocalName(srcChildName);
-        addChildNoQuotaCheck(srcInodesInPath, srcInodes.length - 1, srcChild, 
-            UNKNOWN_DISK_SPACE);
+        addLastINodeNoQuotaCheck(srcInodesInPath, srcChild);
       }
     }
     NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
@@ -656,7 +636,7 @@ public class FSDirectory implements Closeable {
           "The source "+src+" and destination "+dst+" are the same");
     }
     if (srcInode.isSymlink() && 
-        dst.equals(((INodeSymlink)srcInode).getLinkValue())) {
+        dst.equals(((INodeSymlink)srcInode).getSymlinkString())) {
       throw new FileAlreadyExistsException(
           "Cannot rename symlink "+src+" to its target "+dst);
     }
@@ -695,14 +675,15 @@ public class FSDirectory implements Closeable {
             + error);
         throw new FileAlreadyExistsException(error);
       }
-      List<INode> children = dstInode.isDirectory() ? 
-          ((INodeDirectory) dstInode).getChildren() : null;
-      if (children != null && children.size() != 0) {
-        error = "rename cannot overwrite non empty destination directory "
-            + dst;
-        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
-            + error);
-        throw new IOException(error);
+      if (dstInode.isDirectory()) {
+        final List<INode> children = ((INodeDirectory) dstInode
+            ).getChildrenList();
+        if (!children.isEmpty()) {
+          error = "rename destination directory is not empty: " + dst;
+          NameNode.stateChangeLog.warn(
+              "DIR* FSDirectory.unprotectedRenameTo: " + error);
+          throw new IOException(error);
+        }
       }
     }
     if (dstInodes[dstInodes.length - 2] == null) {
@@ -720,7 +701,7 @@ public class FSDirectory implements Closeable {
 
     // Ensure dst has quota to accommodate rename
     verifyQuotaForRename(srcInodes, dstInodes);
-    INode removedSrc = removeChild(srcInodesInPath, srcInodes.length - 1);
+    INode removedSrc = removeLastINode(srcInodesInPath);
     if (removedSrc == null) {
       error = "Failed to rename " + src + " to " + dst
           + " because the source can not be removed";
@@ -733,18 +714,13 @@ public class FSDirectory implements Closeable {
     INode removedDst = null;
     try {
       if (dstInode != null) { // dst exists remove it
-        removedDst = removeChild(dstInodesInPath, dstInodes.length - 1);
+        removedDst = removeLastINode(dstInodesInPath);
         dstChildName = removedDst.getLocalName();
       }
 
-      INode dstChild = null;
       removedSrc.setLocalName(dstComponents[dstInodes.length - 1]);
       // add src as dst to complete rename
-      dstChild = addChildNoQuotaCheck(dstInodesInPath, dstInodes.length - 1,
-          removedSrc, UNKNOWN_DISK_SPACE);
-
-      int filesDeleted = 0;
-      if (dstChild != null) {
+      if (addLastINodeNoQuotaCheck(dstInodesInPath, removedSrc)) {
         removedSrc = null;
         if (NameNode.stateChangeLog.isDebugEnabled()) {
           NameNode.stateChangeLog.debug(
@@ -755,6 +731,7 @@ public class FSDirectory implements Closeable {
         dstInodes[dstInodes.length - 2].setModificationTime(timestamp);
 
         // Collect the blocks and remove the lease for previous dst
+        int filesDeleted = 0;
         if (removedDst != null) {
           INode rmdst = removedDst;
           removedDst = null;
@@ -768,14 +745,12 @@ public class FSDirectory implements Closeable {
       if (removedSrc != null) {
         // Rename failed - restore src
         removedSrc.setLocalName(srcChildName);
-        addChildNoQuotaCheck(srcInodesInPath, srcInodes.length - 1, removedSrc, 
-            UNKNOWN_DISK_SPACE);
+        addLastINodeNoQuotaCheck(srcInodesInPath, removedSrc);
       }
       if (removedDst != null) {
         // Rename failed - restore dst
         removedDst.setLocalName(dstChildName);
-        addChildNoQuotaCheck(dstInodesInPath, dstInodes.length - 1, removedDst, 
-            UNKNOWN_DISK_SPACE);
+        addLastINodeNoQuotaCheck(dstInodesInPath, removedDst);
       }
     }
     NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedRenameTo: "
@@ -817,11 +792,7 @@ public class FSDirectory implements Closeable {
     final INodesInPath inodesInPath = rootDir.getExistingPathINodes(src, true);
     final INode[] inodes = inodesInPath.getINodes();
     INode inode = inodes[inodes.length - 1];
-    if (inode == null) {
-      return null;
-    }
-    assert !inode.isSymlink();
-    if (inode.isDirectory()) {
+    if (inode == null || !inode.isFile()) {
       return null;
     }
     INodeFile fileNode = (INodeFile)inode;
@@ -840,22 +811,15 @@ public class FSDirectory implements Closeable {
   }
 
   /**
-   * Get the blocksize of a file
-   * @param filename the filename
-   * @return the number of bytes 
+   * @param path the file path
+   * @return the block size of the file. 
    */
-  long getPreferredBlockSize(String filename) throws UnresolvedLinkException,
+  long getPreferredBlockSize(String path) throws UnresolvedLinkException,
       FileNotFoundException, IOException {
     readLock();
     try {
-      INode inode = rootDir.getNode(filename, false);
-      if (inode == null) {
-        throw new FileNotFoundException("File does not exist: " + filename);
-      }
-      if (inode.isDirectory() || inode.isSymlink()) {
-        throw new IOException("Getting block size of non-file: "+ filename); 
-      }
-      return ((INodeFile)inode).getPreferredBlockSize();
+      return INodeFile.valueOf(rootDir.getNode(path, false), path
+          ).getPreferredBlockSize();
     } finally {
       readUnlock();
     }
@@ -869,9 +833,7 @@ public class FSDirectory implements Closeable {
       if (inode == null) {
          return false;
       }
-      return inode.isDirectory() || inode.isSymlink() 
-        ? true 
-        : ((INodeFile)inode).getBlocks() != null;
+      return !inode.isFile() || ((INodeFile)inode).getBlocks() != null;
     } finally {
       readUnlock();
     }
@@ -1087,14 +1049,13 @@ public class FSDirectory implements Closeable {
           " because the root is not allowed to be deleted");
       return 0;
     }
-    int pos = inodes.length - 1;
     // Remove the node from the namespace
-    targetNode = removeChild(inodesInPath, pos);
+    targetNode = removeLastINode(inodesInPath);
     if (targetNode == null) {
       return 0;
     }
     // set the parent's modification time
-    inodes[pos-1].setModificationTime(mtime);
+    inodes[inodes.length - 2].setModificationTime(mtime);
     int filesRemoved = targetNode.collectSubtreeBlocksAndClear(collectedBlocks);
     if (NameNode.stateChangeLog.isDebugEnabled()) {
       NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
@@ -1123,7 +1084,7 @@ public class FSDirectory implements Closeable {
       /* Currently oldnode and newnode are assumed to contain the same
        * blocks. Otherwise, blocks need to be removed from the blocksMap.
        */
-      rootDir.addNode(path, newnode); 
+      rootDir.addINode(path, newnode); 
 
       int index = 0;
       for (BlockInfo b : newnode.getBlocks()) {
@@ -1167,7 +1128,7 @@ public class FSDirectory implements Closeable {
       HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing];
       for (int i=0; i<numOfListing; i++) {
         INode cur = contents.get(startChild+i);
-        listing[i] = createFileStatus(cur.name, cur, needLocation);
+        listing[i] = createFileStatus(cur.getLocalNameBytes(), cur, needLocation);
       }
       return new DirectoryListing(
           listing, totalNumChildren-startChild-numOfListing);
@@ -1206,14 +1167,8 @@ public class FSDirectory implements Closeable {
     waitForReady();
     readLock();
     try {
-      INode targetNode = rootDir.getNode(src, false);
-      if (targetNode == null)
-        return null;
-      if (targetNode.isDirectory())
-        return null;
-      if (targetNode.isSymlink()) 
-        return null;
-      return ((INodeFile)targetNode).getBlocks();
+      final INode i = rootDir.getNode(src, false);
+      return i != null && i.isFile()? ((INodeFile)i).getBlocks(): null;
     } finally {
       readUnlock();
     }
@@ -1231,21 +1186,6 @@ public class FSDirectory implements Closeable {
     }
   }
   
-  /**
-   * Get the parent node of path.
-   * 
-   * @param path the path to explore
-   * @return its parent node
-   */
-  INodeDirectory getParent(byte[][] path) 
-    throws FileNotFoundException, UnresolvedLinkException {
-    readLock();
-    try {
-      return rootDir.getParent(path);
-    } finally {
-      readUnlock();
-    }
-  }
   
   /** 
    * Check whether the filepath could be created
@@ -1287,20 +1227,17 @@ public class FSDirectory implements Closeable {
    * @param nsDelta the delta change of namespace
    * @param dsDelta the delta change of diskspace
    * @throws QuotaExceededException if the new count violates any quota limit
-   * @throws FileNotFound if path does not exist.
+   * @throws FileNotFoundException if path does not exist.
    */
   void updateSpaceConsumed(String path, long nsDelta, long dsDelta)
-                                         throws QuotaExceededException,
-                                                FileNotFoundException,
-                                                UnresolvedLinkException {
+      throws QuotaExceededException, FileNotFoundException, UnresolvedLinkException {
     writeLock();
     try {
       final INodesInPath inodesInPath = rootDir.getExistingPathINodes(path, false);
       final INode[] inodes = inodesInPath.getINodes();
       int len = inodes.length;
       if (inodes[len - 1] == null) {
-        throw new FileNotFoundException(path + 
-                                        " does not exist under rootDir.");
+        throw new FileNotFoundException("Path not found: " + path);
       }
       updateCount(inodesInPath, len-1, nsDelta, dsDelta, true);
     } finally {
@@ -1364,7 +1301,7 @@ public class FSDirectory implements Closeable {
     for(int i=0; i < numOfINodes; i++) {
       if (inodes[i].isQuotaSet()) { // a directory with quota
         INodeDirectoryWithQuota node =(INodeDirectoryWithQuota)inodes[i]; 
-        node.unprotectedUpdateNumItemsInTree(nsDelta, dsDelta);
+        node.addSpaceConsumed(nsDelta, dsDelta);
       }
     }
   }
@@ -1524,15 +1461,17 @@ public class FSDirectory implements Closeable {
       long timestamp) throws QuotaExceededException {
     assert hasWriteLock();
     final INodeDirectory dir = new INodeDirectory(name, permission, timestamp);
-    final INode inode = addChild(inodesInPath, pos, dir, -1, true);
-    inodesInPath.setINode(pos, inode);
+    if (addChild(inodesInPath, pos, dir, true)) {
+      inodesInPath.setINode(pos, dir);
+    }
   }
   
-  /** Add a node child to the namespace. The full path name of the node is src.
-   * childDiskspace should be -1, if unknown. 
+  /**
+   * Add the given child to the namespace.
+   * @param src The full path name of the child node.
    * @throw QuotaExceededException is thrown if it violates quota limit
    */
-  private <T extends INode> T addNode(String src, T child, long childDiskspace
+  private boolean addINode(String src, INode child
       ) throws QuotaExceededException, UnresolvedLinkException {
     byte[][] components = INode.getPathComponents(src);
     byte[] path = components[components.length-1];
@@ -1542,8 +1481,7 @@ public class FSDirectory implements Closeable {
     try {
       INodesInPath inodesInPath = rootDir.getExistingPathINodes(components,
           components.length, false);
-      return addChild(inodesInPath, inodesInPath.getINodes().length-1, child,
-          childDiskspace, true);
+      return addLastINode(inodesInPath, child, true);
     } finally {
       writeUnlock();
     }
@@ -1667,14 +1605,24 @@ public class FSDirectory implements Closeable {
     }
   }
   
+  /**
+   * The same as {@link #addChild(INodesInPath, int, INode, boolean)}
+   * with pos = length - 1.
+   */
+  private boolean addLastINode(INodesInPath inodesInPath,
+      INode inode, boolean checkQuota) throws QuotaExceededException {
+    final int pos = inodesInPath.getINodes().length - 1;
+    return addChild(inodesInPath, pos, inode, checkQuota);
+  }
+
   /** Add a node child to the inodes at index pos. 
    * Its ancestors are stored at [0, pos-1].
-   * @return the added node. 
+   * @return false if the child with this name already exists; 
+   *         otherwise return true;
    * @throw QuotaExceededException is thrown if it violates quota limit
    */
-  private <T extends INode> T addChild(INodesInPath inodesInPath, int pos,
-      T child, long childDiskspace,
-      boolean checkQuota) throws QuotaExceededException {
+  private boolean addChild(INodesInPath inodesInPath, int pos,
+      INode child, boolean checkQuota) throws QuotaExceededException {
     final INode[] inodes = inodesInPath.getINodes();
     // The filesystem limits are not really quotas, so this check may appear
     // odd. It's because a rename operation deletes the src, tries to add
@@ -1688,38 +1636,34 @@ public class FSDirectory implements Closeable {
     
     INode.DirCounts counts = new INode.DirCounts();
     child.spaceConsumedInTree(counts);
-    if (childDiskspace < 0) {
-      childDiskspace = counts.getDsCount();
-    }
-    updateCount(inodesInPath, pos, counts.getNsCount(), childDiskspace, checkQuota);
+    updateCount(inodesInPath, pos, counts.getNsCount(), counts.getDsCount(), checkQuota);
     if (inodes[pos-1] == null) {
       throw new NullPointerException("Panic: parent does not exist");
     }
-    final T addedNode = ((INodeDirectory)inodes[pos-1]).addChild(child, true);
-    if (addedNode == null) {
-      updateCount(inodesInPath, pos, -counts.getNsCount(), -childDiskspace, true);
+    final boolean added = ((INodeDirectory)inodes[pos-1]).addChild(child, true);
+    if (!added) {
+      updateCount(inodesInPath, pos, -counts.getNsCount(), -counts.getDsCount(), true);
     }
-    return addedNode;
+    return added;
   }
   
-  private <T extends INode> T addChildNoQuotaCheck(INodesInPath inodesInPath,
-      int pos, T child, long childDiskspace) {
-    T inode = null;
+  private boolean addLastINodeNoQuotaCheck(INodesInPath inodesInPath, INode i) {
     try {
-      inode = addChild(inodesInPath, pos, child, childDiskspace, false);
+      return addLastINode(inodesInPath, i, false);
     } catch (QuotaExceededException e) {
       NameNode.LOG.warn("FSDirectory.addChildNoQuotaCheck - unexpected", e); 
     }
-    return inode;
+    return false;
   }
   
-  /** Remove an inode at index pos from the namespace.
-   * Its ancestors are stored at [0, pos-1].
+  /**
+   * Remove the last inode in the path from the namespace.
    * Count of each ancestor with quota is also updated.
-   * Return the removed node; null if the removal fails.
+   * @return the removed node; null if the removal fails.
    */
-  private INode removeChild(final INodesInPath inodesInPath, int pos) {
+  private INode removeLastINode(final INodesInPath inodesInPath) {
     final INode[] inodes = inodesInPath.getINodes();
+    final int pos = inodes.length - 1;
     INode removedNode = ((INodeDirectory)inodes[pos-1]).removeChild(inodes[pos]);
     if (removedNode != null) {
       INode.DirCounts counts = new INode.DirCounts();
@@ -1838,20 +1782,21 @@ public class FSDirectory implements Closeable {
    * See {@link ClientProtocol#setQuota(String, long, long)} for the contract.
    * Sets quota for for a directory.
    * @returns INodeDirectory if any of the quotas have changed. null other wise.
-   * @throws FileNotFoundException if the path does not exist or is a file
+   * @throws FileNotFoundException if the path does not exist.
+   * @throws PathIsNotDirectoryException if the path is not a directory.
    * @throws QuotaExceededException if the directory tree size is 
    *                                greater than the given quota
    * @throws UnresolvedLinkException if a symlink is encountered in src.
    */
   INodeDirectory unprotectedSetQuota(String src, long nsQuota, long dsQuota)
-    throws FileNotFoundException, QuotaExceededException, 
-      UnresolvedLinkException {
+      throws FileNotFoundException, PathIsNotDirectoryException,
+      QuotaExceededException, UnresolvedLinkException {
     assert hasWriteLock();
     // sanity check
     if ((nsQuota < 0 && nsQuota != HdfsConstants.QUOTA_DONT_SET && 
-         nsQuota < HdfsConstants.QUOTA_RESET) || 
+         nsQuota != HdfsConstants.QUOTA_RESET) || 
         (dsQuota < 0 && dsQuota != HdfsConstants.QUOTA_DONT_SET && 
-          dsQuota < HdfsConstants.QUOTA_RESET)) {
+          dsQuota != HdfsConstants.QUOTA_RESET)) {
       throw new IllegalArgumentException("Illegal value for nsQuota or " +
                                          "dsQuota : " + nsQuota + " and " +
                                          dsQuota);
@@ -1861,15 +1806,10 @@ public class FSDirectory implements Closeable {
 
     final INodesInPath inodesInPath = rootDir.getExistingPathINodes(src, true);
     final INode[] inodes = inodesInPath.getINodes();
-    INode targetNode = inodes[inodes.length-1];
-    if (targetNode == null) {
-      throw new FileNotFoundException("Directory does not exist: " + srcs);
-    } else if (!targetNode.isDirectory()) {
-      throw new FileNotFoundException("Cannot set quota on a file: " + srcs);  
-    } else if (targetNode.isRoot() && nsQuota == HdfsConstants.QUOTA_RESET) {
+    INodeDirectory dirNode = INodeDirectory.valueOf(inodes[inodes.length-1], srcs);
+    if (dirNode.isRoot() && nsQuota == HdfsConstants.QUOTA_RESET) {
       throw new IllegalArgumentException("Cannot clear namespace quota on root.");
     } else { // a directory inode
-      INodeDirectory dirNode = (INodeDirectory)targetNode;
       long oldNsQuota = dirNode.getNsQuota();
       long oldDsQuota = dirNode.getDsQuota();
       if (nsQuota == HdfsConstants.QUOTA_DONT_SET) {
@@ -1903,13 +1843,12 @@ public class FSDirectory implements Closeable {
   }
   
   /**
-   * See {@link ClientProtocol#setQuota(String, long, long)} for the 
-   * contract.
+   * See {@link ClientProtocol#setQuota(String, long, long)} for the contract.
    * @see #unprotectedSetQuota(String, long, long)
    */
   void setQuota(String src, long nsQuota, long dsQuota) 
-    throws FileNotFoundException, QuotaExceededException,
-    UnresolvedLinkException { 
+      throws FileNotFoundException, PathIsNotDirectoryException,
+      QuotaExceededException, UnresolvedLinkException {
     writeLock();
     try {
       INodeDirectory dir = unprotectedSetQuota(src, nsQuota, dsQuota);
@@ -1981,9 +1920,14 @@ public class FSDirectory implements Closeable {
    * Reset the entire namespace tree.
    */
   void reset() {
-    rootDir = new INodeDirectoryWithQuota(INodeDirectory.ROOT_NAME,
-        getFSNamesystem().createFsOwnerPermissions(new FsPermission((short)0755)),
-        Integer.MAX_VALUE, -1);
+    writeLock();
+    try {
+      setReady(false);
+      rootDir = createRoot(getFSNamesystem());
+      nameCache.reset();
+    } finally {
+      writeUnlock();
+    }
   }
 
   /**
@@ -2074,7 +2018,7 @@ public class FSDirectory implements Closeable {
   INodeSymlink addSymlink(String path, String target,
       PermissionStatus dirPerms, boolean createParent)
       throws UnresolvedLinkException, FileAlreadyExistsException,
-      QuotaExceededException, IOException {
+      QuotaExceededException {
     waitForReady();
 
     final long modTime = now();
@@ -2088,7 +2032,7 @@ public class FSDirectory implements Closeable {
     INodeSymlink newNode  = null;
     writeLock();
     try {
-      newNode = unprotectedSymlink(path, target, modTime, modTime,
+      newNode = unprotectedAddSymlink(path, target, modTime, modTime,
           new PermissionStatus(userName, null, FsPermission.getDefault()));
     } finally {
       writeUnlock();
@@ -2108,23 +2052,12 @@ public class FSDirectory implements Closeable {
   /**
    * Add the specified path into the namespace. Invoked from edit log processing.
    */
-  INodeSymlink unprotectedSymlink(String path, String target, long modTime, 
+  INodeSymlink unprotectedAddSymlink(String path, String target, long mtime, 
                                   long atime, PermissionStatus perm) 
-      throws UnresolvedLinkException {
+      throws UnresolvedLinkException, QuotaExceededException {
     assert hasWriteLock();
-    INodeSymlink newNode = new INodeSymlink(target, modTime, atime, perm);
-    try {
-      newNode = addNode(path, newNode, UNKNOWN_DISK_SPACE);
-    } catch (UnresolvedLinkException e) {
-      /* All UnresolvedLinkExceptions should have been resolved by now, but we
-       * should re-throw them in case that changes so they are not swallowed 
-       * by catching IOException below.
-       */
-      throw e;
-    } catch (IOException e) {
-      return null;
-    }
-    return newNode;
+    final INodeSymlink symlink = new INodeSymlink(target, mtime, atime, perm);
+    return addINode(path, symlink)? symlink: null;
   }
   
   /**
@@ -2133,7 +2066,7 @@ public class FSDirectory implements Closeable {
    */
   void cacheName(INode inode) {
     // Name is cached only for files
-    if (inode.isDirectory() || inode.isSymlink()) {
+    if (!inode.isFile()) {
       return;
     }
     ByteArray name = new ByteArray(inode.getLocalNameBytes());

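Several FSDirectory methods above switch from returning the added inode to returning a boolean, charging quota counts first and refunding them when the child name already exists. A toy version of that charge-then-roll-back pattern, with simplified stand-ins for INodeDirectory and its counts:

    import java.util.HashMap;
    import java.util.Map;

    public class AddChildSketch {
      static class Dir {
        final Map<String, Object> children = new HashMap<>();
        long nsCount;                        // namespace objects charged to this dir

        boolean addChild(String name, Object child) {
          return children.putIfAbsent(name, child) == null;
        }
      }

      static boolean addWithQuota(Dir parent, String name, Object child) {
        parent.nsCount += 1;                 // charge quota optimistically
        boolean added = parent.addChild(name, child);
        if (!added) {
          parent.nsCount -= 1;               // roll back: name already existed
        }
        return added;
      }

      public static void main(String[] args) {
        Dir d = new Dir();
        System.out.println(addWithQuota(d, "foo", new Object())); // true
        System.out.println(addWithQuota(d, "foo", new Object())); // false, count unchanged
        System.out.println(d.nsCount);                            // 1
      }
    }
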
+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java

@@ -426,7 +426,7 @@ public class FSEditLogLoader {
     }
     case OP_SYMLINK: {
       SymlinkOp symlinkOp = (SymlinkOp)op;
-      fsDir.unprotectedSymlink(symlinkOp.path, symlinkOp.value,
+      fsDir.unprotectedAddSymlink(symlinkOp.path, symlinkOp.value,
                                symlinkOp.mtime, symlinkOp.atime,
                                symlinkOp.permissionStatus);
       break;

+ 2 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java

@@ -38,7 +38,6 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
-import org.apache.hadoop.hdfs.server.common.GenerationStamp;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
 import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.Storage.FormatConfirmable;
@@ -1020,6 +1019,7 @@ public class FSImage implements Closeable {
   NamenodeCommand startCheckpoint(NamenodeRegistration bnReg, // backup node
                                   NamenodeRegistration nnReg) // active name-node
   throws IOException {
+    LOG.info("Start checkpoint at txid " + getEditLog().getLastWrittenTxId());
     String msg = null;
     // Verify that checkpoint is allowed
     if(bnReg.getNamespaceID() != storage.getNamespaceID())
@@ -1059,6 +1059,7 @@ public class FSImage implements Closeable {
    * @throws IOException if the checkpoint fields are inconsistent
    */
   void endCheckpoint(CheckpointSignature sig) throws IOException {
+    LOG.info("End checkpoint at txid " + getEditLog().getLastWrittenTxId());
     sig.validateStorageInfo(this);
   }
 

+ 32 - 7
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java

@@ -43,6 +43,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion;
 import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.io.Text;
@@ -202,7 +203,7 @@ class FSImageFormat {
       fsDir.rootDir.setQuota(nsQuota, dsQuota);
     }
     fsDir.rootDir.setModificationTime(root.getModificationTime());
-    fsDir.rootDir.setPermissionStatus(root.getPermissionStatus());    
+    fsDir.rootDir.clonePermissionStatus(root);    
   }
 
   /** 
@@ -257,7 +258,8 @@ class FSImageFormat {
        INode newNode = loadINode(in); // read rest of inode
 
        // add to parent
-       namesystem.dir.addToParent(localName, parent, newNode, false);
+       newNode.setLocalName(localName);
+       addToParent(parent, newNode);
      }
      return numChildren;
    }
@@ -286,13 +288,36 @@ class FSImageFormat {
       }
       // check if the new inode belongs to the same parent
       if(!isParent(pathComponents, parentPath)) {
-        parentINode = fsDir.getParent(pathComponents);
+        parentINode = fsDir.rootDir.getParent(pathComponents);
         parentPath = getParent(pathComponents);
       }
 
       // add new inode
-      parentINode = fsDir.addToParent(pathComponents[pathComponents.length-1], 
-          parentINode, newNode, false);
+      newNode.setLocalName(pathComponents[pathComponents.length-1]);
+      addToParent(parentINode, newNode);
+    }
+  }
+
+  /**
+   * Add the child node to parent and, if child is a file, update block map.
+   * This method is only used for image loading so that synchronization,
+   * modification time update and space count update are not needed.
+   */
+  void addToParent(INodeDirectory parent, INode child) {
+    // NOTE: This does not update space counts for parents
+    if (!parent.addChild(child, false)) {
+      return;
+    }
+    namesystem.dir.cacheName(child);
+
+    if (child.isFile()) {
+      // Add file->block mapping
+      final INodeFile file = (INodeFile)child;
+      final BlockInfo[] blocks = file.getBlocks();
+      final BlockManager bm = namesystem.getBlockManager();
+      for (int i = 0; i < blocks.length; i++) {
+        file.setBlock(i, bm.addBlockCollection(blocks[i], file));
+      }
     }
   }
 
@@ -530,8 +555,8 @@ class FSImageFormat {
     private void saveImage(ByteBuffer currentDirName,
                                   INodeDirectory current,
                                   DataOutputStream out) throws IOException {
-      List<INode> children = current.getChildren();
-      if (children == null || children.isEmpty())
+      final List<INode> children = current.getChildrenList();
+      if (children.isEmpty())
         return;
       // print prefix (parent directory name)
       int prefixLen = currentDirName.position();

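The addToParent() helper above attaches a loaded inode to its parent and, for files, registers each block in the block map; locking, modification-time and quota updates are skipped because it only runs during image loading. A simplified stand-in version of that flow:

    import java.util.HashMap;
    import java.util.Map;

    // The types here are simplified stand-ins for INodeDirectory, INodeFile
    // and the BlockManager's block map; this is not the FSImageFormat code.
    public class ImageLoadSketch {
      static class Node { final String name; Node(String n) { name = n; } }
      static class FileNode extends Node {
        final long[] blockIds;
        FileNode(String n, long... ids) { super(n); blockIds = ids; }
      }
      static class DirNode extends Node {
        final Map<String, Node> children = new HashMap<>();
        DirNode(String n) { super(n); }
        boolean addChild(Node c) { return children.putIfAbsent(c.name, c) == null; }
      }

      static void addToParent(DirNode parent, Node child, Map<Long, Node> blockMap) {
        if (!parent.addChild(child)) {
          return;                                  // duplicate entry; nothing to register
        }
        if (child instanceof FileNode) {
          FileNode file = (FileNode) child;
          for (long blockId : file.blockIds) {
            blockMap.put(blockId, file);           // block -> owning file mapping
          }
        }
      }

      public static void main(String[] args) {
        DirNode root = new DirNode("/");
        Map<Long, Node> blockMap = new HashMap<>();
        addToParent(root, new FileNode("a.txt", 101L, 102L), blockMap);
        System.out.println(blockMap.keySet());     // [101, 102]
      }
    }
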
+ 1 - 1
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java

@@ -168,7 +168,7 @@ public class FSImageSerialization {
       out.writeLong(0);   // access time
       out.writeLong(0);   // preferred block size
       out.writeInt(-2);   // # of blocks
-      Text.writeString(out, ((INodeSymlink)node).getLinkValue());
+      Text.writeString(out, ((INodeSymlink)node).getSymlinkString());
       filePerm.fromShort(node.getFsPermissionShort());
       PermissionStatus.write(out, node.getUserName(),
                              node.getGroupName(),

+ 43 - 15
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

@@ -660,13 +660,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         LOG.info("Catching up to latest edits from old active before " +
             "taking over writer role in edits logs");
         editLogTailer.catchupDuringFailover();
-        blockManager.setPostponeBlocksFromFuture(false);
         
-        LOG.info("Reprocessing replication and invalidation queues");
+        blockManager.setPostponeBlocksFromFuture(false);
         blockManager.getDatanodeManager().markAllDatanodesStale();
         blockManager.clearQueues();
         blockManager.processAllPendingDNMessages();
-        blockManager.processMisReplicatedBlocks();
+        
+        if (!isInSafeMode() ||
+            (isInSafeMode() && safeMode.isPopulatingReplQueues())) {
+          LOG.info("Reprocessing replication and invalidation queues");
+          blockManager.processMisReplicatedBlocks();
+        }
         
         if (LOG.isDebugEnabled()) {
           LOG.debug("NameNode metadata after re-processing " +
@@ -1716,16 +1720,25 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       short replication, long blockSize) throws AccessControlException,
       SafeModeException, FileAlreadyExistsException, UnresolvedLinkException,
       FileNotFoundException, ParentNotDirectoryException, IOException {
+    boolean skipSync = false;
     writeLock();
     try {
       checkOperation(OperationCategory.WRITE);
 
       startFileInternal(src, permissions, holder, clientMachine, flag,
           createParent, replication, blockSize);
+    } catch (StandbyException se) {
+      skipSync = true;
+      throw se;
     } finally {
       writeUnlock();
-    }
-    getEditLog().logSync();
+      // There might be transactions logged while trying to recover the lease.
+      // They need to be sync'ed even when an exception was thrown.
+      if (!skipSync) {
+        getEditLog().logSync();
+      }
+    } 
+
     if (auditLog.isInfoEnabled() && isExternalInvocation()) {
       final HdfsFileStatus stat = dir.getFileInfo(src, false);
       logAuditEvent(UserGroupInformation.getCurrentUser(),
@@ -1906,6 +1919,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
    */
   boolean recoverLease(String src, String holder, String clientMachine)
       throws IOException {
+    boolean skipSync = false;
     writeLock();
     try {
       checkOperation(OperationCategory.WRITE);
@@ -1927,8 +1941,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       }
   
       recoverLeaseInternal(inode, src, holder, clientMachine, true);
+    } catch (StandbyException se) {
+      skipSync = true;
+      throw se;
     } finally {
       writeUnlock();
+      // There might be transactions logged while trying to recover the lease.
+      // They need to be sync'ed even when an exception was thrown.
+      if (!skipSync) {
+        getEditLog().logSync();
+      }
     }
     return false;
   }
@@ -2031,6 +2053,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       throws AccessControlException, SafeModeException,
       FileAlreadyExistsException, FileNotFoundException,
       ParentNotDirectoryException, IOException {
+    boolean skipSync = false;
     if (!supportAppends) {
       throw new UnsupportedOperationException(
           "Append is not enabled on this NameNode. Use the " +
@@ -2044,10 +2067,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       lb = startFileInternal(src, null, holder, clientMachine, 
                         EnumSet.of(CreateFlag.APPEND), 
                         false, blockManager.maxReplication, 0);
+    } catch (StandbyException se) {
+      skipSync = true;
+      throw se;
     } finally {
       writeUnlock();
+      // There might be transactions logged while trying to recover the lease.
+      // They need to be sync'ed even when an exception was thrown.
+      if (!skipSync) {
+        getEditLog().logSync();
+      }
     }
-    getEditLog().logSync();
     if (lb != null) {
       if (NameNode.stateChangeLog.isDebugEnabled()) {
         NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: file "
@@ -2979,7 +3009,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
    *         RecoveryInProgressException if lease recovery is in progress.<br>
    *         IOException in case of an error.
    * @return true  if file has been successfully finalized and closed or 
-   *         false if block recovery has been initiated
+   *         false if block recovery has been initiated. Since the lease owner
+   *         has been changed and logged, caller should call logSync().
    */
   boolean internalReleaseLease(Lease lease, String src, 
       String recoveryLeaseHolder) throws AlreadyBeingCreatedException, 
@@ -3100,6 +3131,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     assert hasWriteLock();
     if(newHolder == null)
       return lease;
+    // The following transaction is not synced. Make sure it's sync'ed later.
     logReassignLease(lease.getHolder(), src, newHolder);
     return reassignLeaseInternal(lease, src, newHolder, pendingFile);
   }
@@ -3962,7 +3994,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         // of the number of total blocks in the system.
         this.shouldIncrementallyTrackBlocks = true;
       }
-      
+      if(blockSafe < 0)
+        this.blockSafe = 0;
       checkMode();
     }
       
@@ -5199,13 +5232,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   
   private void logReassignLease(String leaseHolder, String src,
       String newHolder) {
-    writeLock();
-    try {
-      getEditLog().logReassignLease(leaseHolder, src, newHolder);
-    } finally {
-      writeUnlock();
-    }
-    getEditLog().logSync();
+    assert hasWriteLock();
+    getEditLog().logReassignLease(leaseHolder, src, newHolder);
   }
   
   /**

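Several FSNamesystem methods above adopt the same pattern: sync the edit log in the finally block, so that transactions logged while holding the write lock (such as a lease reassignment) are persisted even if the call later throws, but skip the sync when a StandbyException means nothing was logged. A self-contained sketch of that pattern, with EditLog and StandbyException as stand-ins for the HDFS classes:

    public class LogSyncSketch {
      static class StandbyException extends Exception {}
      static class EditLog {
        void logReassignLease() { System.out.println("logged reassign-lease op"); }
        void logSync()          { System.out.println("synced edit log"); }
      }

      static void recoverLease(EditLog editLog, boolean standby) throws StandbyException {
        boolean skipSync = false;
        // writeLock() would be taken here in FSNamesystem
        try {
          if (standby) {
            skipSync = true;               // nothing was logged; don't force a sync
            throw new StandbyException();
          }
          editLog.logReassignLease();      // transaction logged but not yet synced
        } finally {
          // writeUnlock() would happen here
          if (!skipSync) {
            editLog.logSync();             // sync even if an exception escaped above
          }
        }
      }

      public static void main(String[] args) throws Exception {
        EditLog log = new EditLog();
        recoverLease(log, false);
        try { recoverLease(log, true); } catch (StandbyException e) {
          System.out.println("standby rejected the call; no sync performed");
        }
      }
    }
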
Some files were not shown because too many files changed in this diff