
HADOOP-7106. Reorganize SVN layout to combine HDFS, Common, and MR in a single tree (project unsplit)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1073@1134994 13f79535-47bb-0310-9956-ffa450edef68
Todd Lipcon 14 years ago
Commit
3c03962930
100 changed files with 25758 additions and 0 deletions (each entry below shows additions and deletions per file; BIN marks binary files)
  1. 48 0
      common/.gitignore
  2. 10969 0
      common/CHANGES.txt
  3. 244 0
      common/LICENSE.txt
  4. 2 0
      common/NOTICE.txt
  5. 31 0
      common/README.txt
  6. 132 0
      common/bin/hadoop
  7. 354 0
      common/bin/hadoop-config.sh
  8. 172 0
      common/bin/hadoop-daemon.sh
  9. 38 0
      common/bin/hadoop-daemons.sh
  10. 98 0
      common/bin/rcc
  11. 69 0
      common/bin/slaves.sh
  12. 40 0
      common/bin/start-all.sh
  13. 41 0
      common/bin/stop-all.sh
  14. 1898 0
      common/build.xml
  15. 24 0
      common/conf/configuration.xsl
  16. 8 0
      common/conf/core-site.xml.template
  17. 66 0
      common/conf/hadoop-env.sh.template
  18. 72 0
      common/conf/hadoop-metrics.properties
  19. 27 0
      common/conf/hadoop-metrics2.properties
  20. 106 0
      common/conf/hadoop-policy.xml.template
  21. 151 0
      common/conf/log4j.properties
  22. 1 0
      common/conf/masters.template
  23. 2 0
      common/conf/slaves.template
  24. 57 0
      common/conf/ssl-client.xml.example
  25. 55 0
      common/conf/ssl-server.xml.example
  26. 331 0
      common/ivy.xml
  27. 139 0
      common/ivy/hadoop-common-instrumented-template.xml
  28. 159 0
      common/ivy/hadoop-common-template.xml
  29. 58 0
      common/ivy/hadoop-common-test-template.xml
  30. 50 0
      common/ivy/ivysettings.xml
  31. 90 0
      common/ivy/libraries.properties
  32. 11 0
      common/lib/jdiff/hadoop-core_0.20.0.xml
  33. 11 0
      common/lib/jdiff/hadoop-core_0.21.0.xml
  34. 11 0
      common/lib/jdiff/hadoop_0.17.0.xml
  35. 11 0
      common/lib/jdiff/hadoop_0.18.1.xml
  36. 11 0
      common/lib/jdiff/hadoop_0.18.2.xml
  37. 11 0
      common/lib/jdiff/hadoop_0.18.3.xml
  38. 11 0
      common/lib/jdiff/hadoop_0.19.0.xml
  39. 11 0
      common/lib/jdiff/hadoop_0.19.1.xml
  40. 11 0
      common/lib/jdiff/hadoop_0.19.2.xml
  41. 11 0
      common/lib/jdiff/hadoop_0.20.0.xml
  42. 11 0
      common/lib/jdiff/hadoop_0.20.1.xml
  43. 11 0
      common/lib/jdiff/hadoop_0.20.2.xml
  44. 11 0
      common/src/contrib/bash-tab-completion/README
  45. 121 0
      common/src/contrib/bash-tab-completion/hadoop.sh
  46. 170 0
      common/src/docs/changes/ChangesFancyStyle.css
  47. 49 0
      common/src/docs/changes/ChangesSimpleStyle.css
  48. 282 0
      common/src/docs/changes/changes2html.pl
  49. 170 0
      common/src/docs/cn/changes/ChangesFancyStyle.css
  50. 49 0
      common/src/docs/cn/changes/ChangesSimpleStyle.css
  51. 282 0
      common/src/docs/cn/changes/changes2html.pl
  52. 112 0
      common/src/docs/cn/forrest.properties
  53. 7 0
      common/src/docs/cn/src/documentation/README.txt
  54. 40 0
      common/src/docs/cn/src/documentation/classes/CatalogManager.properties
  55. 327 0
      common/src/docs/cn/src/documentation/conf/cli.xconf
  56. 386 0
      common/src/docs/cn/src/documentation/content/xdocs/cluster_setup.xml
  57. 596 0
      common/src/docs/cn/src/documentation/content/xdocs/commands_manual.xml
  58. 294 0
      common/src/docs/cn/src/documentation/content/xdocs/distcp.xml
  59. 69 0
      common/src/docs/cn/src/documentation/content/xdocs/hadoop_archives.xml
  60. 376 0
      common/src/docs/cn/src/documentation/content/xdocs/hdfs_design.xml
  61. 193 0
      common/src/docs/cn/src/documentation/content/xdocs/hdfs_permissions_guide.xml
  62. 72 0
      common/src/docs/cn/src/documentation/content/xdocs/hdfs_quota_admin_guide.xml
  63. 477 0
      common/src/docs/cn/src/documentation/content/xdocs/hdfs_shell.xml
  64. 513 0
      common/src/docs/cn/src/documentation/content/xdocs/hdfs_user_guide.xml
  65. 46 0
      common/src/docs/cn/src/documentation/content/xdocs/hod.xml
  66. 237 0
      common/src/docs/cn/src/documentation/content/xdocs/hod_admin_guide.xml
  67. 158 0
      common/src/docs/cn/src/documentation/content/xdocs/hod_config_guide.xml
  68. 559 0
      common/src/docs/cn/src/documentation/content/xdocs/hod_user_guide.xml
  69. 46 0
      common/src/docs/cn/src/documentation/content/xdocs/index.xml
  70. 2466 0
      common/src/docs/cn/src/documentation/content/xdocs/mapred_tutorial.xml
  71. 230 0
      common/src/docs/cn/src/documentation/content/xdocs/native_libraries.xml
  72. 252 0
      common/src/docs/cn/src/documentation/content/xdocs/quickstart.xml
  73. 249 0
      common/src/docs/cn/src/documentation/content/xdocs/site.xml
  74. 618 0
      common/src/docs/cn/src/documentation/content/xdocs/streaming.xml
  75. 37 0
      common/src/docs/cn/src/documentation/content/xdocs/tabs.xml
  76. BIN
      common/src/docs/cn/src/documentation/resources/images/architecture.gif
  77. BIN
      common/src/docs/cn/src/documentation/resources/images/core-logo.gif
  78. BIN
      common/src/docs/cn/src/documentation/resources/images/favicon.ico
  79. BIN
      common/src/docs/cn/src/documentation/resources/images/hadoop-logo-big.jpg
  80. BIN
      common/src/docs/cn/src/documentation/resources/images/hadoop-logo.jpg
  81. BIN
      common/src/docs/cn/src/documentation/resources/images/hdfsarchitecture.gif
  82. BIN
      common/src/docs/cn/src/documentation/resources/images/hdfsarchitecture.odg
  83. BIN
      common/src/docs/cn/src/documentation/resources/images/hdfsarchitecture.png
  84. BIN
      common/src/docs/cn/src/documentation/resources/images/hdfsdatanodes.gif
  85. BIN
      common/src/docs/cn/src/documentation/resources/images/hdfsdatanodes.odg
  86. BIN
      common/src/docs/cn/src/documentation/resources/images/hdfsdatanodes.png
  87. 346 0
      common/src/docs/cn/src/documentation/skinconf.xml
  88. 78 0
      common/src/docs/cn/src/documentation/skins/common/css/forrest.css.xslt
  89. 1 0
      common/src/docs/cn/src/documentation/skins/common/images/README.txt
  90. BIN
      common/src/docs/cn/src/documentation/skins/common/images/add.jpg
  91. BIN
      common/src/docs/cn/src/documentation/skins/common/images/built-with-forrest-button.png
  92. 92 0
      common/src/docs/cn/src/documentation/skins/common/images/corner-imports.svg.xslt
  93. 28 0
      common/src/docs/cn/src/documentation/skins/common/images/dc.svg.xslt
  94. BIN
      common/src/docs/cn/src/documentation/skins/common/images/external-link.gif
  95. BIN
      common/src/docs/cn/src/documentation/skins/common/images/fix.jpg
  96. BIN
      common/src/docs/cn/src/documentation/skins/common/images/forrest-credit-logo.png
  97. BIN
      common/src/docs/cn/src/documentation/skins/common/images/hack.jpg
  98. BIN
      common/src/docs/cn/src/documentation/skins/common/images/pdfdoc.gif
  99. BIN
      common/src/docs/cn/src/documentation/skins/common/images/poddoc.png
  100. 55 0
      common/src/docs/cn/src/documentation/skins/common/images/poddoc.svg.xslt

+ 48 - 0
common/.gitignore

@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+*~
+.classpath
+.project
+.settings
+*.iml
+*.ipr
+*.iws
+.idea
+.svn
+build/
+build-fi/
+build.properties
+conf/masters
+conf/slaves
+conf/hadoop-env.sh
+conf/hadoop-site.xml
+conf/core-site.xml
+conf/mapred-site.xml
+conf/hdfs-site.xml
+conf/hadoop-policy.xml
+conf/capacity-scheduler.xml
+conf/mapred-queue-acls.xml
+docs/api/
+ivy/hadoop-core.xml
+ivy/hadoop-core-test.xml
+ivy/ivy-*.jar
+ivy/maven-ant-tasks-*.jar
+logs/
+src/contrib/ec2/bin/hadoop-ec2-env.sh
+src/docs/build
+src/docs/cn/build
+src/docs/cn/src/documentation/sitemap.xmap
+src/docs/cn/uming.conf
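
As a brief aside on the ignore rules above: patterns with a trailing slash (build/, logs/) match directories, bare names (build.properties, conf/hadoop-env.sh) match single files, and globs (ivy/ivy-*.jar) match generated artifacts. A minimal sketch for checking which rule applies to a given path uses git's check-ignore; the concrete file names here are invented for illustration and are not part of the commit:

    # Run from the root of the common/ tree; -v prints the .gitignore
    # source and the pattern that matches each ignored path.
    git check-ignore -v build/classes/Example.class conf/hadoop-env.sh ivy/ivy-2.1.0.jar
    # Paths matching no pattern produce no output; exit status is 1 if nothing matched.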

+ 10969 - 0
common/CHANGES.txt

@@ -0,0 +1,10969 @@
+Hadoop Change Log
+
+Trunk (unreleased changes)
+
+  INCOMPATIBLE CHANGES
+
+   HADOOP-6904. Support method-based RPC compatibility. (hairong)
+
+   HADOOP-6432. Add Statistics support in FileContext. (jitendra)
+
+   HADOOP-7136. Remove failmon contrib component. (nigel)
+
+  NEW FEATURES
+
+    HADOOP-7342. Add a utility API in FileUtil for JDK File.list to
+    avoid NPEs on File.list() (Bharath Mundlapudi via mattf)
+
+    HADOOP-7322. Adding a util method in FileUtil for directory listing to
+    avoid NPEs on File.listFiles() (Bharath Mundlapudi via mattf)
+
+    HADOOP-7023. Add listCorruptFileBlocks to FileSystem. (Patrick Kling
+    via hairong)
+
+    HADOOP-7096. Allow setting of end-of-record delimiter for TextInputFormat
+    (Ahmed Radwan via todd)
+
+    HADOOP-6994. Api to get delegation token in AbstractFileSystem. (jitendra)
+
+    HADOOP-7171. Support UGI in FileContext API. (jitendra)
+
+    HADOOP-7257. Client side mount tables. (sanjay)
+
+    HADOOP-6919. New metrics2 framework. (Luke Lu via acmurthy) 
+
+    HADOOP-6920. Metrics instrumentation to move to the new metrics2
+    framework. (Luke Lu via suresh)
+
+    HADOOP-7214. Add Common functionality necessary to provide an equivalent
+    of /usr/bin/groups for Hadoop. (Aaron T. Myers via todd)
+
+    HADOOP-6832. Add an authentication plugin using a configurable static user
+    for the web UI. (Owen O'Malley and Todd Lipcon via cdouglas)
+
+    HADOOP-7144. Expose JMX metrics via JSON servlet. (Robert Joseph Evans via
+    cdouglas)
+
+  IMPROVEMENTS
+
+    HADOOP-7042. Updates to test-patch.sh to include failed test names and
+    improve other messaging. (nigel)
+
+    HADOOP-7001.  Configuration changes can occur via the Reconfigurable
+    interface. (Patrick Kling via dhruba)
+
+    HADOOP-6764. Add number of reader threads and queue length as
+    configuration parameters in RPC.getServer. (Dmytro Molkov via hairong)
+
+    HADOOP-7049. TestReconfiguration should be junit v4.
+    (Patrick Kling via eli)
+
+    HADOOP-7054. Change NN LoadGenerator to use FileContext APIs.
+    (Sanjay Radia)
+
+    HADOOP-7060. A more elegant FileSystem#listCorruptFileBlocks API.
+    (Patrick Kling via hairong)
+
+    HADOOP-7058. Expose number of bytes in FSOutputSummer buffer to
+    implementations. (Todd Lipcon via hairong)
+
+    HADOOP-7061. Imprecise javadoc for CompressionCodec. (Jingguo Yao via eli)
+
+    HADOOP-7059. Remove "unused" warning in native code.  (Noah Watkins via eli)
+
+    HADOOP-6864. Provide a JNI-based implementation of
+     ShellBasedUnixGroupsNetgroupMapping
+    (implementation of GroupMappingServiceProvider) (Erik Seffl via boryas)
+
+    HADOOP-7078. Improve javadocs for RawComparator interface.
+    (Harsh J Chouraria via todd)
+
+    HADOOP-6995. Allow wildcards to be used in ProxyUsers configurations.
+    (todd)
+
+    HADOOP-6376. Add a comment header to conf/slaves that specifies the file
+    format. (Kay Kay via todd)
+
+    HADOOP-7151. Document need for stable hashCode() in WritableComparable.
+    (Dmitriy V. Ryaboy via todd)
+
+    HADOOP-7112. Issue a warning when GenericOptionsParser libjars are not on
+    local filesystem. (tomwhite)
+
+    HADOOP-7114. FsShell should dump all exceptions at DEBUG level.
+    (todd via tomwhite)
+
+    HADOOP-7159. RPC server should log the client hostname when read exception
+    happened. (Scott Chen via todd)
+
+    HADOOP-7167. Allow using a file to exclude certain tests from build. (todd)
+
+    HADOOP-7133. Batch the calls in DataStorage to FileUtil.createHardLink().
+    (Matt Foley via jghoman)
+
+    HADOOP-7166. Add DaemonFactory to common. (Erik Steffl & jitendra)
+
+    HADOOP-7175. Add isEnabled() to Trash.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7180. Better support on CommandFormat on the API and exceptions.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7202. Improve shell Command base class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7224. Add CommandFactory to shell.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7014. Generalize CLITest structure and interfaces to facilitate
+    upstream adoption (e.g. for web testing). (cos)
+
+    HADOOP-7230. Move "fs -help" shell command tests from HDFS to COMMON; see
+    also HDFS-1844.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7233. Refactor ls to conform to new FsCommand class.  (Daryn Sharp
+    via szetszwo)
+
+    HADOOP-7235. Refactor the tail command to conform to new FsCommand class.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7179. Federation: Improve HDFS startup scripts. (Erik Steffl
+    and Tanping Wang via suresh)
+
+    HADOOP-7227. Remove protocol version check at proxy creation in Hadoop
+    RPC. (jitendra)
+
+    HADOOP-7236. Refactor the mkdir command to conform to new FsCommand class.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7250. Refactor the setrep command to conform to new FsCommand class.
+    (Daryn Sharp via szetszwo)
+
+    HADOOP-7249. Refactor the chmod/chown/chgrp command to conform to new
+    FsCommand class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7251. Refactor the getmerge command to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7265. Keep track of relative paths in PathData.  (Daryn Sharp
+    via szetszwo)
+
+    HADOOP-7238. Refactor the cat and text commands to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7271. Standardize shell command error messages.  (Daryn Sharp
+    via szetszwo)
+
+    HADOOP-7272. Remove unnecessary security related info logs. (suresh)
+
+    HADOOP-7275. Refactor the stat command to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7237. Refactor the touchz command to conform to new FsCommand
+    class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7267. Refactor the rm/rmr/expunge commands to conform to new
+    FsCommand class.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7285. Refactor the test command to conform to new FsCommand
+    class. (Daryn Sharp via todd)
+
+    HADOOP-7289. In ivy.xml, test conf should not extend common conf.
+    (Eric Yang via szetszwo)
+
+    HADOOP-7291. Update Hudson job not to run test-contrib. (Nigel Daley via eli)
+
+    HADOOP-7286. Refactor the du/dus/df commands to conform to new FsCommand
+    class. (Daryn Sharp via todd)
+
+    HADOOP-7301. FSDataInputStream should expose a getWrappedStream method.
+    (Jonathan Hsieh via eli)
+
+    HADOOP-7306. Start metrics system even if config files are missing
+    (Luke Lu via todd)
+
+    HADOOP-7302. webinterface.private.actions should be renamed and moved to
+    the MapReduce project. (Ari Rabkin via todd)
+
+    HADOOP-7329. Improve help message for "df" to include "-h" flag.
+    (Xie Xianshan via todd)
+
+    HADOOP-7320. Refactor the copy and move commands to conform to new
+    FsCommand class. (Daryn Sharp via todd)
+
+    HADOOP-7312. Update value of hadoop.common.configuration.version.
+    (Harsh J Chouraria via todd)
+
+    HADOOP-7337. Change PureJavaCrc32 annotations to public stable.  (szetszwo)
+
+    HADOOP-7331. Make hadoop-daemon.sh return exit code 1 if daemon processes
+    did not get started. (Tanping Wang via todd)
+
+    HADOOP-7316. Add public javadocs to FSDataInputStream and
+    FSDataOutputStream. (eli)
+
+    HADOOP-7323. Add capability to resolve compression codec based on codec
+    name. (Alejandro Abdelnur via tomwhite)
+
+    HADOOP-1886. Undocumented parameters in FileSystem. (Frank Conrad via eli)
+
+    HADOOP-7375. Add resolvePath method to FileContext. (Sanjay Radia via eli)
+
+    HADOOP-7383. HDFS needs to export protobuf library dependency in pom.
+    (todd via eli)
+
+    HADOOP-7374. Don't add tools.jar to the classpath when running Hadoop.
+    (eli)
+
+  OPTIMIZATIONS
+  
+    HADOOP-7333. Performance improvement in PureJavaCrc32. (Eric Caspole
+    via todd)
+
+  BUG FIXES
+
+    HADOOP-7015. RawLocalFileSystem#listStatus does not deal with a directory
+    whose entries are changing (e.g. in a multi-thread or multi-process
+    environment). (Sanjay Radia via eli)
+
+    HADOOP-7045. TestDU fails on systems with local file systems with 
+    extended attributes. (eli)
+
+    HADOOP-6939. Inconsistent lock ordering in
+    AbstractDelegationTokenSecretManager. (Todd Lipcon via tomwhite)
+
+    HADOOP-7129. Fix typo in method name getProtocolSigature (todd)
+
+    HADOOP-7048.  Wrong description of Block-Compressed SequenceFile Format in
+    SequenceFile's javadoc.  (Jingguo Yao via tomwhite)
+
+    HADOOP-7153. MapWritable violates contract of Map interface for equals()
+    and hashCode(). (Nicholas Telford via todd)
+
+    HADOOP-6754. DefaultCodec.createOutputStream() leaks memory.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-7098. Tasktracker property not set in conf/hadoop-env.sh.
+    (Bernd Fondermann via tomwhite)
+
+    HADOOP-7131. Exceptions thrown by Text methods should include the causing
+    exception. (Uma Maheswara Rao G via todd)
+
+    HADOOP-6912. Guard against NPE when calling UGI.isLoginKeytabBased().
+    (Kan Zhang via jitendra)
+
+    HADOOP-7204. remove local unused fs variable from CmdHandler 
+    and FsShellPermissions.changePermissions (boryas)
+
+    HADOOP-7210. Chown command is not working from FSShell
+    (Uma Maheswara Rao G via todd)
+
+    HADOOP-7215. RPC clients must use network interface corresponding to 
+    the host in the client's kerberos principal key. (suresh)
+
+    HADOOP-7019. Refactor build targets to enable faster cross project dev
+    cycles. (Luke Lu via cos)
+
+    HADOOP-7216. Add FsCommand.runAll() with deprecated annotation for the
+    transition of Command base class improvement.  (Daryn Sharp via szetszwo)
+
+    HADOOP-7207. fs member of FSShell is not really needed (boryas)
+
+    HADOOP-7223. FileContext createFlag combinations are not clearly defined.
+    (suresh)
+
+    HADOOP-7231. Fix synopsis for -count. (Daryn Sharp via eli).
+
+    HADOOP-7261. Disable IPV6 for junit tests. (suresh)
+
+    HADOOP-7268. FileContext.getLocalFSFileContext() behavior needs to be fixed
+    w.r.t tokens. (jitendra)
+
+    HADOOP-7290. Unit test failure in TestUserGroupInformation.testGetServerSideGroups. (Trevor Robison via eli)
+
+    HADOOP-7292. Fix racy test case TestSinkQueue. (Luke Lu via todd)
+
+    HADOOP-7282. ipc.Server.getRemoteIp() may return null.  (John George
+    via szetszwo)
+
+    HADOOP-7208. Fix implementation of equals() and hashCode() in
+    StandardSocketFactory. (Uma Maheswara Rao G via todd)
+
+    HADOOP-7336. TestFileContextResolveAfs will fail with default 
+    test.build.data property. (jitendra)
+
+    HADOOP-7284. Trash and shell's rm do not work for viewfs. (Sanjay Radia)
+
+    HADOOP-7341. Fix options parsing in CommandFormat (Daryn Sharp via todd)
+
+    HADOOP-7353. Cleanup FsShell and prevent masking of RTE stack traces.
+    (Daryn Sharp via todd)
+
+    HADOOP-7356. RPM packages broke bin/hadoop script in developer environment.
+    (Eric Yang via todd)
+
+Release 0.22.0 - Unreleased
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-7137. Remove hod contrib. (nigel via eli)
+
+  NEW FEATURES
+
+    HADOOP-6791.  Refresh for proxy superuser config
+    (common part for HDFS-1096) (boryas)
+
+    HADOOP-6581. Add authenticated TokenIdentifiers to UGI so that 
+    they can be used for authorization (Kan Zhang and Jitendra Pandey 
+    via jghoman)
+
+    HADOOP-6584. Provide Kerberized SSL encryption for webservices.
+    (jghoman and Kan Zhang via jghoman)
+
+    HADOOP-6853. Common component of HDFS-1045. (jghoman)
+
+    HADOOP-6859. Introduce additional statistics to FileSystem to track
+    file system operations. (suresh)
+
+    HADOOP-6870. Add a new API getFiles to FileSystem and FileContext that
+    lists all files under the input path or the subtree rooted at the
+    input path if recursive is true. Block locations are returned together
+    with each file's status. (hairong)
+
+    HADOOP-6888. Add a new FileSystem API closeAllForUGI(..) for closing all
+    file systems associated with a particular UGI.  (Devaraj Das and Kan Zhang
+    via szetszwo)
+
+    HADOOP-6892. Common component of HDFS-1150 (Verify datanodes' identities 
+    to clients in secure clusters) (jghoman)
+
+    HADOOP-6889. Make RPC to have an option to timeout. (hairong)
+
+    HADOOP-6996. Allow CodecFactory to return a codec object given a codec's
+    class name. (hairong)
+
+    HADOOP-7013. Add boolean field isCorrupt to BlockLocation. 
+    (Patrick Kling via hairong)
+
+    HADOOP-6978. Adds support for NativeIO using JNI. 
+    (Todd Lipcon, Devaraj Das & Owen O'Malley via ddas)
+
+    HADOOP-7134. configure files that are generated as part of the released
+    tarball need to have executable bit set. (Roman Shaposhnik via cos)
+
+  IMPROVEMENTS
+
+    HADOOP-6644. util.Shell getGROUPS_FOR_USER_COMMAND method name 
+    - should use common naming convention (boryas)
+
+    HADOOP-6778. add isRunning() method to 
+    AbstractDelegationTokenSecretManager (for HDFS-1044) (boryas)
+
+    HADOOP-6633. normalize property names for JT/NN kerberos principal 
+    names in configuration (boryas)
+
+    HADOOP-6627. "Bad Connection to FS" message in FSShell should print 
+    message from the exception (boryas)
+
+    HADOOP-6600. mechanism for authorization check for inter-server 
+    protocols. (boryas)
+
+    HADOOP-6623. Add StringUtils.split for non-escaped single-character
+    separator. (Todd Lipcon via tomwhite)
+
+    HADOOP-6761. The Trash Emptier has the ability to run more frequently.
+    (Dmytro Molkov via dhruba)
+
+    HADOOP-6714. Resolve compressed files using CodecFactory in FsShell::text.
+    (Patrick Angeles via cdouglas)
+
+    HADOOP-6661. User document for UserGroupInformation.doAs. 
+    (Jitendra Pandey via jghoman)
+
+    HADOOP-6674. Makes use of the SASL authentication options in the
+    SASL RPC. (Jitendra Pandey via ddas)
+
+    HADOOP-6526. Need mapping from long principal names to local OS 
+    user names. (boryas)
+
+    HADOOP-6814. Adds an API in UserGroupInformation to get the real
+    authentication method of a passed UGI. (Jitendra Pandey via ddas)
+
+    HADOOP-6756. Documentation for common configuration keys.
+    (Erik Steffl via shv)
+
+    HADOOP-6835. Add support for concatenated gzip input. (Greg Roelofs via
+    cdouglas)
+
+    HADOOP-6845. Renames the TokenStorage class to Credentials. 
+    (Jitendra Pandey via ddas)
+
+    HADOOP-6826. FileStatus needs unit tests. (Rodrigo Schmidt via Eli
+    Collins)
+
+    HADOOP-6905. add buildDTServiceName method to SecurityUtil 
+    (as part of MAPREDUCE-1718)  (boryas)
+
+    HADOOP-6632. Adds support for using different keytabs for different
+    servers in a Hadoop cluster. In the earlier implementation, all servers
+    of a certain type (like TaskTracker), would have the same keytab and the
+    same principal. Now the principal name is a pattern that has _HOST in it.
+    (Kan Zhang & Jitendra Pandey via ddas)
+
+    HADOOP-6861. Adds new non-static methods in Credentials to read and 
+    write token storage file. (Jitendra Pandey & Owen O'Malley via ddas)
+
+    HADOOP-6877. Common part of HDFS-1178 (NameNode servlets should communicate
+    with NameNode directly). (Kan Zhang via jghoman)
+    
+    HADOOP-6475. Adding some javadoc to Server.RpcMetrics, UGI. 
+    (Jitendra Pandey and borya via jghoman)
+
+    HADOOP-6656. Adds a thread in the UserGroupInformation to renew TGTs 
+    periodically. (Owen O'Malley and ddas via ddas)
+
+    HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. (hairong)
+
+    HADOOP-6862. Adds api to add/remove user and group to AccessControlList
+    (amareshwari)
+
+    HADOOP-6911. doc update for DelegationTokenFetcher (boryas)
+
+    HADOOP-6900. Make the iterator returned by FileSystem#listLocatedStatus to 
+    throw IOException rather than RuntimeException when there is an IO error
+    fetching the next file. (hairong)
+
+    HADOOP-6905. Better logging messages when a delegation token is invalid.
+    (Kan Zhang via jghoman)
+
+    HADOOP-6693. Add metrics to track Kerberos login activity. (suresh)
+
+    HADOOP-6803. Add native gzip read/write coverage to TestCodec.
+    (Eli Collins via tomwhite)
+
+    HADOOP-6950. Suggest that HADOOP_CLASSPATH should be preserved in 
+    hadoop-env.sh.template. (Philip Zeyliger via Eli Collins)
+
+    HADOOP-6922. Make AccessControlList a writable and update documentation
+    for Job ACLs.  (Ravi Gummadi via vinodkv)
+
+    HADOOP-6965. Introduces checks for whether the original tgt is valid 
+    in the reloginFromKeytab method.
+
+    HADOOP-6856. Simplify constructors for SequenceFile, and MapFile. (omalley)
+
+    HADOOP-6987. Use JUnit Rule to optionally fail test cases that run more
+    than 10 seconds (jghoman)
+
+    HADOOP-7005. Update test-patch.sh to remove callback to Hudson. (nigel)
+
+    HADOOP-6985. Suggest that HADOOP_OPTS be preserved in
+    hadoop-env.sh.template. (Ramkumar Vadali via cutting)
+
+    HADOOP-7007. Update the hudson-test-patch ant target to work with the
+    latest test-patch.sh script (gkesavan)
+
+    HADOOP-7010. Typo in FileSystem.java. (Jingguo Yao via eli)
+
+    HADOOP-7009. MD5Hash provides a public factory method that creates an
+    instance of thread local MessageDigest. (hairong)
+
+    HADOOP-7008. Enable test-patch.sh to have a configured number of 
+    acceptable findbugs and javadoc warnings. (nigel and gkesavan)
+
+    HADOOP-6818. Provides a JNI implementation of group resolution. (ddas)
+
+    HADOOP-6943. The GroupMappingServiceProvider interface should be public.
+    (Aaron T. Myers via tomwhite)
+
+    HADOOP-4675. Current Ganglia metrics implementation is incompatible with
+    Ganglia 3.1. (Brian Bockelman via tomwhite)
+
+    HADOOP-6977. Herriot daemon clients should vend statistics (cos)
+
+    HADOOP-7024. Create a test method for adding file systems during tests.
+    (Kan Zhang via jghoman)
+
+    HADOOP-6903. Make AbstractFileSystem methods and some FileContext methods
+    to be public. (Sanjay Radia)
+
+    HADOOP-7034. Add TestPath tests to cover dot, dot dot, and slash 
+    normalization. (eli)
+
+    HADOOP-7032. Assert type constraints in the FileStatus constructor. (eli)
+
+    HADOOP-6562. FileContextSymlinkBaseTest should use FileContextTestHelper. 
+    (eli)
+
+    HADOOP-7028. ant eclipse does not include requisite ant.jar in the 
+    classpath. (Patrick Angeles via eli)
+
+    HADOOP-6298. Add copyBytes to Text and BytesWritable. (omalley)
+  
+    HADOOP-6578. Configuration should trim whitespace around a lot of value
+    types. (Michele Catasta via eli)
+
+    HADOOP-6811. Remove EC2 bash scripts. They are replaced by Apache Whirr
+    (incubating, http://incubator.apache.org/whirr). (tomwhite)
+
+    HADOOP-7102. Remove "fs.ramfs.impl" field from core-default.xml (shv)
+
+    HADOOP-7104. Remove unnecessary DNS reverse lookups from RPC layer
+    (Kan Zhang via todd)
+
+    HADOOP-6056. Use java.net.preferIPv4Stack to force IPv4.
+    (Michele Catasta via shv)
+
+    HADOOP-7110. Implement chmod with JNI. (todd)
+
+    HADOOP-6812. Change documentation for correct placement of configuration
+    variables: mapreduce.reduce.input.buffer.percent, 
+    mapreduce.task.io.sort.factor, mapreduce.task.io.sort.mb
+    (Chris Douglas via shv)
+
+    HADOOP-6436. Remove auto-generated native build files. (rvs via eli)
+
+    HADOOP-6970. SecurityAuth.audit should be generated under /build. (boryas)
+
+    HADOOP-7154. Should set MALLOC_ARENA_MAX in hadoop-env.sh (todd)
+
+    HADOOP-7187. Fix socket leak in GangliaContext.  (Uma Maheswara Rao G
+    via szetszwo)
+
+    HADOOP-7241. fix typo of command 'hadoop fs -help tail'. 
+    (Wei Yongjun via eli)
+
+    HADOOP-7244. Documentation change for updated configuration keys.
+    (tomwhite via eli)
+
+    HADOOP-7189. Add ability to enable 'debug' property in JAAS configuration.
+    (Ted Yu via todd)
+
+    HADOOP-7192. Update fs -stat docs to reflect the format features. (Harsh
+    J Chouraria via todd)
+
+    HADOOP-7355. Add audience and stability annotations to HttpServer class.
+    (stack)
+
+    HADOOP-7346. Send back nicer error message to clients using outdated IPC
+    version. (todd)
+
+    HADOOP-7335. Force entropy to come from non-true random for tests.
+    (todd via eli)
+
+    HADOOP-7325. The hadoop command should not accept class names starting with
+    a hyphen. (Brock Noland via todd)
+
+  OPTIMIZATIONS
+
+    HADOOP-6884. Add LOG.isDebugEnabled() guard for each LOG.debug(..).
+    (Erik Steffl via szetszwo)
+
+    HADOOP-6683. ZlibCompressor does not fully utilize the buffer.
+    (Kang Xiao via eli)
+
+    HADOOP-6949. Reduce RPC packet size of primitive arrays using
+    ArrayPrimitiveWritable instead of ObjectWritable. (Matt Foley via suresh)
+
+  BUG FIXES
+
+    HADOOP-6638. try to relogin in a case of failed RPC connection (expired 
+    tgt) only in case the subject is loginUser or proxyUgi.realUser. (boryas)
+
+    HADOOP-6781. security audit log shouldn't have exception in it. (boryas)
+
+    HADOOP-6612.  Protocols RefreshUserToGroupMappingsProtocol and 
+    RefreshAuthorizationPolicyProtocol will fail with security enabled (boryas)
+
+    HADOOP-6764. Remove verbose logging from the Groups class. (Boris Shkolnik)
+
+    HADOOP-6730. Bug in FileContext#copy and provide base class for 
+    FileContext tests. (Ravi Phulari via jghoman)
+
+    HADOOP-6669. Respect compression configuration when creating DefaultCodec
+    instances. (Koji Noguchi via cdouglas)
+
+    HADOOP-6747. TestNetUtils fails on Mac OS X. (Todd Lipcon via jghoman)
+
+    HADOOP-6787. Factor out glob pattern code from FileContext and
+    Filesystem. Also fix bugs identified in HADOOP-6618 and make the
+    glob pattern code less restrictive and more POSIX standard
+    compliant. (Luke Lu via eli)
+
+    HADOOP-6649.  login object in UGI should be inside the subject (jnp via 
+    boryas)
+
+    HADOOP-6687.   user object in the subject in UGI should be reused in case 
+    of a relogin. (jnp via boryas)
+
+    HADOOP-6603. Provide workaround for issue with Kerberos not resolving 
+    cross-realm principal (Kan Zhang and Jitendra Pandey via jghoman)
+
+    HADOOP-6620. NPE if renewer is passed as null in getDelegationToken.
+    (Jitendra Pandey via jghoman)
+
+    HADOOP-6613. Moves the RPC version check ahead of the AuthMethod check.
+    (Kan Zhang via ddas)
+
+    HADOOP-6682. NetUtils:normalizeHostName does not process hostnames starting
+    with [a-f] correctly. (jghoman)
+
+    HADOOP-6652. Removes the unnecessary cache from 
+    ShellBasedUnixGroupsMapping. (ddas)
+
+    HADOOP-6815. refreshSuperUserGroupsConfiguration should use server side 
+    configuration for the refresh (boryas)
+
+    HADOOP-6648. Adds a check for null tokens in Credentials.addToken api.
+    (ddas)
+ 
+    HADOOP-6647. balancer fails with "is not authorized for protocol 
+    interface NamenodeProtocol" in secure environment (boryas)
+
+    HADOOP-6834. TFile.append compares initial key against null lastKey
+    (hong tang via mahadev)
+
+    HADOOP-6670. Use the UserGroupInformation's Subject as the criteria for
+    equals and hashCode. (Owen O'Malley and Kan Zhang via ddas)
+
+    HADOOP-6536. Fixes FileUtil.fullyDelete() not to delete the contents of
+    the sym-linked directory. (Ravi Gummadi via amareshwari)
+
+    HADOOP-6873. using delegation token over hftp for long 
+    running clients (boryas)
+
+    HADOOP-6706. Improves the sasl failure handling due to expired tickets,
+    and other server detected failures. (Jitendra Pandey and ddas via ddas)
+
+    HADOOP-6715. Fixes AccessControlList.toString() to return a descriptive
+    String representation of the ACL. (Ravi Gummadi via amareshwari)
+
+    HADOOP-6885. Fix java doc warnings in Groups and 
+    RefreshUserMappingsProtocol. (Eli Collins via jghoman) 
+
+    HADOOP-6482. GenericOptionsParser constructor that takes Options and 
+    String[] ignores options. (Eli Collins via jghoman)
+
+    HADOOP-6906.  FileContext copy() utility doesn't work with recursive
+    copying of directories. (vinod k v via mahadev)
+
+    HADOOP-6453. Hadoop wrapper script shouldn't ignore an existing 
+    JAVA_LIBRARY_PATH. (Chad Metcalf via jghoman)
+
+    HADOOP-6932.  Namenode start (init) fails because of invalid kerberos 
+    key, even when security set to "simple" (boryas)
+
+    HADOOP-6913. Circular initialization between UserGroupInformation and 
+    KerberosName (Kan Zhang via boryas)
+
+    HADOOP-6907. Rpc client doesn't use the per-connection conf to figure
+    out server's Kerberos principal (Kan Zhang via hairong)
+
+    HADOOP-6938. ConnectionId.getRemotePrincipal() should check if security
+    is enabled. (Kan Zhang via hairong)
+
+    HADOOP-6930. AvroRpcEngine doesn't work with generated Avro code. 
+    (sharad)
+
+    HADOOP-6940. RawLocalFileSystem's markSupported method misnamed 
+    markSupport. (Tom White via eli).
+
+    HADOOP-6951.  Distinct minicluster services (e.g. NN and JT) overwrite each
+    other's service policies.  (Aaron T. Myers via tomwhite)
+
+    HADOOP-6879. Provide SSH based (Jsch) remote execution API for system
+    tests (cos)
+
+    HADOOP-6989. Correct the parameter for SetFile to set the value type
+    for SetFile to be NullWritable instead of the key. (cdouglas via omalley)
+
+    HADOOP-6984. Combine the compress kind and the codec in the same option
+    for SequenceFiles. (cdouglas via omalley)
+
+    HADOOP-6933. TestListFiles is flaky. (Todd Lipcon via tomwhite)
+
+    HADOOP-6947.  Kerberos relogin should set refreshKrb5Config to true.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-7006. Fix 'fs -getmerge' command to not be a no-op.
+    (Chris Nauroth via cutting)
+
+    HADOOP-6663.  BlockDecompressorStream get EOF exception when decompressing
+    the file compressed from empty file.  (Kang Xiao via tomwhite)
+
+    HADOOP-6991.  Fix SequenceFile::Reader to honor file lengths and call
+    openFile (cdouglas via omalley)
+
+    HADOOP-7011.  Fix KerberosName.main() to not throw an NPE.
+    (Aaron T. Myers via tomwhite)
+
+    HADOOP-6975.  Integer overflow in S3InputStream for blocks > 2GB.
+    (Patrick Kling via tomwhite)
+
+    HADOOP-6758. MapFile.fix does not allow index interval definition.
+    (Gianmarco De Francisci Morales via tomwhite)
+
+    HADOOP-6926. SocketInputStream incorrectly implements read().
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6899 RawLocalFileSystem#setWorkingDir() does not work for relative names
+     (Sanjay Radia)
+
+    HADOOP-6496. HttpServer sends wrong content-type for CSS files
+    (and others). (Todd Lipcon via tomwhite)
+
+    HADOOP-7057. IOUtils.readFully and IOUtils.skipFully have typo in
+    exception creation's message. (cos)
+
+    HADOOP-7038. saveVersion script includes an additional \r while running
+    whoami under windows. (Wang Xu via cos)
+
+    HADOOP-7082. Configuration.writeXML should not hold lock while outputting
+    (todd)
+
+    HADOOP-7070. JAAS configuration should delegate unknown application names
+    to pre-existing configuration. (todd)
+
+    HADOOP-7087. SequenceFile.createWriter ignores FileSystem parameter (todd)
+
+    HADOOP-7091. reloginFromKeytab() should happen even if TGT can't be found.
+    (Kan Zhang via jghoman)
+
+    HADOOP-7100. Fix build to not refer to contrib/ec2 removed by HADOOP-6811
+    (todd)
+
+    HADOOP-7097. JAVA_LIBRARY_PATH missing base directory. (Noah Watkins via
+    todd)
+
+    HADOOP-7093. Servlets should default to text/plain (todd)
+
+    HADOOP-7101. UserGroupInformation.getCurrentUser() fails when called from
+    non-Hadoop JAAS context. (todd)
+
+    HADOOP-7089. Fix link resolution logic in hadoop-config.sh. (eli)
+
+    HADOOP-7046. Fix Findbugs warning in Configuration. (Po Cheung via shv)
+
+    HADOOP-7118. Fix NPE in Configuration.writeXml (todd)
+
+    HADOOP-7122. Fix thread leak when shell commands time out. (todd)
+
+    HADOOP-7126. Fix file permission setting for RawLocalFileSystem on Windows.
+    (Po Cheung via shv)
+
+    HADOOP-6642. Fix javac, javadoc, findbugs warnings related to security work. 
+    (Chris Douglas, Po Cheung via shv)
+
+    HADOOP-7140. IPC Reader threads do not stop when server stops (todd)
+
+    HADOOP-7094. hadoop.css got lost during project split (cos)
+
+    HADOOP-7145. Configuration.getLocalPath should trim whitespace from
+    the provided directories. (todd)
+
+    HADOOP-7156. Workaround for unsafe implementations of getpwuid_r (todd)
+
+    HADOOP-6898. FileSystem.copyToLocal creates files with 777 permissions.
+    (Aaron T. Myers via tomwhite)
+
+    HADOOP-7229. Do not default to an absolute path for kinit in Kerberos
+    auto-renewal thread. (Aaron T. Myers via todd)
+
+    HADOOP-7172. SecureIO should not check owner on non-secure
+    clusters that have no native support. (todd via eli)
+
+    HADOOP-7184. Remove deprecated config local.cache.size from
+    core-default.xml (todd)
+
+    HADOOP-7245. FsConfig should use constants in CommonConfigurationKeys.
+    (tomwhite via eli)
+
+    HADOOP-7068. Ivy resolve force mode should be turned off by default.
+    (Luke Lu via tomwhite)
+
+    HADOOP-7296. The FsPermission(FsPermission) constructor does not use the
+    sticky bit. (Siddharth Seth via tomwhite)
+
+    HADOOP-7300. Configuration methods that return collections are inconsistent
+    about mutability. (todd)
+
+    HADOOP-7305. Eclipse project classpath should include tools.jar from JDK.
+    (Niels Basjes via todd)
+
+    HADOOP-7318. MD5Hash factory should reset the digester it returns.
+    (todd via eli)
+
+    HADOOP-7287. Configuration deprecation mechanism doesn't work properly for
+    GenericOptionsParser and Tools. (Aaron T. Myers via todd)
+
+    HADOOP-7146. RPC server leaks file descriptors (todd)
+
+    HADOOP-7276. Hadoop native builds fail on ARM due to -m32 (Trevor Robinson
+    via eli)
+
+    HADOOP-7121. Exceptions while serializing IPC call responses are not
+    handled well. (todd)
+
+    HADOOP-7351. Regression: HttpServer#getWebAppsPath used to be protected
+    so subclasses could supply alternate webapps path but it was made private
+    by HADOOP-6461 (Stack)
+
+    HADOOP-7349. HADOOP-7121 accidentally disabled some tests in TestIPC.
+    (todd)
+
+Release 0.21.1 - Unreleased
+
+  IMPROVEMENTS
+
+    HADOOP-6934. Test for ByteWritable comparator.
+    (Johannes Zillmann via Eli Collins)
+
+    HADOOP-6786. test-patch needs to verify Herriot integrity (cos)
+
+    HADOOP-7177. CodecPool should report which compressor it is using.
+    (Allen Wittenauer via eli)
+
+  BUG FIXES
+
+    HADOOP-6925. BZip2Codec incorrectly implements read(). 
+    (Todd Lipcon via Eli Collins)
+
+    HADOOP-6833. IPC leaks call parameters when exceptions thrown.
+    (Todd Lipcon via Eli Collins)
+
+    HADOOP-6971. Clover build doesn't generate per-test coverage (cos)
+
+    HADOOP-6993. Broken link on cluster setup page of docs. (eli)
+
+    HADOOP-6944. [Herriot] Implement a functionality for getting proxy users
+    definitions like groups and hosts. (Vinay Thota via cos)
+
+    HADOOP-6954.  Sources JARs are not correctly published to the Maven
+    repository. (tomwhite)
+
+    HADOOP-7052. misspelling of threshold in conf/log4j.properties.
+    (Jingguo Yao via eli)
+
+    HADOOP-7053. wrong FSNamesystem Audit logging setting in 
+    conf/log4j.properties. (Jingguo Yao via eli)
+
+    HADOOP-7120. Fix a syntax error in test-patch.sh.  (szetszwo)
+
+    HADOOP-7162. Remove a duplicated call to FileSystem.listStatus(..) in
+    FsShell. (Alexey Diomin via szetszwo)
+
+    HADOOP-7117. Remove fs.checkpoint.* from core-default.xml and replace
+    fs.checkpoint.* with dfs.namenode.checkpoint.* in documentations.
+    (Harsh J Chouraria via szetszwo)
+
+    HADOOP-7193. Correct the "fs -touchz" command help message.
+    (Uma Maheswara Rao G via szetszwo)
+
+    HADOOP-7174. Null is displayed in the "fs -copyToLocal" command.
+    (Uma Maheswara Rao G via szetszwo)
+
+    HADOOP-7194. Fix resource leak in IOUtils.copyBytes(..).
+    (Devaraj K via szetszwo)
+
+    HADOOP-7183. WritableComparator.get should not cache comparator objects.
+    (tomwhite via eli)
+
+Release 0.21.0 - 2010-08-13
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-4895. Remove deprecated methods DFSClient.getHints(..) and
+    DFSClient.isDirectory(..).  (szetszwo)
+
+    HADOOP-4941. Remove deprecated FileSystem methods: getBlockSize(Path f),
+    getLength(Path f) and getReplication(Path src).  (szetszwo)
+
+    HADOOP-4648. Remove obsolete, deprecated InMemoryFileSystem and
+    ChecksumDistributedFileSystem.  (cdouglas via szetszwo)
+
+    HADOOP-4940. Remove a deprecated method FileSystem.delete(Path f).  (Enis
+    Soztutar via szetszwo)
+
+    HADOOP-4010. Change semantics for LineRecordReader to read an additional
+    line per split, rather than moving back one character in the stream, to
+    work with splittable compression codecs. (Abdul Qadeer via cdouglas)
+
+    HADOOP-5094. Show hostname and separate live/dead datanodes in DFSAdmin
+    report.  (Jakob Homan via szetszwo)
+
+    HADOOP-4942. Remove deprecated FileSystem methods getName() and
+    getNamed(String name, Configuration conf).  (Jakob Homan via szetszwo)
+
+    HADOOP-5486. Removes the CLASSPATH string from the command line and instead
+    exports it in the environment. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2827. Remove deprecated NetUtils::getServerAddress. (cdouglas)
+
+    HADOOP-5681. Change examples RandomWriter and RandomTextWriter to 
+    use new mapreduce API. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5680. Change org.apache.hadoop.examples.SleepJob to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5699. Change org.apache.hadoop.examples.PiEstimator to use 
+    new mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5720. Introduces new task types - JOB_SETUP, JOB_CLEANUP
+    and TASK_CLEANUP. Removes the isMap methods from TaskID/TaskAttemptID
+    classes. (ddas)
+
+    HADOOP-5668. Change TotalOrderPartitioner to use new API. (Amareshwari
+    Sriramadasu via cdouglas)
+
+    HADOOP-5738. Split "waiting_tasks" JobTracker metric into waiting maps and
+    waiting reduces. (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-5679. Resolve findbugs warnings in core/streaming/pipes/examples. 
+    (Jothi Padmanabhan via sharad)
+
+    HADOOP-4359. Support for data access authorization checking on Datanodes.
+    (Kan Zhang via rangadi)
+
+    HADOOP-5690. Change org.apache.hadoop.examples.DBCountPageView to use 
+    new mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5694. Change org.apache.hadoop.examples.dancing to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5696. Change org.apache.hadoop.examples.Sort to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5698. Change org.apache.hadoop.examples.MultiFileWordCount to 
+    use new mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5913. Provide ability to an administrator to stop and start
+    job queues. (Rahul Kumar Singh and Hemanth Yamijala via yhemanth)
+
+    MAPREDUCE-711. Removed Distributed Cache from Common, to move it
+    under Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-6201. Change FileSystem::listStatus contract to throw
+    FileNotFoundException if the directory does not exist, rather than letting
+    this be implementation-specific. (Jakob Homan via cdouglas)
+
+    HADOOP-6230. Moved process tree and memory calculator related classes
+    from Common to Map/Reduce. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-6203. FsShell rm/rmr error message indicates exceeding Trash quota
+    and suggests using -skipTrash when moving to trash fails.
+    (Boris Shkolnik via suresh)
+
+    HADOOP-6303. Eclipse .classpath template has outdated jar files and is
+    missing some new ones.  (cos)
+
+    HADOOP-6396. Fix uninformative exception message when unable to parse
+    umask. (jghoman)
+
+    HADOOP-6299. Reimplement the UserGroupInformation to use the OS
+    specific and Kerberos JAAS login. (omalley)
+
+    HADOOP-6686. Remove redundant exception class name from the exception
+    message for the exceptions thrown at RPC client. (suresh)
+
+    HADOOP-6701. Fix incorrect exit codes returned from chmod, chown and chgrp
+    commands from FsShell. (Ravi Phulari via suresh)
+
+  NEW FEATURES
+
+    HADOOP-6332. Large-scale Automated Test Framework. (sharad, Sreekanth
+    Ramakrishnan, et al. via cos)
+
+    HADOOP-4268. Change fsck to use ClientProtocol methods so that the
+    corresponding permission requirement for running the ClientProtocol
+    methods will be enforced.  (szetszwo)
+
+    HADOOP-3953. Implement sticky bit for directories in HDFS. (Jakob Homan
+    via szetszwo)
+
+    HADOOP-4368. Implement df in FsShell to show the status of a FileSystem.
+    (Craig Macdonald via szetszwo)
+
+    HADOOP-3741. Add a web ui to the SecondaryNameNode for showing its status.
+    (szetszwo)
+
+    HADOOP-5018. Add pipelined writers to Chukwa. (Ari Rabkin via cdouglas)
+
+    HADOOP-5052. Add an example computing exact digits of pi using the
+    Bailey-Borwein-Plouffe algorithm. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-4927. Adds a generic wrapper around outputformat to allow creation of
+    output on demand (Jothi Padmanabhan via ddas)
+
+    HADOOP-5144. Add a new DFSAdmin command for changing the setting of restore
+    failed storage replicas in namenode. (Boris Shkolnik via szetszwo)
+
+    HADOOP-5258. Add a new DFSAdmin command to print a tree of the rack and
+    datanode topology as seen by the namenode.  (Jakob Homan via szetszwo)
+    
+    HADOOP-4756. A command line tool to access JMX properties on NameNode
+    and DataNode. (Boris Shkolnik via rangadi)
+
+    HADOOP-4539. Introduce backup node and checkpoint node. (shv)
+
+    HADOOP-5363. Add support for proxying connections to multiple clusters with
+    different versions to hdfsproxy. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5528. Add a configurable hash partitioner operating on ranges of
+    BinaryComparable keys. (Klaas Bosteels via shv)
+
+    HADOOP-5257. HDFS servers may start and stop external components through
+    a plugin interface. (Carlos Valiente via dhruba)
+
+    HADOOP-5450. Add application-specific data types to streaming's typed bytes
+    interface. (Klaas Bosteels via omalley)
+
+    HADOOP-5518. Add contrib/mrunit, a MapReduce unit test framework.
+    (Aaron Kimball via cutting)
+
+    HADOOP-5469.  Add /metrics servlet to daemons, providing metrics
+    over HTTP as either text or JSON.  (Philip Zeyliger via cutting)
+
+    HADOOP-5467. Introduce offline fsimage image viewer. (Jakob Homan via shv)
+
+    HADOOP-5752. Add a new hdfs image processor, Delimited, to oiv. (Jakob
+    Homan via szetszwo)
+
+    HADOOP-5266. Adds the capability to do mark/reset of the reduce values 
+    iterator in the Context object API. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5745. Allow setting the default value of maxRunningJobs for all
+    pools. (dhruba via matei)
+
+    HADOOP-5643. Adds a way to decommission TaskTrackers while the JobTracker
+    is running. (Amar Kamat via ddas)
+
+    HADOOP-4829. Allow FileSystem shutdown hook to be disabled.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-5815. Sqoop: A database import tool for Hadoop.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-4861. Add disk usage with human-readable size (-duh).
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-5844. Use mysqldump when connecting to local mysql instance in Sqoop.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-5976. Add a new command, classpath, to the hadoop script.  (Owen
+    O'Malley and Gary Murry via szetszwo)
+
+    HADOOP-6120. Add support for Avro specific and reflect data.
+    (sharad via cutting)
+
+    HADOOP-6226. Moves BoundedByteArrayOutputStream from the tfile package to
+    the io package and makes it available to other users (MAPREDUCE-318). 
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-6105. Adds support for automatically handling deprecation of
+    configuration keys. (V.V.Chaitanya Krishna via yhemanth)
+    
+    HADOOP-6235. Adds new method to FileSystem for clients to get server
+    defaults. (Kan Zhang via suresh)
+
+    HADOOP-6234. Add new option dfs.umaskmode to set umask in configuration
+    to use octal or symbolic instead of decimal. (Jakob Homan via suresh)
+
+    HADOOP-5073. Add annotation mechanism for interface classification.
+    (Jakob Homan via suresh)
+
+    HADOOP-4012. Provide splitting support for bzip2 compressed files. (Abdul
+    Qadeer via cdouglas)
+
+    HADOOP-6246. Add backward compatibility support to use deprecated decimal 
+    umask from old configuration. (Jakob Homan via suresh)
+
+    HADOOP-4952. Add new improved file system interface FileContext for the
+    application writer (Sanjay Radia via suresh)
+
+    HADOOP-6170. Add facility to tunnel Avro RPCs through Hadoop RPCs.
+    This permits one to take advantage of both Avro's RPC versioning
+    features and Hadoop's proven RPC scalability.  (cutting)
+
+    HADOOP-6267. Permit building contrib modules located in external
+    source trees.  (Todd Lipcon via cutting)
+
+    HADOOP-6240. Add a new FileContext rename operation that is POSIX
+    compliant and allows overwriting an existing destination. (suresh)
+
+    HADOOP-6204. Implementing aspects development and fault injection
+    framework for Hadoop (cos)
+
+    HADOOP-6313. Implement Syncable interface in FSDataOutputStream to expose
+    flush APIs to application users. (Hairong Kuang via suresh)
+
+    HADOOP-6284. Add a new parameter, HADOOP_JAVA_PLATFORM_OPTS, to
+    hadoop-config.sh so that it allows setting java command options for
+    JAVA_PLATFORM.  (Koji Noguchi via szetszwo)
+
+    HADOOP-6337. Updates FilterInitializer class to be more visible,
+    and the init of the class is made to take a Configuration argument.
+    (Jakob Homan via ddas)
+
+    HADOOP-6223. Add new file system interface AbstractFileSystem with
+    implementation of some file systems that delegate to old FileSystem.
+    (Sanjay Radia via suresh)
+
+    HADOOP-6433. Introduce asynchronous deletion of files via a pool of
+    threads. This can be used to delete files in the Distributed
+    Cache. (Zheng Shao via dhruba)
+
+    HADOOP-6415. Adds a common token interface for both job token and 
+    delegation token. (Kan Zhang via ddas)
+
+    HADOOP-6408. Add a /conf servlet to dump running configuration.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6520. Adds APIs to read/write Token and secret keys. Also
+    adds the automatic loading of tokens into UserGroupInformation
+    upon login. The tokens are read from a file specified in the
+    environment variable. (ddas)
+
+    HADOOP-6419. Adds SASL based authentication to RPC.
+    (Kan Zhang via ddas)
+
+    HADOOP-6510. Adds a way for superusers to impersonate other users
+    in a secure environment. (Jitendra Nath Pandey via ddas)
+
+    HADOOP-6421. Adds Symbolic links to FileContext, AbstractFileSystem.
+    It also adds a limited implementation for the local file system
+     (RawLocalFs) that allows local symlinks. (Eli Collins via Sanjay Radia)
+
+    HADOOP-6577. Add hidden configuration option "ipc.server.max.response.size"
+    to change the default 1 MB, the maximum size when large IPC handler 
+    response buffer is reset. (suresh)
+
+    HADOOP-6568. Adds authorization for the default servlets. 
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-6586. Log authentication and authorization failures and successes
+    for RPC (boryas)
+
+    HADOOP-6580. UGI should contain authentication method. (jnp via boryas)
+    
+    HADOOP-6657. Add a capitalization method to StringUtils for MAPREDUCE-1545.
+    (Luke Lu via Steve Loughran)
+
+    HADOOP-6692. Add FileContext#listStatus that returns an iterator.
+    (hairong)
+
+    HADOOP-6869. Functionality to create file or folder on a remote daemon
+    side (Vinay Thota via cos)
+
+  IMPROVEMENTS
+
+    HADOOP-6798. Align Ivy version for all Hadoop subprojects. (cos)
+
+    HADOOP-6777. Implement a functionality for suspend and resume a process.
+    (Vinay Thota via cos)
+
+    HADOOP-6772. Utilities specific to system tests. (Vinay Thota via cos)
+
+    HADOOP-6771. Herriot's artifact id for Maven deployment should be set to
+    hadoop-core-instrumented (cos)
+
+    HADOOP-6752. Remote cluster control functionality needs JavaDocs
+    improvement (Balaji Rajagopalan via cos).
+
+    HADOOP-4565. Added CombineFileInputFormat to use data locality information
+    to create splits. (dhruba via zshao)
+
+    HADOOP-4936. Improvements to TestSafeMode. (shv)
+
+    HADOOP-4985. Remove unnecessary "throw IOException" declarations in
+    FSDirectory related methods.  (szetszwo)
+
+    HADOOP-5017. Change NameNode.namesystem declaration to private.  (szetszwo)
+
+    HADOOP-4794. Add branch information from the source version control into
+    the version information that is compiled into Hadoop. (cdouglas via 
+    omalley)
+
+    HADOOP-5070. Increment copyright year to 2009, remove assertions of ASF
+    copyright to licensed files. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-5037. Deprecate static FSNamesystem.getFSNamesystem().  (szetszwo)
+
+    HADOOP-5088. Include releaseaudit target as part of developer test-patch
+    target.  (Giridharan Kesavan via nigel)
+
+    HADOOP-2721. Uses setsid when creating new tasks so that subprocesses of 
+    this process will be within this new session (and this process will be 
+    the process leader for all the subprocesses). Killing the process leader,
+    or the main Java task in Hadoop's case, kills the entire subtree of
+    processes. (Ravi Gummadi via ddas)
+
+    HADOOP-5097. Remove static variable JspHelper.fsn, a static reference to
+    a non-singleton FSNamesystem object.  (szetszwo)
+
+    HADOOP-3327. Improves handling of READ_TIMEOUT during map output copying.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5124. Choose datanodes randomly instead of starting from the first
+    datanode for providing fairness.  (hairong via szetszwo)
+
+    HADOOP-4930. Implement a Linux native executable that can be used to 
+    launch tasks as users. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5122. Fix format of fs.default.name value in libhdfs test conf.
+    (Craig Macdonald via tomwhite)
+
+    HADOOP-5038. Direct daemon trace to debug log instead of stdout. (Jerome
+    Boulon via cdouglas)
+
+    HADOOP-5101. Improve packaging by adding 'all-jars' target building core,
+    tools, and example jars. Let findbugs depend on this rather than the 'tar'
+    target. (Giridharan Kesavan via cdouglas)
+
+    HADOOP-4868. Splits the hadoop script into three parts - bin/hadoop, 
+    bin/mapred and bin/hdfs. (Sharad Agarwal via ddas)
+
+    HADOOP-1722. Adds support for TypedBytes and RawBytes in Streaming.
+    (Klaas Bosteels via ddas)
+
+    HADOOP-4220. Changes the JobTracker restart tests so that they take much
+    less time. (Amar Kamat via ddas)
+
+    HADOOP-4885. Try to restore failed name-node storage directories at 
+    checkpoint time. (Boris Shkolnik via shv)
+
+    HADOOP-5209. Update year to 2009 for javadoc.  (szetszwo)
+
+    HADOOP-5279. Remove unnecessary targets from test-patch.sh.
+    (Giridharan Kesavan via nigel)
+
+    HADOOP-5120. Remove the use of FSNamesystem.getFSNamesystem() from 
+    UpgradeManagerNamenode and UpgradeObjectNamenode.  (szetszwo)
+
+    HADOOP-5222. Add offset to datanode clienttrace. (Lei Xu via cdouglas)
+
+    HADOOP-5240. Skip re-building javadoc when it is already
+    up-to-date. (Aaron Kimball via cutting)
+
+    HADOOP-5042. Add a cleanup stage to log rollover in Chukwa appender.
+    (Jerome Boulon via cdouglas)
+
+    HADOOP-5264. Removes redundant configuration object from the TaskTracker.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-5232. Enable patch testing to occur on more than one host.
+    (Giri Kesavan via nigel)
+
+    HADOOP-4546. Fix DF reporting for AIX. (Bill Habermaas via cdouglas)
+
+    HADOOP-5023. Add Tomcat support to HdfsProxy. (Zhiyong Zhang via cdouglas)
+    
+    HADOOP-5317. Provide documentation for LazyOutput Feature. 
+    (Jothi Padmanabhan via johan)
+
+    HADOOP-5455. Document rpc metrics context to the extent dfs, mapred, and
+    jvm contexts are documented. (Philip Zeyliger via cdouglas)
+
+    HADOOP-5358. Provide scripting functionality to the synthetic load
+    generator. (Jakob Homan via hairong)
+
+    HADOOP-5442. Paginate the jobhistory display and add some search
+    capabilities. (Amar Kamat via acmurthy) 
+
+    HADOOP-4842. Streaming now allows specifying a command for the combiner.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5196. Avoid unnecessary byte[] allocation in
+    SequenceFile.CompressedBytes and SequenceFile.UncompressedBytes.
+    (hong tang via mahadev)
+
+    HADOOP-4655. New method FileSystem.newInstance() that always returns
+    a newly allocated FileSystem object. (dhruba)
+
+    HADOOP-4788. Set Fair scheduler to assign both a map and a reduce on each
+    heartbeat by default. (matei)
+
+    HADOOP-5491.  In contrib/index, better control memory usage.
+    (Ning Li via cutting)
+
+    HADOOP-5423. Include option of preserving file metadata in
+    SequenceFile::sort. (Michael Tamm via cdouglas)
+
+    HADOOP-5331. Add support for KFS appends. (Sriram Rao via cdouglas)
+
+    HADOOP-4365. Make Configuration::getProps protected in support of
+    meaningful subclassing. (Steve Loughran via cdouglas)
+
+    HADOOP-2413. Remove the static variable FSNamesystem.fsNamesystemObject.
+    (Konstantin Shvachko via szetszwo)
+
+    HADOOP-4584. Improve datanode block reports and associated file system
+    scan to avoid interfering with normal datanode operations.
+    (Suresh Srinivas via rangadi)
+
+    HADOOP-5502. Documentation for backup and checkpoint nodes.
+    (Jakob Homan via shv)
+
+    HADOOP-5485. Mask actions in the fair scheduler's servlet UI based on
+    value of webinterface.private.actions. 
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5581. HDFS should throw FileNotFoundException when opening
+    a file that does not exist. (Brian Bockelman via rangadi)
+
+    HADOOP-5509. PendingReplicationBlocks does not start monitor in the
+    constructor. (shv)
+
+    HADOOP-5494. Modify sorted map output merger to lazily read values,
+    rather than buffering at least one record for each segment. (Devaraj Das
+    via cdouglas)
+
+    HADOOP-5396. Provide ability to refresh queue ACLs in the JobTracker
+    without having to restart the daemon.
+    (Sreekanth Ramakrishnan and Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4490. Provide ability to run tasks as job owners.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5697. Change org.apache.hadoop.examples.Grep to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5625. Add operation duration to clienttrace. (Lei Xu via cdouglas)
+
+    HADOOP-5705. Improve TotalOrderPartitioner efficiency by updating the trie
+    construction. (Dick King via cdouglas)
+
+    HADOOP-5589. Eliminate source limit of 64 for map-side joins imposed by
+    TupleWritable encoding. (Jingkei Ly via cdouglas)
+
+    HADOOP-5734. Correct block placement policy description in HDFS
+    Design document. (Konstantin Boudnik via shv)
+
+    HADOOP-5657. Validate data in TestReduceFetch to improve merge test
+    coverage. (cdouglas)
+
+    HADOOP-5613. Change S3Exception to checked exception.
+    (Andrew Hitchcock via tomwhite)
+
+    HADOOP-5717. Create public enum class for the Framework counters in 
+    org.apache.hadoop.mapreduce. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5217. Split AllTestDriver for core, hdfs and mapred. (sharad)
+
+    HADOOP-5364. Add certificate expiration warning to HsftpFileSystem and HDFS
+    proxy. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5733. Add map/reduce slot capacity and blacklisted capacity to
+    JobTracker metrics. (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-5596. Add EnumSetWritable. (He Yongqiang via szetszwo)
+
+    HADOOP-5727. Simplify hashcode for ID types. (Shevek via cdouglas)
+
+    HADOOP-5500. In DBOutputFormat, where field names are absent, permit the
+    number of fields to be sufficient to construct the select query. (Enis
+    Soztutar via cdouglas)
+
+    HADOOP-5081. Split TestCLI into HDFS, Mapred and Core tests. (sharad)
+
+    HADOOP-5015. Separate block management code from FSNamesystem.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-5080. Add new test cases to TestMRCLI and TestHDFSCLI
+    (V.Karthikeyan via nigel)
+
+    HADOOP-5135. Splits the tests into different directories based on the 
+    package. Four new test targets have been defined - run-test-core, 
+    run-test-mapred, run-test-hdfs and run-test-hdfs-with-mr.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-5771. Implements unit tests for LinuxTaskController.
+    (Sreekanth Ramakrishnan and Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5419. Provide a facility to query the Queue ACLs for the
+    current user.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5780. Improve the per-block message printed by "-metaSave" in HDFS.
+    (Raghu Angadi)
+
+    HADOOP-5823. Added a new class DeprecatedUTF8 to help with removing
+    UTF8 related javac warnings. These warnings are removed in 
+    FSEditLog.java as a use case. (Raghu Angadi)
+
+    HADOOP-5824. Deprecate DataTransferProtocol.OP_READ_METADATA and remove
+    the corresponding unused codes.  (Kan Zhang via szetszwo)
+
+    HADOOP-5721. Factor out EditLogFileInputStream and EditLogFileOutputStream
+    into independent classes. (Luca Telloli & Flavio Junqueira via shv)
+
+    HADOOP-5838. Fix a few javac warnings in HDFS. (Raghu Angadi)
+
+    HADOOP-5854. Fix a few "Inconsistent Synchronization" warnings in HDFS.
+    (Raghu Angadi)
+
+    HADOOP-5369. Small tweaks to reduce MapFile index size. (Ben Maurer 
+    via sharad)
+
+    HADOOP-5858. Eliminate UTF8 and fix warnings in test/hdfs-with-mr package.
+    (shv)
+
+    HADOOP-5866. Move DeprecatedUTF8 from o.a.h.io to o.a.h.hdfs since it may
+    not be used outside hdfs. (Raghu Angadi)
+
+    HADOOP-5857. Move normal java methods from hdfs .jsp files to .java files.
+    (szetszwo)
+
+    HADOOP-5873. Remove deprecated methods randomDataNode() and
+    getDatanodeByIndex(..) in FSNamesystem.  (szetszwo)
+
+    HADOOP-5572. Improves the progress reporting for the sort phase for both
+    maps and reduces. (Ravi Gummadi via ddas)
+
+    HADOOP-5839. Fix EC2 scripts to allow remote job submission.
+    (Joydeep Sen Sarma via tomwhite)
+
+    HADOOP-5877. Fix javac warnings in TestHDFSServerPorts, TestCheckpoint, 
+    TestNameEditsConfig, TestStartup and TestStorageRestore.
+    (Jakob Homan via shv)
+
+    HADOOP-5438. Provide a single FileSystem method to create or 
+    open-for-append to a file.  (He Yongqiang via dhruba)
+
+    HADOOP-5472. Change DistCp to support globbing of input paths.  (Dhruba
+    Borthakur and Rodrigo Schmidt via szetszwo)
+
+    HADOOP-5175. Don't unpack libjars on classpath. (Todd Lipcon via tomwhite)
+
+    HADOOP-5620. Add an option to DistCp for preserving modification and access
+    times.  (Rodrigo Schmidt via szetszwo)
+
+    HADOOP-5664. Change map serialization so a lock is obtained only where
+    contention is possible, rather than for each write. (cdouglas)
+
+    HADOOP-5896. Remove the dependency of GenericOptionsParser on 
+    Option.withArgPattern. (Giridharan Kesavan and Sharad Agarwal via 
+    sharad)
+
+    HADOOP-5784. Makes the number of heartbeats per second that should arrive
+    at the JobTracker configurable. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5955. Changes TestFileOutputFormat so that it uses LOCAL_MR
+    instead of CLUSTER_MR. (Jothi Padmanabhan via das)
+
+    HADOOP-5948. Changes TestJavaSerialization to use LocalJobRunner 
+    instead of MiniMR/DFS cluster. (Jothi Padmanabhan via das)
+
+    HADOOP-2838. Add mapred.child.env to pass environment variables to 
+    tasktracker's child processes. (Amar Kamat via sharad)
+
+    HADOOP-5961. DataNode process understands generic hadoop command line
+    options (like -Ddfs.property=value). (Raghu Angadi)
+
+    HADOOP-5938. Change org.apache.hadoop.mapred.jobcontrol to use new
+    api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-2141. Improves the speculative execution heuristic. The heuristic
+    is currently based on the progress-rates of tasks and the expected time
+    to complete. Also, statistics about trackers are collected, and speculative
+    tasks are not given to the ones deduced to be slow. 
+    (Andy Konwinski and ddas)
+
+    HADOOP-5952. Change "-1 tests included" wording in test-patch.sh.
+    (Gary Murry via szetszwo)
+
+    HADOOP-6106. Provides an option in ShellCommandExecutor to timeout 
+    commands that do not complete within a certain amount of time.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5925. EC2 scripts should exit on error. (tomwhite)
+
+    HADOOP-6109. Change Text to grow its internal buffer exponentially, rather
+    than to the max of the current length and the proposed length, to improve
+    performance when reading large values. (thushara wijeratna via cdouglas)
+
+    HADOOP-2366. Support trimmed strings in Configuration.  (Michele Catasta
+    via szetszwo)
+
+    HADOOP-6099. The RPC module can be configured to not send periodic pings.
+    The default behaviour of sending periodic pings remains unchanged. (dhruba)
+
+    HADOOP-6142. Update documentation and use of harchives for relative paths
+    added in MAPREDUCE-739. (Mahadev Konar via cdouglas)
+
+    HADOOP-6148. Implement a fast, pure Java CRC32 calculator which outperforms
+    java.util.zip.CRC32.  (Todd Lipcon and Scott Carey via szetszwo)
+
+    HADOOP-6146. Upgrade to JetS3t version 0.7.1. (tomwhite)
+
+    HADOOP-6161. Add get/setEnum methods to Configuration. (cdouglas)
+
+    HADOOP-6160. Fix releaseaudit target to run on specific directories.
+    (gkesavan)
+    
+    HADOOP-6169. Removing deprecated method calls in TFile. (hong tang via 
+    mahadev)
+
+    HADOOP-6176. Add a couple package private methods to AccessTokenHandler
+    for testing.  (Kan Zhang via szetszwo)
+
+    HADOOP-6182. Fix ReleaseAudit warnings (Giridharan Kesavan and Lee Tucker
+    via gkesavan)
+
+    HADOOP-6173. Change src/native/packageNativeHadoop.sh to package all
+    native library files.  (Hong Tang via szetszwo)
+
+    HADOOP-6184. Provide an API to dump Configuration in a JSON format.
+    (V.V.Chaitanya Krishna via yhemanth)
+
+    HADOOP-6224. Add a method to WritableUtils performing a bounded read of an
+    encoded String. (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-6133. Add a caching layer to Configuration::getClassByName to
+    alleviate a performance regression introduced in a compatibility layer.
+    (Todd Lipcon via cdouglas)
+
+    HADOOP-6252. Provide a method to determine if a deprecated key is set in
+    config file. (Jakob Homan via suresh)
+
+    HADOOP-5879. Read compression level and strategy from Configuration for
+    gzip compression. (He Yongqiang via cdouglas)
+
+    HADOOP-6216. Support comments in host files.  (Ravi Phulari and Dmytro
+    Molkov via szetszwo)
+
+    HADOOP-6217. Update documentation for project split. (Corinne Chandel via 
+    omalley)
+
+    HADOOP-6268. Add ivy jar to .gitignore. (Todd Lipcon via cdouglas)
+
+    HADOOP-6270. Support deleteOnExit in FileContext.  (Suresh Srinivas via
+    szetszwo)
+
+    HADOOP-6233. Rename configuration keys towards API standardization and
+    backward compatibility. (Jitendra Pandey via suresh)
+
+    HADOOP-6260. Add additional unit tests for FileContext util methods.
+    (Gary Murry via suresh).
+
+    HADOOP-6309. Change build.xml to run tests with java asserts.  (Eli
+    Collins via szetszwo)
+
+    HADOOP-6326. Hudson runs should check for AspectJ warnings and report
+    failure if any is present (cos)
+
+    HADOOP-6329. Add build-fi directory to the ignore lists.  (szetszwo)
+
+    HADOOP-5107. Use Maven ant tasks to publish the subproject jars.
+    (Giridharan Kesavan via omalley)
+
+    HADOOP-6343. Log unexpected throwable object caught in RPC.  (Jitendra Nath
+    Pandey via szetszwo)
+
+    HADOOP-6367. Removes Access Token implementation from common.
+    (Kan Zhang via ddas)
+
+    HADOOP-6395. Upgrade some libraries to be consistent across common, hdfs,
+    and mapreduce. (omalley)
+
+    HADOOP-6398. Build is broken after HADOOP-6395 patch has been applied (cos)
+
+    HADOOP-6413. Move TestReflectionUtils to Common. (Todd Lipcon via tomwhite)
+
+    HADOOP-6283. Improve the exception messages thrown by
+    FileUtil$HardLink.getLinkCount(..).  (szetszwo)
+
+    HADOOP-6279. Add Runtime::maxMemory to JVM metrics. (Todd Lipcon via
+    cdouglas)
+
+    HADOOP-6305. Unify build property names to facilitate cross-project
+    modifications (cos)
+
+    HADOOP-6312. Remove unnecessary debug logging in Configuration constructor.
+    (Aaron Kimball via cdouglas)
+
+    HADOOP-6366. Reduce ivy console output to observable level (cos)
+
+    HADOOP-6400. Log errors getting Unix UGI. (Todd Lipcon via tomwhite)
+
+    HADOOP-6346. Add support for specifying unpack pattern regex to
+    RunJar.unJar. (Todd Lipcon via tomwhite)
+
+    HADOOP-6422. Make RPC backend pluggable, protocol-by-protocol, to
+    ease evolution towards Avro.  (cutting)
+
+    HADOOP-5958. Use JDK 1.6 File APIs in DF.java wherever possible.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-6222. Core doesn't have TestCommonCLI facility. (cos)
+
+    HADOOP-6394. Add a helper class to simplify FileContext related tests and
+    improve code reusability. (Jitendra Nath Pandey via suresh)
+
+    HADOOP-4656. Add a user to groups mapping service. (boryas, acmurthy)
+
+    HADOOP-6435. Make RPC.waitForProxy with timeout public. (Steve Loughran
+    via tomwhite)
+  
+    HADOOP-6472. Add a tokenCache option to GenericOptionsParser for passing
+    a file with secret keys to a map reduce job. (boryas)
+
+    HADOOP-3205. Read multiple chunks directly from FSInputChecker subclass
+    into user buffers. (Todd Lipcon via tomwhite)
+
+    HADOOP-6479. TestUTF8 assertions could fail with better text.
+    (Steve Loughran via tomwhite)
+
+    HADOOP-6155. Deprecate RecordIO anticipating Avro. (Tom White via cdouglas)
+
+    HADOOP-6492. Make some Avro serialization APIs public.
+    (Aaron Kimball via cutting)
+
+    HADOOP-6497. Add an adapter for Avro's SeekableInput interface, so
+    that Avro can read FileSystem data.
+    (Aaron Kimball via cutting)
+
+    HADOOP-6495. Identifier should be serialized after the password is
+    created in the Token constructor. (jnp via boryas)
+
+    HADOOP-6518. Makes the UGI honor the env var KRB5CCNAME. 
+    (Owen O'Malley via ddas)
+
+    HADOOP-6531. Enhance FileUtil with an API to delete all contents of a
+    directory. (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-6547. Move DelegationToken into Common, so that it can be used by
+    MapReduce also. (devaraj via omalley)
+
+    HADOOP-6552. Puts renewTGT=true and useTicketCache=true for the keytab
+    kerberos options. (ddas)
+
+    HADOOP-6534. Trim whitespace from directory lists initializing
+    LocalDirAllocator. (Todd Lipcon via cdouglas)
+
+    HADOOP-6559. Makes the RPC client automatically re-login when the SASL 
+    connection setup fails. This is applicable only to keytab based logins.
+    (Devaraj Das)
+
+    HADOOP-6551. Delegation token renewing and cancelling should provide
+    meaningful exceptions when there are failures instead of returning 
+    false. (omalley)
+
+    HADOOP-6583. Captures authentication and authorization metrics. (ddas)
+
+    HADOOP-6543. Allows secure clients to talk to unsecure clusters. 
+    (Kan Zhang via ddas)
+
+    HADOOP-6579. Provide a mechanism for encoding/decoding Tokens from
+    a url-safe string and change the commons-codec library to 1.4. (omalley)
+
+    HADOOP-6596. Add a version field to the AbstractDelegationTokenIdentifier's
+    serialized value. (omalley)
+
+    HADOOP-6573. Support for persistent delegation tokens.
+    (Jitendra Pandey via shv)
+
+    HADOOP-6594. Provide a fetchdt tool via bin/hdfs. (jhoman via acmurthy) 
+
+    HADOOP-6589. Provide better error messages when RPC authentication fails.
+    (Kan Zhang via omalley)
+
+    HADOOP-6599. Split existing RpcMetrics into RpcMetrics & RpcDetailedMetrics.
+    (Suresh Srinivas via Sanjay Radia)
+
+    HADOOP-6537. Declare more detailed exceptions in FileContext and
+    AbstractFileSystem. (Suresh Srinivas via Sanjay Radia)
+
+    HADOOP-6486. Fix common classes to work with Avro 1.3 reflection.
+    (cutting via tomwhite)
+
+    HADOOP-6591. HarFileSystem can handle paths with whitespace characters.
+    (Rodrigo Schmidt via dhruba)
+
+    HADOOP-6407. Have a way to automatically update Eclipse .classpath file
+    when new libs are added to the classpath through Ivy. (tomwhite)
+
+    HADOOP-3659. Patch to allow hadoop native to compile on Mac OS X.
+    (Colin Evans and Allen Wittenauer via tomwhite)
+
+    HADOOP-6471. StringBuffer -> StringBuilder - conversion of references
+    as necessary. (Kay Kay via tomwhite)
+
+    HADOOP-6646. Move HarFileSystem out of Hadoop Common. (mahadev)
+
+    HADOOP-6566. Add methods supporting, enforcing narrower permissions on
+    local daemon directories. (Arun Murthy and Luke Lu via cdouglas)
+
+    HADOOP-6705. Fix to work with 1.5 version of jiracli
+    (Giridharan Kesavan)
+
+    HADOOP-6658. Exclude Private elements from generated Javadoc. (tomwhite)
+
+    HADOOP-6635. Install/deploy source jars to Maven repo. 
+    (Patrick Angeles via jghoman)
+
+    HADOOP-6717. Log levels in o.a.h.security.Groups too high 
+    (Todd Lipcon via jghoman)
+
+    HADOOP-6667. RPC.waitForProxy should retry through NoRouteToHostException.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6677. InterfaceAudience.LimitedPrivate should take a string not an
+    enum. (tomwhite)
+
+    HADOOP-6678. Remove FileContext#isFile, isDirectory, and exists.
+    (Eli Collins via hairong)
+
+    HADOOP-6515. Make maximum number of http threads configurable.
+    (Scott Chen via zshao)
+
+    HADOOP-6563. Add more symlink tests to cover intermediate symlinks
+    in paths. (Eli Collins via suresh)
+
+    HADOOP-6585.  Add FileStatus#isDirectory and isFile.  (Eli Collins via
+    tomwhite)
+
+    HADOOP-6738.  Move cluster_setup.xml from MapReduce to Common.
+    (Tom White via tomwhite)
+
+    HADOOP-6794. Move configuration and script files post split. (tomwhite)
+
+    HADOOP-6403.  Deprecate EC2 bash scripts.  (tomwhite)
+
+    HADOOP-6769. Add an API in FileSystem to get FileSystem instances based 
+    on users. (ddas via boryas)
+
+    HADOOP-6813. Add a new newInstance method in FileSystem that takes 
+    a "user" as argument (ddas via boryas)
+
+    HADOOP-6668.  Apply audience and stability annotations to classes in
+    common.  (tomwhite)
+
+    HADOOP-6821.  Document changes to memory monitoring.  (Hemanth Yamijala
+    via tomwhite)
+
+  OPTIMIZATIONS
+
+    HADOOP-5595. NameNode does not need to run a replicator to choose a
+    random DataNode. (hairong)
+
+    HADOOP-5603. Improve NameNode's block placement performance. (hairong)
+
+    HADOOP-5638. More improvement on block placement performance. (hairong)
+
+    HADOOP-6180. NameNode slowed down when many files with same filename
+    were moved to Trash. (Boris Shkolnik via hairong)
+
+    HADOOP-6166. Further improve the performance of the pure-Java CRC32
+    implementation. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-6271. Add recursive and non recursive create and mkdir to 
+    FileContext. (Sanjay Radia via suresh)
+
+    HADOOP-6261. Add URI based tests for FileContext. 
+    (Ravi Phulari via suresh).
+
+    HADOOP-6307. Add a new SequenceFile.Reader constructor in order to support
+    reading an un-closed file.  (szetszwo)
+
+    HADOOP-6467. Improve the performance on HarFileSystem.listStatus(..).
+    (mahadev via szetszwo)
+
+    HADOOP-6569. FsShell#cat should avoid calling an unnecessary getFileStatus
+    before opening a file to read. (hairong)
+
+    HADOOP-6689. Add directory renaming test to existing FileContext tests.
+    (Eli Collins via suresh)
+
+    HADOOP-6713. The RPC server Listener thread is a scalability bottleneck.
+    (Dmytro Molkov via hairong)
+
+  BUG FIXES
+
+    HADOOP-6748. Removes hadoop.cluster.administrators; the cluster
+    administrators acl is passed as a parameter in the constructor. (amareshwari)
+
+    HADOOP-6828. Herriot uses the old way of accessing logs directories (Sreekanth
+    Ramakrishnan via cos)
+
+    HADOOP-6788. [Herriot] Exception exclusion functionality is not working
+    correctly. (Vinay Thota via cos)
+
+    HADOOP-6773. Ivy folder contains redundant files (cos)
+
+    HADOOP-5379. CBZip2InputStream to throw IOException on data crc error.
+    (Rodrigo Schmidt via zshao)
+
+    HADOOP-5326. Fixes CBZip2OutputStream data corruption problem.
+    (Rodrigo Schmidt via zshao)
+
+    HADOOP-4963. Fixes a logging issue to do with getting the location of
+    the map output file. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2337. Trash should close FileSystem on exit and should not start 
+    emptying thread if disabled. (shv)
+
+    HADOOP-5072. Fix failure in TestCodec because testSequenceFileGzipCodec 
+    won't pass without native gzip codec. (Zheng Shao via dhruba)
+
+    HADOOP-5050. TestDFSShell.testFilePermissions should not assume umask
+    setting.  (Jakob Homan via szetszwo)
+
+    HADOOP-4975. Set classloader for nested mapred.join configs. (Jingkei Ly
+    via cdouglas)
+
+    HADOOP-5078. Remove invalid AMI kernel in EC2 scripts. (tomwhite)
+
+    HADOOP-5045. FileSystem.isDirectory() should not be deprecated.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-4960. Use datasource time, rather than system time, during metrics
+    demux. (Eric Yang via cdouglas)
+
+    HADOOP-5032. Export conf dir set in config script. (Eric Yang via cdouglas)
+
+    HADOOP-5176. Fix a typo in TestDFSIO.  (Ravi Phulari via szetszwo)
+
+    HADOOP-4859. Distinguish daily rolling output dir by adding a timestamp.
+    (Jerome Boulon via cdouglas)
+
+    HADOOP-4959. Correct system metric collection from top on Redhat 5.1. (Eric
+    Yang via cdouglas)
+
+    HADOOP-5039. Fix log rolling regex to process only the relevant
+    subdirectories. (Jerome Boulon via cdouglas)
+
+    HADOOP-5095. Update Chukwa watchdog to accept config parameter. (Jerome
+    Boulon via cdouglas)
+
+    HADOOP-5147. Correct reference to agent list in Chukwa bin scripts. (Ari
+    Rabkin via cdouglas)
+
+    HADOOP-5148. Fix logic disabling watchdog timer in Chukwa daemon scripts.
+    (Ari Rabkin via cdouglas)
+
+    HADOOP-5100. Append, rather than truncate, when creating log4j metrics in
+    Chukwa. (Jerome Boulon via cdouglas)
+
+    HADOOP-5204. Fix broken trunk compilation on Hudson by letting 
+    task-controller be an independent target in build.xml.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5212. Fix the path translation problem introduced by HADOOP-4868 
+    running on cygwin. (Sharad Agarwal via omalley)
+
+    HADOOP-5226. Add license headers to html and jsp files.  (szetszwo)
+
+    HADOOP-5172. Disable misbehaving Chukwa unit test until it can be fixed.
+    (Jerome Boulon via nigel)
+
+    HADOOP-4933. Fixes a ConcurrentModificationException problem that shows up
+    when the history viewer is accessed concurrently. 
+    (Amar Kamat via ddas)
+
+    HADOOP-5253. Remove duplicate call to cn-docs target. 
+    (Giri Kesavan via nigel)
+
+    HADOOP-5251. Fix classpath for contrib unit tests to include clover jar.
+    (nigel)
+
+    HADOOP-5206. Synchronize "unprotected*" methods of FSDirectory on the root.
+    (Jakob Homan via shv)
+
+    HADOOP-5292. Fix NPE in KFS::getBlockLocations. (Sriram Rao via lohit)
+
+    HADOOP-5219. Adds a new property io.seqfile.local.dir for use by
+    SequenceFile, which earlier used mapred.local.dir. (Sharad Agarwal
+    via ddas)
+
+    HADOOP-5300. Fix ant javadoc-dev target and the typo in the class name
+    NameNodeActivtyMBean.  (szetszwo)
+
+    HADOOP-5218.  libhdfs unit test failed because it was unable to 
+    start namenode/datanode. Fixed. (dhruba)
+
+    HADOOP-5273. Add license header to TestJobInProgress.java.  (Jakob Homan
+    via szetszwo)
+    
+    HADOOP-5229. Remove duplicate version variables in build files
+    (Stefan Groschupf via johan)
+
+    HADOOP-5383. Avoid building an unused string in NameNode's 
+    verifyReplication(). (Raghu Angadi)
+
+    HADOOP-5347. Create a job output directory for the bbp examples. (szetszwo)
+
+    HADOOP-5341. Make hadoop-daemon scripts backwards compatible with the
+    changes in HADOOP-4868. (Sharad Agarwal via yhemanth)
+
+    HADOOP-5456. Fix javadoc links to ClientProtocol#restoreFailedStorage(..).
+    (Boris Shkolnik via szetszwo)
+
+    HADOOP-5458. Remove leftover Chukwa entries from build, etc. (cdouglas)
+
+    HADOOP-5386. Modify hdfsproxy unit test to start on a random port,
+    implement clover instrumentation. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5511. Add Apache License to EditLogBackupOutputStream. (shv)
+
+    HADOOP-5507. Fix JMXGet javadoc warnings.  (Boris Shkolnik via szetszwo)
+
+    HADOOP-5191. Accessing HDFS with any IP address or hostname should work as
+    long as it points to the interface NameNode is listening on. (Raghu Angadi)
+
+    HADOOP-5561. Add javadoc.maxmemory parameter to build, preventing OOM
+    exceptions from javadoc-dev. (Jakob Homan via cdouglas)
+
+    HADOOP-5149. Modify HistoryViewer to ignore unfamiliar files in the log
+    directory. (Hong Tang via cdouglas)
+
+    HADOOP-5477. Fix rare failure in TestCLI for hosts returning variations of
+    'localhost'. (Jakob Homan via cdouglas)
+
+    HADOOP-5194. Disables setsid for tasks run on cygwin. 
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5322. Fix misleading/outdated comments in JobInProgress.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-5198. Fixes a problem to do with the task PID file being absent and 
+    the JvmManager trying to look for it. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5464. DFSClient did not treat write timeout of 0 properly.
+    (Raghu Angadi)
+
+    HADOOP-4045. Fix processing of IO errors in EditsLog.
+    (Boris Shkolnik via shv)
+
+    HADOOP-5462. Fixed a double free bug in the task-controller
+    executable. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5652. Fix a bug where in-memory segments are incorrectly retained in
+    memory. (cdouglas)
+
+    HADOOP-5533. Recovery duration shown on the jobtracker webpage is 
+    inaccurate. (Amar Kamat via sharad)
+
+    HADOOP-5647. Fix TestJobHistory to not depend on /tmp. (Ravi Gummadi 
+    via sharad)
+
+    HADOOP-5661. Fixes some findbugs warnings in o.a.h.mapred* packages and
+    suppresses a bunch of them. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5704. Fix compilation problems in TestFairScheduler and
+    TestCapacityScheduler.  (Chris Douglas via szetszwo)
+
+    HADOOP-5650. Fix safemode messages in the Namenode log.  (Suresh Srinivas
+    via szetszwo)
+
+    HADOOP-5488. Removes the pidfile management for the Task JVM from the
+    framework and instead passes the PID back and forth between the
+    TaskTracker and the Task processes. (Ravi Gummadi via ddas)
+
+    HADOOP-5658. Fix Eclipse templates. (Philip Zeyliger via shv)
+
+    HADOOP-5709. Remove redundant synchronization added in HADOOP-5661. (Jothi
+    Padmanabhan via cdouglas)
+
+    HADOOP-5715. Add conf/mapred-queue-acls.xml to the ignore lists.
+    (szetszwo)
+
+    HADOOP-5592. Fix typo in Streaming doc in reference to GzipCodec.
+    (Corinne Chandel via tomwhite)
+
+    HADOOP-5656. Counter for S3N Read Bytes does not work. (Ian Nowland
+    via tomwhite)
+
+    HADOOP-5406. Fix JNI binding for ZlibCompressor::setDictionary. (Lars
+    Francke via cdouglas)
+
+    HADOOP-3426. Fix/provide handling when DNS lookup fails on the loopback
+    address. Also cache the result of the lookup. (Steve Loughran via cdouglas)
+
+    HADOOP-5476. Close the underlying InputStream in SequenceFile::Reader when
+    the constructor throws an exception. (Michael Tamm via cdouglas)
+
+    HADOOP-5675. Do not launch a job if DistCp has no work to do. (Tsz Wo
+    (Nicholas), SZE via cdouglas)
+
+    HADOOP-5737. Fixes a problem in the way the JobTracker used to talk to
+    other daemons like the NameNode to get the job's files. Also adds APIs
+    in the JobTracker to get the FileSystem objects as per the JobTracker's
+    configuration. (Amar Kamat via ddas) 
+
+    HADOOP-5648. Not able to generate gridmix.jar on the already compiled 
+    version of hadoop. (gkesavan)	
+
+    HADOOP-5808. Fix import never used javac warnings in hdfs. (szetszwo)
+
+    HADOOP-5203. TT's version build is too restrictive. (Rick Cox via sharad)
+
+    HADOOP-5818. Revert the renaming from FSNamesystem.checkSuperuserPrivilege
+    to checkAccess by HADOOP-5643.  (Amar Kamat via szetszwo)
+
+    HADOOP-5820. Fix findbugs warnings for http related codes in hdfs.
+    (szetszwo)
+
+    HADOOP-5822. Fix javac warnings in several dfs tests related to unnecessary
+    casts.  (Jakob Homan via szetszwo)
+
+    HADOOP-5842. Fix a few javac warnings under packages fs and util.
+    (Hairong Kuang via szetszwo)
+
+    HADOOP-5845. Build successful despite test failure on test-core target.
+    (sharad)
+
+    HADOOP-5314. Prevent unnecessary saving of the file system image during 
+    name-node startup. (Jakob Homan via shv)
+
+    HADOOP-5855. Fix javac warnings for DisallowedDatanodeException and
+    UnsupportedActionException.  (szetszwo)
+
+    HADOOP-5582. Fixes a problem in Hadoop Vaidya to do with reading
+    counters from job history files. (Suhas Gogate via ddas)
+
+    HADOOP-5829. Fix javac warnings found in ReplicationTargetChooser,
+    FSImage, Checkpointer, SecondaryNameNode and a few other hdfs classes.
+    (Suresh Srinivas via szetszwo)
+
+    HADOOP-5835. Fix findbugs warnings found in Block, DataNode, NameNode and
+    a few other hdfs classes.  (Suresh Srinivas via szetszwo)
+
+    HADOOP-5853. Undeprecate HttpServer.addInternalServlet method.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-5801. Fixes the problem that if the hosts file is changed across a
+    restart, it should be refreshed upon recovery so that the excluded hosts
+    are lost and the maps are re-executed. (Amar Kamat via ddas)
+
+    HADOOP-5841. Resolve findbugs warnings in DistributedFileSystem,
+    DatanodeInfo, BlocksMap, DatanodeDescriptor.  (Jakob Homan via szetszwo)
+
+    HADOOP-5878. Fix import and Serializable javac warnings found in hdfs jsp.
+    (szetszwo)
+
+    HADOOP-5782. Revert a few formatting changes introduced in HADOOP-5015.
+    (Suresh Srinivas via rangadi)
+
+    HADOOP-5687. NameNode throws NPE if fs.default.name is the default value.
+    (Philip Zeyliger via shv)
+
+    HADOOP-5867. Fix javac warnings found in NNBench and NNBenchWithoutMR.
+    (Konstantin Boudnik via szetszwo)
+    
+    HADOOP-5728. Fixed FSEditLog.printStatistics IndexOutOfBoundsException.
+    (Wang Xu via johan)
+
+    HADOOP-5847. Fixed failing Streaming unit tests (gkesavan) 
+
+    HADOOP-5252. Streaming overrides -inputformat option (Klaas Bosteels 
+    via sharad)
+
+    HADOOP-5710. Counter MAP_INPUT_BYTES missing from new mapreduce api. 
+    (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5809. Fix job submission, broken by errant directory creation.
+    (Sreekanth Ramakrishnan and Jothi Padmanabhan via cdouglas)
+
+    HADOOP-5635. Change distributed cache to work with other distributed file
+    systems. (Andrew Hitchcock via tomwhite)
+
+    HADOOP-5856. Fix "unsafe multithreaded use of DateFormat" findbugs warning
+    in DataBlockScanner.  (Kan Zhang via szetszwo)
+
+    HADOOP-4864. Fixes a problem to do with -libjars with multiple jars when
+    client and cluster reside on different OSs. (Amareshwari Sriramadasu via 
+    ddas)
+
+    HADOOP-5623. Fixes a problem to do with status messages getting overwritten
+    in streaming jobs. (Rick Cox and Jothi Padmanabhan via ddas)
+
+    HADOOP-5895. Fixes computation of count of merged bytes for logging.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5805. Fix a problem using top-level S3 buckets as input/output
+    directories. (Ian Nowland via tomwhite)
+   
+    HADOOP-5940. Trunk eclipse-plugin build fails while trying to copy the
+    commons-cli jar from the lib dir. (Giridharan Kesavan via gkesavan)
+
+    HADOOP-5864. Fix DMI and OBL findbugs in packages hdfs and metrics.
+    (hairong)
+
+    HADOOP-5935. Fix Hudson's broken release audit warnings link.
+    (Giridharan Kesavan via gkesavan)
+
+    HADOOP-5947. Delete empty TestCombineFileInputFormat.java
+
+    HADOOP-5899. Move a log message in FSEditLog to the right place for
+    avoiding unnecessary log.  (Suresh Srinivas via szetszwo)
+
+    HADOOP-5944. Add Apache license header to BlockManager.java.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-5891. SecondaryNamenode is able to converse with the NameNode 
+    even when the default value of dfs.http.address is not overridden.
+    (Todd Lipcon via dhruba)
+
+    HADOOP-5953. The isDirectory(..) and isFile(..) methods in KosmosFileSystem
+    should not be deprecated.  (szetszwo)
+
+    HADOOP-5954. Fix javac warnings in TestFileCreation, TestSmallBlock,
+    TestFileStatus, TestDFSShellGenericOptions, TestSeekBug and
+    TestDFSStartupVersions.  (szetszwo)
+
+    HADOOP-5956. Fix ivy dependency in hdfsproxy and capacity-scheduler.
+    (Giridharan Kesavan via szetszwo)
+
+    HADOOP-5836. Bug in S3N handling of directory markers using an object with
+    a trailing "/" causes jobs to fail. (Ian Nowland via tomwhite)
+
+    HADOOP-5861. s3n files are not getting split by default. (tomwhite)
+
+    HADOOP-5762. Fix a problem that DistCp does not copy empty directories.
+    (Rodrigo Schmidt via szetszwo)
+
+    HADOOP-5859. Fix "wait() or sleep() with locks held" findbugs warnings in
+    DFSClient.  (Kan Zhang via szetszwo)
+   
+    HADOOP-5457. Fix to continue to run builds even if contrib test fails
+    (Giridharan Kesavan via gkesavan)
+
+    HADOOP-5963. Remove an unnecessary exception catch in NNBench.  (Boris
+    Shkolnik via szetszwo)
+
+    HADOOP-5989. Fix streaming test failure.  (gkesavan)
+
+    HADOOP-5981. Fix a bug in HADOOP-2838 in parsing mapred.child.env.
+    (Amar Kamat via sharad)
+
+    HADOOP-5420. Fix LinuxTaskController to kill tasks using the process
+    groups they are launched with.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-6031. Remove @author tags from Java source files.  (Ravi Phulari
+    via szetszwo)
+
+    HADOOP-5980. Fix LinuxTaskController so tasks get passed 
+    LD_LIBRARY_PATH and other environment variables.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4041. IsolationRunner does not work as documented.
+    (Philip Zeyliger via tomwhite)
+
+    HADOOP-6004. Fixes BlockLocation deserialization.  (Jakob Homan via
+    szetszwo)
+
+    HADOOP-6079. Serialize proxySource as DatanodeInfo in DataTransferProtocol.
+    (szetszwo)
+
+    HADOOP-6096. Fix Eclipse project and classpath files following project
+    split. (tomwhite)
+
+    HADOOP-6122. The greater-than operator in test-patch.sh should be "-gt",
+    not ">".  (szetszwo)
+
+    HADOOP-6114. Fix javadoc documentation for FileStatus.getLen.
+    (Dmitry Rzhevskiy via dhruba)
+
+    HADOOP-6131. A sysproperty should not be set unless the property 
+    is set on the ant command line in build.xml (hong tang via mahadev)
+
+    HADOOP-6137. Fix project specific test-patch requirements
+    (Giridharan Kesavan)
+
+    HADOOP-6138. Eliminate the deprecated warnings introduced by H-5438.
+    (He Yongqiang via szetszwo)
+
+    HADOOP-6132. RPC client creates an extra connection because of an incorrect
+    key for the connection cache. (Kan Zhang via rangadi)
+
+    HADOOP-6123. Add missing classpaths in hadoop-config.sh.  (Sharad Agarwal
+    via szetszwo)
+
+    HADOOP-6172. Fix jar file names in hadoop-config.sh and include 
+    ${build.src} as a part of the source list in build.xml.  (Hong Tang via 
+    szetszwo)
+
+    HADOOP-6124. Fix javac warning detection in test-patch.sh.  (Giridharan
+    Kesavan via szetszwo)
+
+    HADOOP-6177. FSInputChecker.getPos() would return a position greater
+    than the file size. (Hong Tang via hairong)
+
+    HADOOP-6188. TestTrash uses java.io.File api but not hadoop FileSystem api.
+    (Boris Shkolnik via szetszwo)
+
+    HADOOP-6192. Fix Shell.getUlimitMemoryCommand to not rely on Map-Reduce
+    specific configs. (acmurthy) 
+
+    HADOOP-6103. Clones the classloader as part of Configuration clone.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-6152. Fix classpath variables in bin/hadoop-config.sh and some
+    other scripts.  (Aaron Kimball via szetszwo)
+
+    HADOOP-6215. Fix GenericOptionsParser to deal with -D options that have
+    '=' in the value. (Amar Kamat via sharad)
+
+    HADOOP-6227. Fix Configuration to allow final parameters to be set to null
+    and prevent them from being overridden.
+    (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-6199. Move io.map.skip.index property to core-default from mapred.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-6229. Attempt to make a directory under an existing file on
+    LocalFileSystem should throw an Exception. (Boris Shkolnik via tomwhite)
+
+    HADOOP-6243. Fix a NullPointerException in processing deprecated keys.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-6009. S3N listStatus incorrectly returns null instead of empty
+    array when called on empty root. (Ian Nowland via tomwhite)
+
+    HADOOP-6181.  Fix .eclipse.templates/.classpath for avro and jets3t jar
+    files.  (Carlos Valiente via szetszwo)
+
+    HADOOP-6196. Fix a bug in SequenceFile.Reader where syncing within the
+    header would cause the reader to read the sync marker as a record. (Jay
+    Booth via cdouglas)
+
+    HADOOP-6250. Modify test-patch to delete copied XML files before running
+    patch build. (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-6257. Two TestFileSystem classes are confusing
+    hadoop-hdfs-hdfwithmr. (Philip Zeyliger via tomwhite)
+
+    HADOOP-6151. Added a input filter to all of the http servlets that quotes
+    html characters in the parameters, to prevent cross site scripting 
+    attacks. (omalley)
+
+    HADOOP-6274. Fix TestLocalFSFileContextMainOperations test failure.
+    (Gary Murry via suresh).
+
+    HADOOP-6281. Avoid null pointer exceptions when the jsps don't have 
+    parameters (omalley)
+
+    HADOOP-6285. Fix the result type of the getParameterMap method in the
+    HttpServer.QuotingInputFilter. (omalley)
+
+    HADOOP-6286. Fix bugs related to URI handling in glob methods in
+    FileContext. (Boris Shkolnik via suresh)
+
+    HADOOP-6292. Update native libraries guide. (Corinne Chandel via cdouglas)
+
+    HADOOP-6327. FileContext tests should not use /tmp and should clean up
+    files.  (Sanjay Radia via szetszwo)
+
+    HADOOP-6318. Upgrade to Avro 1.2.0.  (cutting)
+
+    HADOOP-6334.  Fix GenericOptionsParser to understand URI for -files,
+    -libjars and -archives options and fix Path to support URI with fragment.
+    (Amareshwari Sriramadasu via szetszwo)
+
+    HADOOP-6344. Fix rm and rmr immediately delete files rather than sending 
+    to trash, if a user is over-quota. (Jakob Homan via suresh)
+
+    HADOOP-6347. run-test-core-fault-inject runs a test case twice if
+    -Dtestcase is set (cos)
+
+    HADOOP-6375. Sync documentation for FsShell du with its implementation.
+    (Todd Lipcon via cdouglas)
+
+    HADOOP-6441. Protect web ui from cross site scripting attacks (XSS) on
+    the host http header and using encoded utf-7. (omalley)
+
+    HADOOP-6451. Fix build to run contrib unit tests. (Tom White via cdouglas)
+
+    HADOOP-6374. JUnit tests should never depend on anything in conf.
+    (Anatoli Fomenko via cos)
+
+    HADOOP-6290. Prevent duplicate slf4j-simple jar via Avro's classpath.
+    (Owen O'Malley via cdouglas)
+
+    HADOOP-6293. Fix FsShell -text to work on filesystems other than the
+    default. (cdouglas)
+
+    HADOOP-6341. Fix test-patch.sh for checkTests function. (gkesavan)
+
+    HADOOP-6314. Fix "fs -help" for the "-count" command.  (Ravi Phulari via
+    szetszwo)
+
+    HADOOP-6405. Update Eclipse configuration to match changes to Ivy
+    configuration (Edwin Chan via cos)
+
+    HADOOP-6411. Remove deprecated file src/test/hadoop-site.xml. (cos)
+
+    HADOOP-6386. NameNode's HttpServer can't instantiate InetSocketAddress:
+    IllegalArgumentException is thrown (cos)
+
+    HADOOP-6254. Slow reads cause s3n to fail with SocketTimeoutException.
+    (Andrew Hitchcock via tomwhite)
+
+    HADOOP-6428. HttpServer sleeps with negative values. (cos)
+
+    HADOOP-6414. Add command line help for -expunge command.
+    (Ravi Phulari via tomwhite)
+
+    HADOOP-6391. Classpath should not be part of command line arguments.
+    (Cristian Ivascu via tomwhite)
+
+    HADOOP-6462. Target "compile" does not exist in contrib/cloud. (tomwhite)
+
+    HADOOP-6402. testConf.xsl is not well-formed XML. (Steve Loughran
+    via tomwhite)
+
+    HADOOP-6489. Fix 3 findbugs warnings. (Erik Steffl via suresh)
+
+    HADOOP-6517. Fix UserGroupInformation so that tokens are saved/retrieved
+    to/from the embedded Subject (Owen O'Malley & Kan Zhang via ddas)
+
+    HADOOP-6538. Sets hadoop.security.authentication to simple by default.
+    (ddas)
+
+    HADOOP-6540. Contrib unit tests have invalid XML for core-site, etc.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-6521. User specified umask using deprecated dfs.umask must override
+    server configured using new dfs.umaskmode for backward compatibility.
+    (suresh)
+    
+    HADOOP-6522. Fix decoding of codepoint zero in UTF8. (cutting)
+
+    HADOOP-6505. Use tr rather than sed to effect literal substitution in the
+    build script. (Allen Wittenauer via cdouglas)
+
+    HADOOP-6548. Replace mortbay imports with commons logging. (cdouglas)
+
+    HADOOP-6560. Handle invalid har:// uri in HarFileSystem.  (szetszwo)
+
+    HADOOP-6549. TestDoAsEffectiveUser should use the IP address of the host
+    for the superuser IP check. (jnp via boryas)
+
+    HADOOP-6570. RPC#stopProxy throws NPE if getProxyEngine(proxy) returns
+    null. (hairong)
+
+    HADOOP-6558. Return null in HarFileSystem.getFileChecksum(..) since no
+    checksum algorithm is implemented.  (szetszwo)
+
+    HADOOP-6572. Makes sure that SASL encryption and push to responder
+    queue for the RPC response happen atomically. (Kan Zhang via ddas)
+
+    HADOOP-6545. Changes the key for the FileSystem cache to be the UGI. (ddas)
+
+    HADOOP-6609. Fixed deadlock in RPC by replacing shared static 
+    DataOutputBuffer in the UTF8 class with a thread local variable. (omalley)
+
+    HADOOP-6504. Invalid example in the documentation of
+    org.apache.hadoop.util.Tool. (Benoit Sigoure via tomwhite)
+
+    HADOOP-6546. BloomMapFile can return false negatives. (Clark Jefcoat
+    via tomwhite)
+
+    HADOOP-6593. TextRecordInputStream doesn't close SequenceFile.Reader.
+    (Chase Bradford via tomwhite)
+
+    HADOOP-6175. Incorrect version compilation with es_ES.ISO8859-15 locale
+    on Solaris 10. (Urko Benito via tomwhite)
+
+    HADOOP-6645.  Bugs on listStatus for HarFileSystem (rodrigo via mahadev)
+
+    HADOOP-6645. Re: Bugs on listStatus for HarFileSystem (rodrigo via
+    mahadev)
+
+    HADOOP-6654. Fix code example in WritableComparable javadoc.  (Tom White
+    via szetszwo)
+
+    HADOOP-6640. FileSystem.get() does RPC retries within a static
+    synchronized block. (hairong)
+
+    HADOOP-6691. TestFileSystemCaching sometimes hangs. (hairong)
+
+    HADOOP-6507. Hadoop Common Docs - delete 3 doc files that do not belong
+    under Common. (Corinne Chandel via tomwhite)
+
+    HADOOP-6439. Fixes handling of deprecated keys to follow order in which
+    keys are defined. (V.V.Chaitanya Krishna via yhemanth)
+
+    HADOOP-6690. FilterFileSystem correctly handles setTimes call.
+    (Rodrigo Schmidt via dhruba)
+
+    HADOOP-6703. Prevent renaming a file, directory or symbolic link to
+    itself. (Eli Collins via suresh)
+
+    HADOOP-6710. Symbolic umask for file creation is not conformant with POSIX.
+    (suresh)
+    
+    HADOOP-6719. Insert all missing methods in FilterFs.
+    (Rodrigo Schmidt via dhruba)
+
+    HADOOP-6724. IPC doesn't properly handle IOEs thrown by socket factory.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-6722. NetUtils.connect should check that it hasn't connected a socket
+    to itself. (Todd Lipcon via tomwhite)
+
+    HADOOP-6634. Fix AccessControlList to use short names to verify access 
+    control. (Vinod Kumar Vavilapalli via sharad)
+
+    HADOOP-6709. Re-instate deprecated FileSystem methods that were removed
+    after 0.20. (tomwhite)
+ 
+    HADOOP-6630. hadoop-config.sh fails to get executed if hadoop wrapper
+    scripts are in path. (Allen Wittenauer via tomwhite)
+
+    HADOOP-6742. Add methods from HADOOP-6709 to TestFilterFileSystem.
+    (Eli Collins via tomwhite)
+
+    HADOOP-6727. Remove UnresolvedLinkException from public FileContext APIs.
+    (Eli Collins via tomwhite)
+
+    HADOOP-6631. Fix FileUtil.fullyDelete() to continue deleting other files 
+    despite failure at any level. (Contributed by Ravi Gummadi and 
+    Vinod Kumar Vavilapalli)
+
+    HADOOP-6723.  Unchecked exceptions thrown in IPC Connection should not
+    orphan clients.  (Todd Lipcon via tomwhite)
+
+    HADOOP-6404. Rename the generated artifacts to common instead of core.
+    (tomwhite)
+
+    HADOOP-6461.  Webapps aren't located correctly post-split.
+    (Todd Lipcon and Steve Loughran via tomwhite)
+
+    HADOOP-6826.  Revert FileSystem create method that takes CreateFlags.
+    (tomwhite)
+
+    HADOOP-6800.  Harmonize JAR library versions.  (tomwhite)
+
+    HADOOP-6847. Problem staging 0.21.0 artifacts to Apache Nexus Maven
+    Repository (Giridharan Kesavan via cos)
+
+    HADOOP-6819. [Herriot] Shell command for getting the new exceptions in
+    the logs returns exit code 1 even after executing successfully. (Vinay
+    Thota via cos)
+
+    HADOOP-6839. [Herriot] Implement a functionality for getting the user list
+    for creating proxy users. (Vinay Thota via cos)
+
+    HADOOP-6836. [Herriot]: Generic method for adding/modifying the attributes
+    for new configuration. (Vinay Thota via cos)
+
+    HADOOP-6860. 'compile-fault-inject' should never be called directly.
+    (Konstantin Boudnik)
+
+    HADOOP-6790. Instrumented (Herriot) build uses too wide mask to include
+    aspect files. (Konstantin Boudnik)
+
+    HADOOP-6875. [Herriot] Cleanup of temp. configurations is needed upon
+    restart of a cluster (Vinay Thota via cos)
+
+Release 0.20.3 - Unreleased
+
+  NEW FEATURES
+
+    HADOOP-6637. Benchmark for establishing RPC session. (shv)
+
+  BUG FIXES
+
+    HADOOP-6760. WebServer shouldn't increase port number in case of negative
+    port setting caused by Jetty's race (cos)
+    
+    HADOOP-6881. Make WritableComparator initialize classes when
+    looking for their raw comparator, as classes often register raw
+    comparators in initializers, which are no longer automatically run
+    in Java 6 when a class is referenced. (cutting via omalley)
+
+    HADOOP-7072. Remove java5 dependencies from build. (cos)
+
+Release 0.20.204.0 - Unreleased
+
+  NEW FEATURES
+
+    HADOOP-6255. Create RPM and Debian packages for common. Changes deployment
+    layout to be consistent across the binary tgz, rpm, and deb. Adds setup
+    scripts for easy one node cluster configuration and user creation.
+    (Eric Yang via omalley)
+
+Release 0.20.203.0 - 2011-5-11
+
+  BUG FIXES
+
+    HADOOP-7258. The Gzip codec should not return null decompressors. (omalley)
+
+Release 0.20.2 - 2010-2-16
+
+  NEW FEATURES
+
+    HADOOP-6218. Adds a feature where TFile can be split by Record
+    Sequence number. (Hong Tang and Raghu Angadi via ddas)
+
+  BUG FIXES
+
+    HADOOP-6231. Allow caching of filesystem instances to be disabled on a
+    per-instance basis. (tomwhite)
+
+    HADOOP-5759. Fix for IllegalArgumentException when CombineFileInputFormat
+    is used as job InputFormat. (Amareshwari Sriramadasu via dhruba)
+
+    HADOOP-6097. Fix Path conversion in makeQualified and reset LineReader byte
+    count at the start of each block in Hadoop archives. (Ben Slusky, Tom
+    White, and Mahadev Konar via cdouglas)
+
+    HADOOP-6269. Fix threading issue with defaultResource in Configuration.
+    (Sreekanth Ramakrishnan via cdouglas)
+
+    HADOOP-6460. Reinitializes buffers used for serializing responses in ipc
+    server on exceeding maximum response size to free up Java heap. (suresh)
+
+    HADOOP-6315. Avoid incorrect use of BuiltInflater/BuiltInDeflater in
+    GzipCodec. (Aaron Kimball via cdouglas)
+
+    HADOOP-6498. IPC client bug may cause rpc call hang. (Ruyue Ma and
+    hairong via hairong)
+
+  IMPROVEMENTS
+
+    HADOOP-5611. Fix C++ libraries to build on Debian Lenny. (Todd Lipcon
+    via tomwhite)
+
+    HADOOP-5612. Some c++ scripts are not chmodded before ant execution.
+    (Todd Lipcon via tomwhite)
+
+    HADOOP-1849. Add undocumented configuration parameter for per handler 
+    call queue size in IPC Server. (shv)
+
+Release 0.20.1 - 2009-09-01
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-5726. Remove pre-emption from capacity scheduler code base.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5881. Simplify memory monitoring and scheduling related
+    configuration. (Vinod Kumar Vavilapalli via yhemanth)
+
+  NEW FEATURES
+
+    HADOOP-6080. Introduce -skipTrash option to rm and rmr.
+    (Jakob Homan via shv)
+
+    HADOOP-3315. Add a new, binary file format, TFile. (Hong Tang via cdouglas)
+
+  IMPROVEMENTS
+
+    HADOOP-5711. Change Namenode file close log to info. (szetszwo)
+
+    HADOOP-5736. Update the capacity scheduler documentation for features
+    like memory based scheduling, job initialization and removal of pre-emption.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5714. Add a metric for NameNode getFileInfo operation. (Jakob Homan
+    via szetszwo)
+
+    HADOOP-4372. Improves the way history filenames are obtained and manipulated.
+    (Amar Kamat via ddas)
+
+    HADOOP-5897. Add name-node metrics to capture java heap usage.
+    (Suresh Srinivas via shv)
+
+  OPTIMIZATIONS
+
+  BUG FIXES
+
+    HADOOP-5691. Makes org.apache.hadoop.mapreduce.Reducer a concrete class
+    instead of abstract. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5646. Fixes a problem in TestQueueCapacities.
+    (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-5655. TestMRServerPorts fails on java.net.BindException. (Devaraj
+    Das via hairong)
+
+    HADOOP-5654. TestReplicationPolicy.<init> fails on java.net.BindException.
+    (hairong)
+
+    HADOOP-5688. Fix HftpFileSystem checksum path construction. (Tsz Wo
+    (Nicholas) Sze via cdouglas)
+
+    HADOOP-4674. Fix fs help messages for -test, -text, -tail, -stat 
+    and -touchz options.  (Ravi Phulari via szetszwo)
+
+    HADOOP-5718. Remove the check for the default queue in capacity scheduler.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5719. Remove jobs that failed initialization from the waiting queue
+    in the capacity scheduler. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4744. Attaching another fix to the jetty port issue. The TaskTracker
+    kills itself if it ever discovers that the port to which jetty is actually
+    bound is invalid (-1). (ddas)
+
+    HADOOP-5349. Fixes a problem in LocalDirAllocator to check the path value
+    that is returned for the case where the file we want to write
+    is of an unknown size. (Vinod Kumar Vavilapalli via ddas)
+
+    HADOOP-5636. Prevents a job from going to RUNNING state after it has been
+    KILLED (this used to happen when the SetupTask would come back with a 
+    success after the job has been killed). (Amar Kamat via ddas)
+
+    HADOOP-5641. Fix a NullPointerException in capacity scheduler's memory
+    based scheduling code when jobs get retired. (yhemanth)
+
+    HADOOP-5828. Use absolute path for mapred.local.dir of JobTracker in
+    MiniMRCluster. (yhemanth)
+
+    HADOOP-4981. Fix capacity scheduler to schedule speculative tasks 
+    correctly in the presence of High RAM jobs.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5210. Solves a problem in the progress report of the reduce task.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5850. Fixes a problem to do with not being able to run jobs with
+    0 maps/reduces. (Vinod K V via ddas)
+
+    HADOOP-4626. Correct the API links in hdfs forrest doc so that they
+    point to the same version of hadoop.  (szetszwo)
+
+    HADOOP-5883. Fixed tasktracker memory monitoring to account for
+    momentary spurts in memory usage due to java's fork() model.
+    (yhemanth)
+
+    HADOOP-5539. Fixes a problem to do with not preserving intermediate
+    output compression for merged data.
+    (Jothi Padmanabhan and Billy Pearson via ddas)
+
+    HADOOP-5932. Fixes a problem in capacity scheduler in computing
+    available memory on a tasktracker.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5908. Fixes a problem to do with ArithmeticException in the 
+    JobTracker when there are jobs with 0 maps. (Amar Kamat via ddas)
+
+    HADOOP-5924. Fixes a corner case problem to do with job recovery with
+    empty history files. Also, after a JT restart, sends KillTaskAction to 
+    tasks that report back but the corresponding job hasn't been initialized
+    yet. (Amar Kamat via ddas)
+
+    HADOOP-5882. Fixes a reducer progress update problem for new mapreduce
+    api. (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-5746. Fixes a corner case problem in Streaming, where if an exception
+    happens in MROutputThread after the last call to the map/reduce method, the 
+    exception goes undetected. (Amar Kamat via ddas)
+
+    HADOOP-5884. Fixes accounting in capacity scheduler so that high RAM jobs
+    take more slots. (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5937. Correct a safemode message in FSNamesystem.  (Ravi Phulari
+    via szetszwo)
+
+    HADOOP-5869. Fix bug in assignment of setup / cleanup task that was
+    causing TestQueueCapacities to fail.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5921. Fixes a problem in the JobTracker where it sometimes never used
+    to come up due to a system file creation on JobTracker's system-dir failing. 
+    This problem would sometimes show up only when the FS for the system-dir 
+    (usually HDFS) is started at nearly the same time as the JobTracker. 
+    (Amar Kamat via ddas)
+
+    HADOOP-5920. Fixes a testcase failure for TestJobHistory. 
+    (Amar Kamat via ddas)
+
+    HADOOP-6139. Fix the FsShell help messages for rm and rmr.  (Jakob Homan
+    via szetszwo)
+
+    HADOOP-6145. Fix FsShell rm/rmr error messages when there is a FNFE.
+    (Jakob Homan via szetszwo)
+
+    HADOOP-6150. Users should be able to instantiate a comparator using the
+    TFile API. (Hong Tang via rangadi)
+
+Release 0.20.0 - 2009-04-15
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-4210. Fix findbugs warnings for equals implementations of mapred ID
+    classes. Removed public, static ID::read and ID::forName; made ID an
+    abstract class. (Suresh Srinivas via cdouglas)
+
+    HADOOP-4253. Fix various warnings generated by findbugs. 
+    Following deprecated methods in RawLocalFileSystem are removed:
+  	  public String getName()
+  	  public void lock(Path p, boolean shared)
+  	  public void release(Path p) 
+    (Suresh Srinivas via johan)
+
+    HADOOP-4618. Move http server from FSNamesystem into NameNode.
+    FSNamesystem.getNameNodeInfoPort() is removed.
+    FSNamesystem.getDFSNameNodeMachine() and FSNamesystem.getDFSNameNodePort()
+      replaced by FSNamesystem.getDFSNameNodeAddress().
+    NameNode(bindAddress, conf) is removed.
+    (shv)
+
+    HADOOP-4567. GetFileBlockLocations returns the NetworkTopology
+    information of the machines where the blocks reside. (dhruba)
+
+    HADOOP-4435. The JobTracker WebUI displays the amount of heap memory 
+    in use. (dhruba)
+
+    HADOOP-4628. Move Hive into a standalone subproject. (omalley)
+
+    HADOOP-4188. Removes task's dependency on concrete filesystems.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-1650. Upgrade to Jetty 6. (cdouglas)
+
+    HADOOP-3986. Remove static Configuration from JobClient. (Amareshwari
+    Sriramadasu via cdouglas)
+      JobClient::setCommandLineConfig is removed
+      JobClient::getCommandLineConfig is removed
+      JobShell, TestJobShell classes are removed
+
+    HADOOP-4422. S3 file systems should not create bucket.
+    (David Phillips via tomwhite)
+
+    HADOOP-4035. Support memory based scheduling in capacity scheduler.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-3497. Fix bug in overly restrictive file globbing with a
+    PathFilter. (tomwhite)
+
+    HADOOP-4445. Replace running task counts with running task
+    percentage in capacity scheduler UI. (Sreekanth Ramakrishnan via
+    yhemanth)
+
+    HADOOP-4631. Splits the configuration into three parts - one for core,
+    one for mapred and the last one for HDFS. (Sharad Agarwal via cdouglas)
+
+    HADOOP-3344. Fix libhdfs build to use autoconf and build the same
+    architecture (32 vs 64 bit) of the JVM running Ant.  The libraries for
+    pipes, utils, and libhdfs are now all in c++/<os_osarch_jvmdatamodel>/lib. 
+    (Giridharan Kesavan via nigel)
+
+    HADOOP-4874. Remove LZO codec because of licensing issues. (omalley)
+
+    HADOOP-4970. The full path name of a file is preserved inside Trash.
+    (Prasad Chakka via dhruba)
+
+    HADOOP-4103. NameNode keeps a count of missing blocks. It warns on 
+    WebUI if there are such blocks. '-report' and '-metaSave' have extra
+    info to track such blocks. (Raghu Angadi)
+
+    HADOOP-4783. Change permissions on history files on the jobtracker
+    to be only group readable instead of world readable.
+    (Amareshwari Sriramadasu via yhemanth)
+
+  NEW FEATURES
+
+    HADOOP-4575. Add a proxy service for relaying HsftpFileSystem requests.
+    Includes client authentication via user certificates and config-based
+    access control. (Kan Zhang via cdouglas)
+
+    HADOOP-4661. Add DistCh, a new tool for distributed ch{mod,own,grp}.
+    (szetszwo)
+
+    HADOOP-4709. Add several new features and bug fixes to Chukwa.
+      Added Hadoop Infrastructure Care Center (UI for visualizing data
+                                               collected by Chukwa)
+      Added FileAdaptor for streaming small files in one chunk
+      Added compression to archive and demux output
+      Added unit tests and validation for agent, collector, and demux map
+        reduce job
+      Added database loader for loading demux output (sequence file) to a
+        JDBC-connected database
+      Added algorithm to distribute collector load more evenly
+    (Jerome Boulon, Eric Yang, Andy Konwinski, Ariel Rabkin via cdouglas)
+
+    HADOOP-4179. Add Vaidya tool to analyze map/reduce job logs for performance
+    problems. (Suhas Gogate via omalley)
+
+    HADOOP-4029. Add NameNode storage information to the dfshealth page and
+    move DataNode information to a separated page. (Boris Shkolnik via
+    szetszwo)
+
+    HADOOP-4348. Add service-level authorization for Hadoop. (acmurthy) 
+
+    HADOOP-4826. Introduce admin command saveNamespace. (shv)
+
+    HADOOP-3063. BloomMapFile - fail-fast version of MapFile for sparsely
+    populated key space. (Andrzej Bialecki via stack)
+
+    HADOOP-1230. Add new map/reduce API and deprecate the old one. Generally,
+    the old code should work without problem. The new api is in 
+    org.apache.hadoop.mapreduce and the old classes in org.apache.hadoop.mapred
+    are deprecated. Differences in the new API:
+      1. All of the methods take Context objects that allow us to add new
+         methods without breaking compatibility.
+      2. Mapper and Reducer now have a "run" method that is called once and
+         contains the control loop for the task, which lets applications
+         replace it.
+      3. Mapper and Reducer by default are Identity Mapper and Reducer.
+      4. The FileOutputFormats use part-r-00000 for the output of reduce 0 and
+         part-m-00000 for the output of map 0.
+      5. The reduce grouping comparator now uses the raw compare instead of 
+         object compare.
+      6. The number of maps in FileInputFormat is controlled by min and max
+         split size rather than min size and the desired number of maps.
+      (omalley)
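+
+    For illustration only, a minimal mapper written against the new
+    org.apache.hadoop.mapreduce API could look like the sketch below; the
+    class name and the word-count-style types are hypothetical, not part of
+    this change:
+
+      import java.io.IOException;
+      import org.apache.hadoop.io.IntWritable;
+      import org.apache.hadoop.io.Text;
+      import org.apache.hadoop.mapreduce.Mapper;
+
+      // Sketch of a mapper built on the context-object API described above.
+      public class TokenCountMapper
+          extends Mapper<Object, Text, Text, IntWritable> {
+        private static final IntWritable ONE = new IntWritable(1);
+        private final Text word = new Text();
+
+        @Override
+        protected void map(Object key, Text value, Context context)
+            throws IOException, InterruptedException {
+          for (String token : value.toString().split("\\s+")) {
+            word.set(token);
+            context.write(word, ONE);  // emit through the Context object
+          }
+        }
+      }
+
+    If no map method is overridden at all, the base Mapper simply passes each
+    record through unchanged, which is the identity behavior described in
+    item 3 above.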
+    
+    HADOOP-3305.  Use Ivy to manage dependencies.  (Giridharan Kesavan
+    and Steve Loughran via cutting)
+
+  IMPROVEMENTS
+
+    HADOOP-4749. Added a new counter REDUCE_INPUT_BYTES. (Yongqiang He via 
+    zshao)
+
+    HADOOP-4234. Fix KFS "glue" layer to allow applications to interface
+    with multiple KFS metaservers. (Sriram Rao via lohit)
+
+    HADOOP-4245. Update to latest version of KFS "glue" library jar. 
+    (Sriram Rao via lohit)
+
+    HADOOP-4244. Change test-patch.sh to check the Eclipse classpath whether
+    it is run by Hudson or not. (szetszwo)
+
+    HADOOP-3180. Add name of missing class to WritableName.getClass 
+    IOException. (Pete Wyckoff via omalley)
+
+    HADOOP-4178. Make the capacity scheduler's default values configurable.
+    (Sreekanth Ramakrishnan via omalley)
+
+    HADOOP-4262. Generate better error message when client exception has null
+    message. (stevel via omalley)
+
+    HADOOP-4226. Refactor and document LineReader to make it more readily
+    understandable. (Yuri Pradkin via cdouglas)
+    
+    HADOOP-4238. When listing jobs, if scheduling information isn't available 
+    print NA instead of empty output. (Sreekanth Ramakrishnan via johan)
+
+    HADOOP-4284. Support filters that apply to all requests, or global filters,
+    to HttpServer. (Kan Zhang via cdouglas)
+    
+    HADOOP-4276. Improve the hashing functions and deserialization of the 
+    mapred ID classes. (omalley)
+
+    HADOOP-4485. Add a compile-native ant task, as a shorthand. (enis)
+
+    HADOOP-4454. Allow # comments in slaves file. (Rama Ramasamy via omalley)
+
+    HADOOP-3461. Remove hdfs.StringBytesWritable. (szetszwo)
+
+    HADOOP-4437. Use Halton sequence instead of java.util.Random in 
+    PiEstimator. (szetszwo)
+
+    HADOOP-4572. Change INode and its sub-classes to package private. 
+    (szetszwo)
+
+    HADOOP-4187. Does a runtime lookup for JobConf/JobConfigurable, and if 
+    found, invokes the appropriate configure method. (Sharad Agarwal via ddas)
+
+    HADOOP-4453. Improve ssl configuration and handling in HsftpFileSystem,
+    particularly when used with DistCp. (Kan Zhang via cdouglas)
+
+    HADOOP-4583. Several code optimizations in HDFS.  (Suresh Srinivas via
+    szetszwo)
+
+    HADOOP-3923. Remove org.apache.hadoop.mapred.StatusHttpServer.  (szetszwo)
+    
+    HADOOP-4622. Explicitly specify interpreter for non-native
+    pipes binaries. (Fredrik Hedberg via johan)
+    
+    HADOOP-4505. Add a unit test to test faulty setup task and cleanup
+    task killing the job. (Amareshwari Sriramadasu via johan)
+
+    HADOOP-4608. Don't print a stack trace when the example driver gets an
+    unknown program to run. (Edward Yoon via omalley)
+
+    HADOOP-4645. Package HdfsProxy contrib project without the extra level
+    of directories. (Kan Zhang via omalley)
+
+    HADOOP-4126. Allow access to HDFS web UI on EC2 (tomwhite via omalley)
+
+    HADOOP-4612. Removes RunJar's dependency on JobClient.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4185. Adds setVerifyChecksum() method to FileSystem.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4523. Prevent too many tasks scheduled on a node from bringing
+    it down by monitoring for cumulative memory usage across tasks.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4640. Adds an input format that can split lzo compressed
+    text files. (johan)
+    
+    HADOOP-4666. Launch reduces only after a few maps have run in the 
+    Fair Scheduler. (Matei Zaharia via johan)    
+
+    HADOOP-4339. Remove redundant calls from FileSystem/FsShell when
+    generating/processing ContentSummary. (David Phillips via cdouglas)
+
+    HADOOP-2774. Add counters tracking records spilled to disk in MapTask and
+    ReduceTask. (Ravi Gummadi via cdouglas)
+
+    HADOOP-4513. Initialize jobs asynchronously in the capacity scheduler.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4649. Improve abstraction for spill indices. (cdouglas)
+
+    HADOOP-3770. Add gridmix2, an iteration on the gridmix benchmark. (Runping
+    Qi via cdouglas)
+
+    HADOOP-4708. Add support for dfsadmin commands in TestCLI. (Boris Shkolnik
+    via cdouglas)
+
+    HADOOP-4758. Add a splitter for metrics contexts to support more than one
+    type of collector. (cdouglas)
+
+    HADOOP-4722. Add tests for dfsadmin quota error messages. (Boris Shkolnik
+    via cdouglas)
+
+    HADOOP-4690. fuse-dfs - create source file/function + utils + config +
+    main source files. (Pete Wyckoff via mahadev)
+
+    HADOOP-3750. Fix and enforce module dependencies. (Sharad Agarwal via
+    tomwhite)
+
+    HADOOP-4747. Speed up FsShell::ls by removing redundant calls to the
+    filesystem. (David Phillips via cdouglas)
+
+    HADOOP-4305. Improves the blacklisting strategy, whereby tasktrackers
+    that are blacklisted are not given tasks to run from other jobs, subject
+    to the following conditions (all must be met):
+    1) The TaskTracker has been blacklisted by at least 4 jobs (configurable)
+    2) The TaskTracker has been blacklisted 50% more times than
+       the average (configurable)
+    3) The cluster has less than 50% of its trackers blacklisted
+    Once in 24 hours, a TaskTracker blacklisted for all jobs is given a chance.
+    Restarting the TaskTracker moves it out of the blacklist.
+    (Amareshwari Sriramadasu via ddas)
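+
+    As a purely illustrative sketch of the rule above (hypothetical names,
+    not the actual JobTracker code):
+
+      // Hypothetical encoding of the three conditions listed above.
+      class BlacklistRule {
+        static boolean blacklistedAcrossJobs(int timesBlacklisted,
+                                             double averageBlacklistCount,
+                                             double fractionOfTrackersBlacklisted) {
+          return timesBlacklisted >= 4                           // condition 1 (configurable)
+              && timesBlacklisted > 1.5 * averageBlacklistCount  // condition 2: 50% above the average
+              && fractionOfTrackersBlacklisted < 0.5;            // condition 3
+        }
+      }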
+
+    HADOOP-4688. Modify the MiniMRDFSSort unit test to spill multiple times,
+    exercising the map-side merge code. (cdouglas)
+
+    HADOOP-4737. Adds the KILLED notification when jobs get killed.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4728. Add a test exercising different namenode configurations.
+    (Boris Shkolnik via cdouglas)
+
+    HADOOP-4807. Adds JobClient commands to get the active/blacklisted tracker
+    names. Also adds commands to display running/completed task attempt IDs. 
+    (ddas)
+
+    HADOOP-4699. Remove checksum validation from map output servlet. (cdouglas)
+
+    HADOOP-4838. Added a registry to automate metrics and mbeans management.
+    (Sanjay Radia via acmurthy) 
+
+    HADOOP-3136. Fixed the default scheduler to assign multiple tasks to each 
+    tasktracker per heartbeat, when feasible. To ensure locality isn't hurt 
+    too badly, the scheduler will not assign more than one off-switch task per
+    heartbeat. The heartbeat interval is also halved since the task-tracker is
+    fixed to no longer send out heartbeats on each task completion. A
+    slow-start for scheduling reduces is introduced to ensure that reduces
+    aren't started until a sufficient number of maps are done, otherwise
+    reduces of jobs whose maps aren't scheduled might swamp the cluster.
+    Configuration changes to mapred-default.xml:
+      add mapred.reduce.slowstart.completed.maps 
+    (acmurthy)
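+
+    For example (0.5 is an arbitrary illustrative value, not necessarily the
+    shipped default), a job could request that reduces start only after about
+    half of its maps have completed:
+
+      import org.apache.hadoop.mapred.JobConf;
+
+      public class SlowstartSetup {
+        public static void main(String[] args) {
+          JobConf conf = new JobConf();
+          // Schedule this job's reduces once ~50% of its maps are complete.
+          conf.setFloat("mapred.reduce.slowstart.completed.maps", 0.5f);
+        }
+      }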
+
+    HADOOP-4545. Add example and test case of secondary sort for the reduce.
+    (omalley)
+
+    HADOOP-4753. Refactor gridmix2 to reduce code duplication. (cdouglas)
+
+    HADOOP-4909. Fix Javadoc and make some of the API more consistent in their
+    use of the JobContext instead of Configuration. (omalley)
+
+    HADOOP-4920.  Stop storing Forrest output in Subversion. (cutting)
+
+    HADOOP-4948. Add parameters java5.home and forrest.home to the ant commands
+    in test-patch.sh.  (Giridharan Kesavan via szetszwo)
+
+    HADOOP-4830. Add end-to-end test cases for testing queue capacities.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-4980. Improve code layout of capacity scheduler to make it 
+    easier to fix some blocker bugs. (Vivek Ratan via yhemanth)
+
+    HADOOP-4916. Make user/location of Chukwa installation configurable by an
+    external properties file. (Eric Yang via cdouglas)
+
+    HADOOP-4950. Make the CompressorStream, DecompressorStream, 
+    BlockCompressorStream, and BlockDecompressorStream public to facilitate 
+    non-Hadoop codecs. (omalley)
+
+    HADOOP-4843. Collect job history and configuration in Chukwa. (Eric Yang
+    via cdouglas)
+
+    HADOOP-5030. Build Chukwa RPM to install into configured directory. (Eric
+    Yang via cdouglas)
+    
+    HADOOP-4828. Updates documents to do with configuration (HADOOP-4631).
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4939. Adds a test that would inject random failures for tasks in 
+    large jobs and would also inject TaskTracker failures. (ddas)
+
+    HADOOP-4944. A configuration file can include other configuration
+    files. (Rama Ramasamy via dhruba)
+
+    HADOOP-4804. Provide Forrest documentation for the Fair Scheduler.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-5248. A testcase that checks for the existence of job directory
+    after the job completes. Fails if it exists. (ddas)
+
+    HADOOP-4664. Introduces multiple job initialization threads, where the 
+    number of threads is configurable via mapred.jobinit.threads.
+    (Matei Zaharia and Jothi Padmanabhan via ddas)
+
+    HADOOP-4191. Adds a testcase for JobHistory. (Ravi Gummadi via ddas)
+
+    HADOOP-5466. Change documentation CSS style for headers and code. (Corinne
+    Chandel via szetszwo)
+
+    HADOOP-5275. Add ivy directory and files to built tar.
+    (Giridharan Kesavan via nigel)
+
+    HADOOP-5468. Add sub-menus to forrest documentation and make some minor
+    edits.  (Corinne Chandel via szetszwo)
+
+    HADOOP-5437. Fix TestMiniMRDFSSort to properly test jvm-reuse. (omalley)
+
+    HADOOP-5521. Removes dependency of TestJobInProgress on RESTART_COUNT 
+    JobHistory tag. (Ravi Gummadi via ddas)
+
+  OPTIMIZATIONS
+
+    HADOOP-3293. Fixes FileInputFormat to provide locations for splits
+    based on the rack/host that has the most bytes.
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-4683. Fixes Reduce shuffle scheduler to invoke
+    getMapCompletionEvents in a separate thread. (Jothi Padmanabhan
+    via ddas)
+
+  BUG FIXES
+
+    HADOOP-4204. Fix findbugs warnings related to unused variables, naive
+    Number subclass instantiation, Map iteration, and badly scoped inner
+    classes. (Suresh Srinivas via cdouglas)
+
+    HADOOP-4207. Update derby jar file to release 10.4.2 release.
+    (Prasad Chakka via dhruba)
+
+    HADOOP-4325. SocketInputStream.read() should return -1 in case EOF.
+    (Raghu Angadi)
+
+    HADOOP-4408. FsAction functions need not create new objects. (cdouglas)
+
+    HADOOP-4440.  TestJobInProgressListener tests for jobs killed in queued 
+    state (Amar Kamat via ddas)
+
+    HADOOP-4346. Implement blocking connect so that Hadoop is not affected
+    by selector problem with JDK default implementation. (Raghu Angadi)
+
+    HADOOP-4388. If there are invalid blocks in the transfer list, Datanode
+    should handle them and keep transferring the remaining blocks.  (Suresh
+    Srinivas via szetszwo)
+
+    HADOOP-4587. Fix a typo in Mapper javadoc.  (Koji Noguchi via szetszwo)
+
+    HADOOP-4530. In fsck, HttpServletResponse sendError fails with
+    IllegalStateException. (hairong)
+
+    HADOOP-4377. Fix a race condition in directory creation in
+    NativeS3FileSystem. (David Phillips via cdouglas)
+
+    HADOOP-4621. Fix javadoc warnings caused by duplicate jars. (Kan Zhang via
+    cdouglas)
+
+    HADOOP-4566. Deploy new hive code to support more types.
+    (Zheng Shao via dhruba)
+
+    HADOOP-4571. Add chukwa conf files to svn:ignore list. (Eric Yang via
+    szetszwo)
+
+    HADOOP-4589. Correct PiEstimator output messages and improve the code
+    readability. (szetszwo)
+
+    HADOOP-4650. Correct a mismatch between the default value of
+    local.cache.size in the config and the source. (Jeff Hammerbacher via
+    cdouglas)
+
+    HADOOP-4606. Fix cygpath error if the log directory does not exist.
+    (szetszwo via omalley)
+
+    HADOOP-4141. Fix bug in ScriptBasedMapping causing potential infinite
+    loop on misconfigured hadoop-site. (Aaron Kimball via tomwhite)
+
+    HADOOP-4691. Correct a link in the javadoc of IndexedSortable. (szetszwo)
+
+    HADOOP-4598. '-setrep' command skips under-replicated blocks. (hairong)
+
+    HADOOP-4429. Set defaults for user, group in UnixUserGroupInformation so
+    login fails more predictably when misconfigured. (Alex Loddengaard via
+    cdouglas)
+
+    HADOOP-4676. Fix broken URL in blacklisted tasktrackers page. (Amareshwari
+    Sriramadasu via cdouglas)
+
+    HADOOP-3422. Ganglia counter metrics are all reported with the metric
+    name "value", so the counter values cannot be seen. (Jason Attributor
+    and Brian Bockelman via stack)
+
+    HADOOP-4704. Fix javadoc typos "the the". (szetszwo)
+
+    HADOOP-4677. Fix semantics of FileSystem::getBlockLocations to return
+    meaningful values. (Hong Tang via cdouglas)
+
+    HADOOP-4669. Use correct operator when evaluating whether access time is
+    enabled (Dhruba Borthakur via cdouglas)
+
+    HADOOP-4732. Pass connection and read timeouts in the correct order when
+    setting up fetch in reduce. (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-4558. Fix capacity reclamation in capacity scheduler.
+    (Amar Kamat via yhemanth)
+
+    HADOOP-4770. Fix rungridmix_2 script to work with RunJar. (cdouglas)
+
+    HADOOP-4738. When using git, the saveVersion script will use only the
+    commit hash for the version and not the message, which requires escaping.
+    (cdouglas)
+
+    HADOOP-4576. Show pending job count instead of task count in the UI per
+    queue in capacity scheduler. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4623. Maintain running tasks even if speculative execution is off.
+    (Amar Kamat via yhemanth)
+
+    HADOOP-4786. Fix compilation error in
+    TestTrackerBlacklistAcrossJobs. (yhemanth)
+
+    HADOOP-4785. Fixes the JobTracker heartbeat to not make two calls to
+    System.currentTimeMillis(). (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4792. Add generated Chukwa configuration files to version control
+    ignore lists. (cdouglas)
+
+    HADOOP-4796. Fix Chukwa test configuration, remove unused components. (Eric
+    Yang via cdouglas)
+
+    HADOOP-4708. Add binaries missed in the initial checkin for Chukwa. (Eric
+    Yang via cdouglas)
+
+    HADOOP-4805. Remove black list collector from Chukwa Agent HTTP Sender.
+    (Eric Yang via cdouglas)
+
+    HADOOP-4837. Move HADOOP_CONF_DIR configuration to chukwa-env.sh (Jerome
+    Boulon via cdouglas)
+
+    HADOOP-4825. Use ps instead of jps for querying process status in Chukwa.
+    (Eric Yang via cdouglas)
+
+    HADOOP-4844. Fixed javadoc for
+    org.apache.hadoop.fs.permission.AccessControlException to document that
+    it's deprecated in favour of
+    org.apache.hadoop.security.AccessControlException. (acmurthy) 
+
+    HADOOP-4706. Close the underlying output stream in
+    IFileOutputStream::close. (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-4855. Fixed command-specific help messages for refreshServiceAcl in
+    DFSAdmin and MRAdmin. (acmurthy)
+
+    HADOOP-4820. Remove unused method FSNamesystem::deleteInSafeMode. (Suresh
+    Srinivas via cdouglas)
+
+    HADOOP-4698. Lower io.sort.mb to 10 in the tests and raise the junit memory
+    limit to 512m from 256m. (Nigel Daley via cdouglas)
+
+    HADOOP-4860. Split TestFileTailingAdapters into three separate tests to
+    avoid contention. (Eric Yang via cdouglas)
+
+    HADOOP-3921. Fixed clover (code coverage) target to work with JDK 6.
+    (tomwhite via nigel)
+
+    HADOOP-4845. Modify the reduce input byte counter to record only the
+    compressed size and add a human-readable label. (Yongqiang He via cdouglas)
+
+    HADOOP-4458. Add a test creating symlinks in the working directory.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-4879. Fix org.apache.hadoop.mapred.Counters to correctly define
+    Object.equals rather than depend on contentEquals api. (omalley via 
+    acmurthy)
+
+    HADOOP-4791. Fix rpm build process for Chukwa. (Eric Yang via cdouglas)
+
+    HADOOP-4771. Correct initialization of the file count for directories 
+    with quotas. (Ruyue Ma via shv)
+
+    HADOOP-4878. Fix eclipse plugin classpath file to point to ivy's resolved
+    lib directory and added the same to test-patch.sh. (Giridharan Kesavan via
+    acmurthy)
+
+    HADOOP-4774. Fix default values of some capacity scheduler configuration
+    items which would otherwise not work on a fresh checkout.
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4876. Fix capacity scheduler reclamation by updating count of
+    pending tasks correctly. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4849. Documentation for Service Level Authorization implemented in
+    HADOOP-4348. (acmurthy)
+
+    HADOOP-4827. Replace Consolidator with Aggregator macros in Chukwa (Eric
+    Yang via cdouglas)
+
+    HADOOP-4894. Correctly parse ps output in Chukwa jettyCollector.sh. (Ari
+    Rabkin via cdouglas)
+
+    HADOOP-4892. Close fds out of Chukwa ExecPlugin. (Ari Rabkin via cdouglas)
+
+    HADOOP-4889. Fix permissions in RPM packaging. (Eric Yang via cdouglas)
+
+    HADOOP-4869. Fixes the TT-JT heartbeat to have an explicit flag for 
+    restart apart from the initialContact flag that existed earlier.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4716. Fixes ReduceTask.java to clear out the mapping between
+    hosts and MapOutputLocation upon a JT restart (Amar Kamat via ddas)
+
+    HADOOP-4880. Removes an unnecessary testcase from TestJobTrackerRestart.
+    (Amar Kamat via ddas)
+
+    HADOOP-4924. Fixes a race condition in TaskTracker re-init. (ddas)
+
+    HADOOP-4854. Read reclaim capacity interval from capacity scheduler 
+    configuration. (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4896. HDFS Fsck does not load HDFS configuration. (Raghu Angadi)
+
+    HADOOP-4956. Creates TaskStatus for failed tasks with an empty Counters 
+    object instead of null. (ddas)
+
+    HADOOP-4979. Fix capacity scheduler to block cluster for failed high
+    RAM requirements across task types. (Vivek Ratan via yhemanth)
+
+    HADOOP-4949. Fix native compilation. (Chris Douglas via acmurthy) 
+
+    HADOOP-4787. Fixes the testcase TestTrackerBlacklistAcrossJobs which was
+    earlier failing randomly. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4914. Add description fields to Chukwa init.d scripts (Eric Yang via
+    cdouglas)
+
+    HADOOP-4884. Make tool tip date format match standard HICC format. (Eric
+    Yang via cdouglas)
+
+    HADOOP-4925. Make Chukwa sender properties configurable. (Ari Rabkin via
+    cdouglas)
+
+    HADOOP-4947. Make Chukwa command parsing more forgiving of whitespace. (Ari
+    Rabkin via cdouglas)
+
+    HADOOP-5026. Make chukwa/bin scripts executable in repository. (Andy
+    Konwinski via cdouglas)
+
+    HADOOP-4977. Fix a deadlock between the reclaimCapacity and assignTasks
+    in capacity scheduler. (Vivek Ratan via yhemanth)
+
+    HADOOP-4988. Fix reclaim capacity to work even when there are queues with
+    no capacity. (Vivek Ratan via yhemanth)
+
+    HADOOP-5065. Remove generic parameters from argument to 
+    setIn/OutputFormatClass so that it works with SequenceIn/OutputFormat.
+    (cdouglas via omalley)
+
+    HADOOP-4818. Pass user config to instrumentation API. (Eric Yang via
+    cdouglas)
+
+    HADOOP-4993. Fix Chukwa agent configuration and startup to make it both
+    more modular and testable. (Ari Rabkin via cdouglas)
+
+    HADOOP-5048. Fix capacity scheduler to correctly cleanup jobs that are
+    killed after initialization, but before running. 
+    (Sreekanth Ramakrishnan via yhemanth)
+
+    HADOOP-4671. Mark loop control variables shared between threads as
+    volatile. (cdouglas)
+
+    HADOOP-5079. HashFunction inadvertently destroys some randomness
+    (Jonathan Ellis via stack)
+
+    HADOOP-4999. A failure to write to FsEditsLog results in 
+    IndexOutOfBounds exception. (Boris Shkolnik via rangadi)
+
+    HADOOP-5139. Catch IllegalArgumentException during metrics registration 
+    in RPC.  (Hairong Kuang via szetszwo)
+
+    HADOOP-5085. Copying a file to local with Crc throws an exception.
+    (hairong)
+
+    HADOOP-5211. Fix check for job completion in TestSetupAndCleanupFailure.
+    (enis)
+
+    HADOOP-5254. The Configuration class should be able to work with XML
+    parsers that do not support xmlinclude. (Steve Loughran via dhruba)
+
+    HADOOP-4692. Namenode in infinite loop for replicating/deleting corrupt
+    blocks. (hairong)
+
+    HADOOP-5255. Fix use of Math.abs to avoid overflow. (Jonathan Ellis via
+    cdouglas)
+
+    HADOOP-5269. Fixes a problem to do with tasktracker holding on to 
+    FAILED_UNCLEAN or KILLED_UNCLEAN tasks forever. (Amareshwari Sriramadasu
+    via ddas) 
+
+    HADOOP-5214. Fixes a ConcurrentModificationException while the Fairshare
+    Scheduler accesses the tasktrackers stored by the JobTracker.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5233. Addresses the three issues - Race condition in updating
+    status, NPE in TaskTracker task localization when the conf file is missing
+    (HADOOP-5234) and NPE in handling KillTaskAction of a cleanup task 
+    (HADOOP-5235). (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5247. Introduces a broadcast of KillJobAction to all trackers when
+    a job finishes. This fixes a bunch of problems to do with NPE when a 
+    completed job is not in memory and a tasktracker comes to the jobtracker 
+    with a status report of a task belonging to that job. (Amar Kamat via ddas)
+
+    HADOOP-5282. Fixed job history logs for task attempts that are
+    failed by the JobTracker, say due to lost task trackers. (Amar
+    Kamat via yhemanth)
+
+    HADOOP-5241. Fixes a bug in disk-space resource estimation. Makes
+    the estimation formula linear where blowUp =
+    Total-Output/Total-Input. (Sharad Agarwal via ddas)
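+
+    As an illustration with made-up numbers: if the completed maps of a job
+    have read 10 GB of input and produced 5 GB of output, then
+    blowUp = 5 / 10 = 0.5, and a pending task with a 256 MB input split would
+    be estimated to need roughly 0.5 * 256 MB = 128 MB of output space.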
+
+    HADOOP-5142. Fix MapWritable#putAll to store key/value classes. 
+    (Doğacan Güney via enis)
+
+    HADOOP-4744. Workaround for jetty6 returning -1 when getLocalPort
+    is invoked on the connector. The workaround patch retries a few
+    times before failing.  (Jothi Padmanabhan via yhemanth)
+
+    HADOOP-5280. Adds a check to prevent a task state transition from
+    FAILED to any of UNASSIGNED, RUNNING, COMMIT_PENDING or
+    SUCCEEDED. (ddas)
+
+    HADOOP-5272. Fixes a problem to do with detecting whether an
+    attempt is the first attempt of a Task. This affects JobTracker
+    restart. (Amar Kamat via ddas)
+
+    HADOOP-5306. Fixes a problem to do with logging/parsing the http port of a 
+    lost tracker. Affects JobTracker restart. (Amar Kamat via ddas)
+
+    HADOOP-5111. Fix Job::set* methods to work with generics. (cdouglas)
+
+    HADOOP-5274. Fix gridmix2 dependency on wordcount example. (cdouglas)
+
+    HADOOP-5145. Balancer sometimes runs out of memory after running
+    days or weeks.  (hairong)
+
+    HADOOP-5338. Fix jobtracker restart to clear task completion
+    events cached by tasktrackers forcing them to fetch all events
+    afresh, thus avoiding missed task completion events on the
+    tasktrackers. (Amar Kamat via yhemanth)
+
+    HADOOP-4695. Change TestGlobalFilter so that it allows a web page to be
+    filtered more than once for a single access.  (Kan Zhang via szetszwo) 
+
+    HADOOP-5298. Change TestServletFilter so that it allows a web page to be
+    filtered more than once for a single access.  (szetszwo) 
+
+    HADOOP-5432. Disable ssl during unit tests in hdfsproxy, as it is unused
+    and causes failures. (cdouglas)
+
+    HADOOP-5416. Correct the shell command "fs -test" forrest doc description.
+    (Ravi Phulari via szetszwo) 
+
+    HADOOP-5327. Fixed job tracker to remove files from system directory on
+    ACL check failures and also check ACLs on restart.
+    (Amar Kamat via yhemanth)
+
+    HADOOP-5395. Change the exception message when a job is submitted to an
+    invalid queue. (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5276. Fixes a problem to do with updating the start time of
+    a task when the tracker that ran the task is lost. (Amar Kamat via
+    ddas)
+
+    HADOOP-5278. Fixes a problem to do with logging the finish time of
+    a task during recovery (after a JobTracker restart). (Amar Kamat
+    via ddas)
+
+    HADOOP-5490. Fixes a synchronization problem in the
+    EagerTaskInitializationListener class. (Jothi Padmanabhan via
+    ddas)
+
+    HADOOP-5493. The shuffle copier threads return the codecs back to
+    the pool when the shuffle completes. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5414. Fixes IO exception while executing hadoop fs -touchz
+    fileName by making sure that lease renewal thread exits before dfs
+    client exits.  (hairong)
+
+    HADOOP-5103. FileInputFormat now reuses the clusterMap network
+    topology object, which significantly reduces the JobClient's log
+    messages from NetworkTopology.add. (Jothi Padmanabhan via ddas)
+
+    HADOOP-5483. Fixes a problem in the Directory Cleanup Thread due to which
+    TestMiniMRWithDFS sometimes used to fail. (ddas) 
+
+    HADOOP-5281. Prevent sharing incompatible ZlibCompressor instances between
+    GzipCodec and DefaultCodec. (cdouglas)
+
+    HADOOP-5463. Balancer throws "Not a host:port pair" unless port is
+    specified in fs.default.name. (Stuart White via hairong)
+
+    HADOOP-5514. Fix JobTracker metrics and add metrics for waiting, failed
+    tasks. (cdouglas)
+
+    HADOOP-5516. Fix NullPointerException in TaskMemoryManagerThread
+    that comes when monitored processes disappear when the thread is
+    running.  (Vinod Kumar Vavilapalli via yhemanth)
+
+    HADOOP-5382. Support combiners in the new context object API. (omalley)
+
+    HADOOP-5471. Fixes a problem to do with updating the log.index file in the 
+    case where a cleanup task is run. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5534. Fixed a deadlock in Fair scheduler's servlet.
+    (Rahul Kumar Singh via yhemanth)
+
+    HADOOP-5328. Fixes a problem in the renaming of job history files during 
+    job recovery. (Amar Kamat via ddas)
+
+    HADOOP-5417. Don't ignore InterruptedExceptions that happen when calling 
+    into rpc. (omalley)
+
+    HADOOP-5320. Add a close() in TestMapReduceLocal.  (Jothi Padmanabhan
+    via szetszwo)
+
+    HADOOP-5520. Fix a typo in disk quota help message.  (Ravi Phulari
+    via szetszwo)
+
+    HADOOP-5519. Remove claims from mapred-default.xml that prime numbers
+    of tasks are helpful.  (Owen O'Malley via szetszwo)
+
+    HADOOP-5484. TestRecoveryManager fails with FileAlreadyExistsException.
+    (Amar Kamat via hairong)
+
+    HADOOP-5564. Limit the JVM heap size in the java command for initializing
+    JAVA_PLATFORM.  (Suresh Srinivas via szetszwo)
+
+    HADOOP-5565. Add API for failing/finalized jobs to the JT metrics
+    instrumentation. (Jerome Boulon via cdouglas)
+
+    HADOOP-5390. Remove duplicate jars from tarball, src from binary tarball
+    added by hdfsproxy. (Zhiyong Zhang via cdouglas)
+
+    HADOOP-5066. Building binary tarball should not build docs/javadocs, copy
+    src, or run jdiff. (Giridharan Kesavan via cdouglas)
+
+    HADOOP-5459. Fix undetected CRC errors where intermediate output is closed
+    before it has been completely consumed. (cdouglas)
+
+    HADOOP-5571. Remove widening primitive conversion in TupleWritable mask
+    manipulation. (Jingkei Ly via cdouglas)
+
+    HADOOP-5588. Remove an unnecessary call to listStatus(..) in
+    FileSystem.globStatusInternal(..).  (Hairong Kuang via szetszwo)
+
+    HADOOP-5473. Solves a race condition in killing a task - the state is KILLED
+    if there is a user request pending to kill the task and the TT reported
+    the state as SUCCESS. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5576. Fix LocalRunner to work with the new context object API in
+    mapreduce. (Tom White via omalley)
+
+    HADOOP-4374. Installs a shutdown hook in the Task JVM so that log.index is
+    updated before the JVM exits. Also makes the update to log.index atomic.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-5577. Add a verbose flag to mapreduce.Job.waitForCompletion to get
+    the running job's information printed to the user's stdout as it runs.
+    (omalley)
+
+    HADOOP-5607. Fix NPE in TestCapacityScheduler. (cdouglas)
+
+    HADOOP-5605. All the replicas incorrectly got marked as corrupt. (hairong)
+
+    HADOOP-5337. JobTracker, upon restart, now waits for the TaskTrackers to
+    join back before scheduling new tasks. This fixes race conditions associated
+    with greedy scheduling as was the case earlier. (Amar Kamat via ddas) 
+
+    HADOOP-5227. Fix distcp so -update and -delete can be meaningfully
+    combined. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-5305. Increase number of files and print debug messages in
+    TestCopyFiles.  (szetszwo)
+
+    HADOOP-5548. Add synchronization for JobTracker methods in RecoveryManager.
+    (Amareshwari Sriramadasu via sharad)
+
+    HADOOP-3810. NameNode seems unstable on a cluster with little space left.
+    (hairong)
+
+    HADOOP-5068. Fix NPE in TestCapacityScheduler.  (Vinod Kumar Vavilapalli
+    via szetszwo)
+
+    HADOOP-5585. Clear FileSystem statistics between tasks when jvm-reuse
+    is enabled. (omalley)
+
+    HADOOP-5394. JobTracker might schedule 2 attempts of the same task 
+    with the same attempt id across restarts. (Amar Kamat via sharad)
+
+    HADOOP-5645. After HADOOP-4920 we need a place to checkin
+    releasenotes.html. (nigel)
+
+Release 0.19.2 - 2009-06-30
+
+  BUG FIXES
+
+    HADOOP-5154. Fixes a deadlock in the fairshare scheduler. 
+    (Matei Zaharia via yhemanth)
+   
+    HADOOP-5146. Fixes a race condition that causes LocalDirAllocator to miss
+    files.  (Devaraj Das via yhemanth)
+
+    HADOOP-4638. Fixes job recovery to not crash the job tracker for problems
+    with a single job file. (Amar Kamat via yhemanth)
+
+    HADOOP-5384. Fix a problem that DataNodeCluster creates blocks with
+    generationStamp == 1.  (szetszwo)
+
+    HADOOP-5376. Fixes the code handling lost tasktrackers to set the task state
+    to KILLED_UNCLEAN only for relevant type of tasks.
+    (Amareshwari Sriramadasu via yhemanth)
+
+    HADOOP-5285. Fixes the following issues: (1) obtainTaskCleanupTask checks
+    whether the job is initialized before trying to lock the JobInProgress;
+    (2) moves the CleanupQueue class outside the TaskTracker and makes it a
+    generic class that is also used by the JobTracker for deleting the paths
+    on the job's output fs; (3) moves the references to completedJobStore
+    outside the block where the JobTracker is locked. (ddas)
+
+    HADOOP-5392. Fixes a problem to do with JT crashing during recovery when
+    the job files are garbled. (Amar Kamat via ddas)
+
+    HADOOP-5332. Appending to files is not allowed (by default) unless
+    dfs.support.append is set to true. (dhruba)
+
+    HADOOP-5333. libhdfs supports appending to files. (dhruba)
+
+    HADOOP-3998. Fix dfsclient exception when JVM is shutdown. (dhruba)
+
+    HADOOP-5440. Fixes a problem to do with removing a taskId from the list
+    of taskIds that the TaskTracker's TaskMemoryManager manages.
+    (Amareshwari Sriramadasu via ddas)
+ 
+    HADOOP-5446. Restore TaskTracker metrics. (cdouglas)
+
+    HADOOP-5449. Fixes the history cleaner thread. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5479. NameNode should not send empty block replication request to
+    DataNode. (hairong)
+
+    HADOOP-5259. Job with output hdfs:/user/<username>/outputpath (no 
+    authority) fails with Wrong FS. (Doug Cutting via hairong)
+
+    HADOOP-5522. Documents the setup/cleanup tasks in the mapred tutorial.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5549. ReplicationMonitor should schedule both replication and
+    deletion work in one iteration. (hairong)
+
+    HADOOP-5554. DataNodeCluster and CreateEditsLog should create blocks with
+    the same generation stamp value. (hairong via szetszwo)
+
+    HADOOP-5231. Clones the TaskStatus before passing it to the JobInProgress.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4719. Fix documentation of 'ls' format for FsShell. (Ravi Phulari
+    via cdouglas)
+
+    HADOOP-5374. Fixes a NPE problem in getTasksToSave method.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4780. Cache the size of directories in DistributedCache, avoiding
+    long delays in recalculating it. (He Yongqiang via cdouglas)
+
+    HADOOP-5551. Prevent directory destruction on file create.
+    (Brian Bockelman via shv)
+
+    HADOOP-5671. Fix FNF exceptions when copying from old versions of
+    HftpFileSystem. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-5213. Fix NullPointerException caused when bzip2 compression
+    was used and the user closed an output stream without writing any data.
+    (Zheng Shao via dhruba)
+
+    HADOOP-5579. Set errno correctly in libhdfs for permission, quota, and FNF
+    conditions. (Brian Bockelman via cdouglas)
+
+    HADOOP-5816. Fixes a problem in the KeyFieldBasedComparator to do with
+    ArrayIndexOutOfBounds exception. (He Yongqiang via ddas)
+
+    HADOOP-5951. Add Apache license header to StorageInfo.java.  (Suresh
+    Srinivas via szetszwo)
+
+Release 0.19.1 - 2009-02-23 
+
+  IMPROVEMENTS
+
+    HADOOP-4739. Fix spelling and grammar, improve phrasing of some sections in
+    mapred tutorial. (Vivek Ratan via cdouglas)
+
+    HADOOP-3894. DFSClient logging improvements. (Steve Loughran via shv)
+
+    HADOOP-5126. Remove empty file BlocksWithLocations.java (shv)
+
+    HADOOP-5127. Remove public methods in FSDirectory. (Jakob Homan via shv)
+
+  BUG FIXES
+
+    HADOOP-4697. Fix getBlockLocations in KosmosFileSystem to handle multiple
+    blocks correctly. (Sriram Rao via cdouglas)
+
+    HADOOP-4420. Add null checks for job, caused by invalid job IDs.
+    (Aaron Kimball via tomwhite)
+
+    HADOOP-4632. Fix TestJobHistoryVersion to use test.build.dir instead of the
+    current working directory for scratch space. (Amar Kamat via cdouglas)
+
+    HADOOP-4508. Fix FSDataOutputStream.getPos() for append. (dhruba via
+    szetszwo)
+
+    HADOOP-4727. Fix a group checking bug in fill_stat_structure(...) in
+    fuse-dfs.  (Brian Bockelman via szetszwo)
+
+    HADOOP-4836. Correct typos in mapred related documentation. (Jordà Polo
+    via szetszwo)
+
+    HADOOP-4821. Usage descriptions in the Quotas guide documentation are
+    incorrect. (Boris Shkolnik via hairong)
+
+    HADOOP-4847. Moves the loading of OutputCommitter to the Task.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4966. Marks completed setup tasks for removal. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4982. TestFsck should run in Eclipse. (shv)
+
+    HADOOP-5008. TestReplication#testPendingReplicationRetry leaves an opened
+    fd unclosed. (hairong)
+
+    HADOOP-4906. Fix TaskTracker OOM by keeping a shallow copy of JobConf in
+    TaskTracker.TaskInProgress. (Sharad Agarwal via acmurthy) 
+
+    HADOOP-4918. Fix bzip2 compression to work with Sequence Files.
+    (Zheng Shao via dhruba).
+
+    HADOOP-4965. TestFileAppend3 should close FileSystem. (shv)
+
+    HADOOP-4967. Fixes a race condition in the JvmManager to do with killing
+    tasks. (ddas)
+
+    HADOOP-5009. DataNode#shutdown sometimes leaves data block scanner
+    verification log unclosed. (hairong)
+
+    HADOOP-5086. Use the appropriate FileSystem for trash URIs. (cdouglas)
+    
+    HADOOP-4955. Make DBOutputFormat use column names from setOutput().
+    (Kevin Peterson via enis) 
+
+    HADOOP-4862. Minor : HADOOP-3678 did not remove all the cases of 
+    spurious IOExceptions logged by DataNode. (Raghu Angadi) 
+
+    HADOOP-5034. NameNode should send both replication and deletion requests
+    to DataNode in one reply to a heartbeat. (hairong)
+
+    HADOOP-4759. Removes the temporary output directory for failed and killed
+    tasks by launching special CLEANUP tasks for them.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-5161. Accepted sockets do not get placed in
+    DataXceiverServer#childSockets. (hairong)
+
+    HADOOP-5193. Correct calculation of edits modification time. (shv)
+
+    HADOOP-4494. Allow libhdfs to append to files.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-5166. Fix JobTracker restart to work when ACLs are configured
+    for the JobTracker. (Amar Kamat via yhemanth).
+
+    HADOOP-5067. Fixes TaskInProgress.java to keep track of count of failed and
+    killed tasks correctly. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4760. HDFS streams should not throw exceptions when closed twice. 
+    (enis)
+
+Release 0.19.0 - 2008-11-18
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3595. Remove deprecated methods for mapred.combine.once 
+    functionality, which was necessary to providing backwards
+    compatible combiner semantics for 0.18. (cdouglas via omalley)
+
+    HADOOP-3667. Remove the following deprecated methods from JobConf:
+      addInputPath(Path)
+      getInputPaths()
+      getMapOutputCompressionType()
+      getOutputPath()
+      getSystemDir()
+      setInputPath(Path)
+      setMapOutputCompressionType(CompressionType style)
+      setOutputPath(Path)
+    (Amareshwari Sriramadasu via omalley)
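+
+    A minimal sketch of the replacement calls for the removed path accessors
+    (the paths below are hypothetical):
+
+      import org.apache.hadoop.fs.Path;
+      import org.apache.hadoop.mapred.FileInputFormat;
+      import org.apache.hadoop.mapred.FileOutputFormat;
+      import org.apache.hadoop.mapred.JobConf;
+
+      public class PathSetup {
+        public static void main(String[] args) {
+          JobConf conf = new JobConf();
+          // Instead of the removed JobConf.addInputPath/setOutputPath:
+          FileInputFormat.addInputPath(conf, new Path("/user/example/input"));
+          FileOutputFormat.setOutputPath(conf, new Path("/user/example/output"));
+        }
+      }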
+
+    HADOOP-3652. Remove deprecated class OutputFormatBase.
+    (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-2885. Break the hadoop.dfs package into separate packages under
+    hadoop.hdfs that reflect whether they are client, server, protocol, 
+    etc. DistributedFileSystem and DFSClient have moved and are now 
+    considered package private. (Sanjay Radia via omalley)
+
+    HADOOP-2325.  Require Java 6. (cutting)
+
+    HADOOP-372.  Add support for multiple input paths with a different
+    InputFormat and Mapper for each path.  (Chris Smith via tomwhite)
+
+    HADOOP-1700.  Support appending to file in HDFS. (dhruba)
+
+    HADOOP-3792. Make FsShell -test consistent with unix semantics, returning
+    zero for true and non-zero for false. (Ben Slusky via cdouglas)
+
+    HADOOP-3664. Remove the deprecated method InputFormat.validateInput,
+    which is no longer needed. (tomwhite via omalley)
+
+    HADOOP-3549. Give more meaningful errno's in libhdfs. In particular, 
+    EACCES is returned for permission problems. (Ben Slusky via omalley)
+
+    HADOOP-4036. ResourceStatus was added to TaskTrackerStatus by HADOOP-3759,
+    so increment the InterTrackerProtocol version. (Hemanth Yamijala via 
+    omalley)
+
+    HADOOP-3150. Moves task promotion to tasks. Defines a new interface for
+    committing output files. Moves job setup to jobclient, and moves jobcleanup
+    to a separate task. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3446. Keep map outputs in memory during the reduce. Remove
+    fs.inmemory.size.mb and replace with properties defining in memory map
+    output retention during the shuffle and reduce relative to maximum heap
+    usage. (cdouglas)
+
+    HADOOP-3245. Adds the feature for supporting JobTracker restart. Running
+    jobs can be recovered from the history file. The history file format has
+    been modified to support recovery. The task attempt ID now has the 
+    JobTracker start time to distinguish attempts of the same TIP across
+    restarts. (Amar Ramesh Kamat via ddas)
+
+    HADOOP-4007. Remove DFSFileInfo - FileStatus is sufficient.
+    (Sanjay Radia via hairong)
+
+    HADOOP-3722. Fixed Hadoop Streaming and Hadoop Pipes to use the Tool
+    interface and GenericOptionsParser. (Enis Soztutar via acmurthy) 
+
+    HADOOP-2816. Cluster summary at name node web reports the space
+    utilization as:
+    Configured Capacity: capacity of all the data directories - Reserved space
+    Present Capacity: Space available for dfs, i.e. remaining + used space
+    DFS Used%: DFS used space/Present Capacity
+    (Suresh Srinivas via hairong)
+
+    HADOOP-3938. Disk space quotas for HDFS. This is similar to namespace
+    quotas in 0.18. (rangadi)
+
+    HADOOP-4293. Make Configuration Writable and remove unreleased 
+    WritableJobConf. Configuration.write is renamed to writeXml. (omalley)
+
+    HADOOP-4281. Change dfsadmin to report available disk space in a format
+    consistent with the web interface as defined in HADOOP-2816. (Suresh
+    Srinivas via cdouglas)
+
+    HADOOP-4430. Further change the cluster summary at name node web that was
+    changed in HADOOP-2816:
+      Non DFS Used - This indicates the disk space taken by non-DFS files from
+                     the Configured capacity
+      DFS Used % - DFS Used % of Configured Capacity
+      DFS Remaining % - Remaining % of Configured Capacity available for DFS use
+    DFS command line report reflects the same change. Config parameter 
+    dfs.datanode.du.pct is no longer used and is removed from the 
+    hadoop-default.xml. (Suresh Srinivas via hairong)
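+
+    As a made-up numerical illustration: with 100 TB of raw data-directory
+    space and 10 TB reserved, Configured Capacity is 90 TB. If DFS has used
+    30 TB, 50 TB remain available for DFS, and non-DFS files occupy the other
+    10 TB, then the report shows Non DFS Used = 10 TB, Present Capacity
+    (remaining + used) = 80 TB, DFS Used % = 30/90 (about 33%), and
+    DFS Remaining % = 50/90 (about 56%).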
+
+    HADOOP-4116. Balancer should provide better resource management. (hairong)
+
+    HADOOP-4599. BlocksMap and BlockInfo made package private. (shv)
+
+  NEW FEATURES
+
+    HADOOP-3341. Allow streaming jobs to specify the field separator for map
+    and reduce input and output. The new configuration values are:
+      stream.map.input.field.separator
+      stream.map.output.field.separator
+      stream.reduce.input.field.separator
+      stream.reduce.output.field.separator
+    All of them default to "\t". (Zheng Shao via omalley)
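+
+    For illustration (the comma below is an arbitrary choice), the new keys
+    are ordinary configuration properties, typically passed as -D options on
+    the streaming command line or set from job-setup code:
+
+      import org.apache.hadoop.conf.Configuration;
+
+      public class SeparatorSetup {
+        public static void main(String[] args) {
+          Configuration conf = new Configuration();
+          // Use ',' instead of the default '\t' between map output fields.
+          conf.set("stream.map.output.field.separator", ",");
+          conf.set("stream.reduce.input.field.separator", ",");
+        }
+      }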
+
+    HADOOP-3479. Defines the configuration file for the resource manager in 
+    Hadoop. You can configure various parameters related to scheduling, such 
+    as queues and queue properties here. The properties for a queue follow a
+    naming convention, such as hadoop.rm.queue.queue-name.property-name.
+    (Hemanth Yamijala via ddas)
+
+    HADOOP-3149. Adds a way in which map/reduce tasks can create multiple
+    outputs. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3714.  Add a new contrib, bash-tab-completion, which enables 
+    bash tab completion for the bin/hadoop script. See the README file
+    in the contrib directory for the installation. (Chris Smith via enis)
+
+    HADOOP-3730. Adds a new JobConf constructor that disables loading
+    default configurations. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3772. Add a new Hadoop Instrumentation api for the JobTracker and
+    the TaskTracker, refactor Hadoop Metrics as an implementation of the api.
+    (Ari Rabkin via acmurthy) 
+
+    HADOOP-2302. Provides a comparator for numerical sorting of key fields.
+    (ddas)
+
+    HADOOP-153. Provides a way to skip bad records. (Sharad Agarwal via ddas)
+
+    HADOOP-657. Free disk space should be modelled and used by the scheduler
+    to make scheduling decisions. (Ari Rabkin via omalley)
+
+    HADOOP-3719. Initial checkin of Chukwa, which is a data collection and 
+    analysis framework. (Jerome Boulon, Andy Konwinski, Ari Rabkin, 
+    and Eric Yang)
+
+    HADOOP-3873. Add -filelimit and -sizelimit options to distcp to cap the
+    number of files/bytes copied in a particular run to support incremental
+    updates and mirroring. (TszWo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3585. FailMon package for hardware failure monitoring and 
+    analysis of anomalies. (Ioannis Koltsidas via dhruba)
+
+    HADOOP-1480. Add counters to the C++ Pipes API. (acmurthy via omalley)
+
+    HADOOP-3854. Add support for pluggable servlet filters in the HttpServers.
+    (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3759. Provides ability to run memory intensive jobs without 
+    affecting other running tasks on the nodes. (Hemanth Yamijala via ddas)
+
+    HADOOP-3746. Add a fair share scheduler. (Matei Zaharia via omalley)
+
+    HADOOP-3754. Add a thrift interface to access HDFS. (dhruba via omalley)
+
+    HADOOP-3828. Provides a way to write skipped records to DFS.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-3948. Separate name-node edits and fsimage directories.
+    (Lohit Vijayarenu via shv)
+
+    HADOOP-3939. Add an option to DistCp to delete files at the destination
+    not present at the source. (Tsz Wo (Nicholas) Sze via cdouglas)
+
+    HADOOP-3601. Add a new contrib module for Hive, which is a sql-like
+    query processing tool that uses map/reduce. (Ashish Thusoo via omalley)
+
+    HADOOP-3866. Added sort and multi-job updates in the JobTracker web ui.
+    (Craig Weisenfluh via omalley)
+
+    HADOOP-3698. Add access control to control who is allowed to submit or 
+    modify jobs in the JobTracker. (Hemanth Yamijala via omalley)
+
+    HADOOP-1869. Support access times for HDFS files. (dhruba)
+
+    HADOOP-3941. Extend FileSystem API to return file-checksums.
+    (szetszwo)
+
+    HADOOP-3581. Prevents memory intensive user tasks from taking down 
+    nodes. (Vinod K V via ddas)
+
+    HADOOP-3970. Provides a way to recover counters written to JobHistory.
+    (Amar Kamat via ddas)
+
+    HADOOP-3702. Adds ChainMapper and ChainReducer classes allow composing
+    chains of Maps and Reduces in a single Map/Reduce job, something like 
+    MAP+ / REDUCE MAP*. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3445. Add capacity scheduler that provides guaranteed capacities to 
+    queues as a percentage of the cluster. (Vivek Ratan via omalley)
+
+    HADOOP-3992. Add a synthetic load generation facility to the test
+    directory. (hairong via szetszwo)
+
+    HADOOP-3981. Implement a distributed file checksum algorithm in HDFS
+    and change DistCp to use file checksum for comparing src and dst files
+    (szetszwo)
+
+    HADOOP-3829. Narrow down skipped records based on a user-acceptable value.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-3930. Add common interfaces for the pluggable schedulers and the
+    cli & gui clients. (Sreekanth Ramakrishnan via omalley)
+
+    HADOOP-4176. Implement getFileChecksum(Path) in HftpFileSystem. (szetszwo)
+
+    HADOOP-249. Reuse JVMs across Map-Reduce Tasks. 
+    Configuration changes to hadoop-default.xml:
+      add mapred.job.reuse.jvm.num.tasks
+    (Devaraj Das via acmurthy) 
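+
+    A sketch of enabling JVM reuse for a job; -1 (no per-JVM task limit) is
+    shown for illustration:
+
+      import org.apache.hadoop.mapred.JobConf;
+
+      public class JvmReuseSetup {
+        public static void main(String[] args) {
+          JobConf conf = new JobConf();
+          // -1 lets a JVM run any number of (same-job) tasks; 1 disables reuse.
+          conf.setInt("mapred.job.reuse.jvm.num.tasks", -1);
+        }
+      }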
+
+    HADOOP-4070. Provide a mechanism in Hive for registering UDFs from the
+    query language. (tomwhite)
+
+    HADOOP-2536. Implement a JDBC based database input and output formats to
+    allow Map-Reduce applications to work with databases. (Fredrik Hedberg and
+    Enis Soztutar via acmurthy)
+
+    HADOOP-3019. A new library to support total order partitions.
+    (cdouglas via omalley)
+
+    HADOOP-3924. Added a 'KILLED' job status. (Subramaniam Krishnan via
+    acmurthy) 
+
+  IMPROVEMENTS
+
+    HADOOP-4205. hive: metastore and ql to use the refactored SerDe library.
+    (zshao)
+
+    HADOOP-4106. libhdfs: add time, permission and user attribute support 
+    (part 2). (Pete Wyckoff through zshao)
+
+    HADOOP-4104. libhdfs: add time, permission and user attribute support.
+    (Pete Wyckoff through zshao)
+
+    HADOOP-3908. libhdfs: better error message if libhdfs.so doesn't exist.
+    (Pete Wyckoff through zshao)
+
+    HADOOP-3732. Delay initialization of datanode block verification till
+    the verification thread is started. (rangadi)
+
+    HADOOP-1627. Various small improvements to 'dfsadmin -report' output.
+    (rangadi)
+
+    HADOOP-3577. Tools to inject blocks into name node and simulated
+    data nodes for testing. (Sanjay Radia via hairong)
+
+    HADOOP-2664. Add a lzop compatible codec, so that files compressed by lzop
+    may be processed by map/reduce. (cdouglas via omalley)
+
+    HADOOP-3655. Add additional ant properties to control junit. (Steve 
+    Loughran via omalley)
+
+    HADOOP-3543. Update the copyright year to 2008. (cdouglas via omalley)
+
+    HADOOP-3587. Add a unit test for the contrib/data_join framework.
+    (cdouglas)
+
+    HADOOP-3402. Add terasort example program (omalley)
+
+    HADOOP-3660. Add replication factor for injecting blocks in simulated
+    datanodes. (Sanjay Radia via cdouglas)
+
+    HADOOP-3684. Add a cloning function to the contrib/data_join framework
+    permitting users to define a more efficient method for cloning values from
+    the reduce than serialization/deserialization. (Runping Qi via cdouglas)
+
+    HADOOP-3478. Improves the handling of map output fetching. Now the
+    randomization is by the hosts (and not the map outputs themselves). 
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-3617. Removed redundant checks of accounting space in MapTask and
+    makes the spill thread persistent so as to avoid creating a new one for
+    each spill. (Chris Douglas via acmurthy)  
+
+    HADOOP-3412. Factor the scheduler out of the JobTracker and make
+    it pluggable. (Tom White and Brice Arnould via omalley)
+
+    HADOOP-3756. Minor. Remove unused dfs.client.buffer.dir from 
+    hadoop-default.xml. (rangadi)
+
+    HADOOP-3747. Adds counter support for MultipleOutputs.
+    (Alejandro Abdelnur via ddas)
+
+    HADOOP-3169. LeaseChecker daemon should not be started in DFSClient
+    constructor. (TszWo (Nicholas), SZE via hairong)
+
+    HADOOP-3824. Move base functionality of StatusHttpServer to a core
+    package. (TszWo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3646. Add a bzip2 compatible codec, so bzip compressed data
+    may be processed by map/reduce. (Abdul Qadeer via cdouglas)
+
+    HADOOP-3861. MapFile.Reader and Writer should implement Closeable.
+    (tomwhite via omalley)
+
+    HADOOP-3791. Introduce generics into ReflectionUtils. (Chris Smith via
+    cdouglas)
+
+    HADOOP-3694. Improve unit test performance by changing
+    MiniDFSCluster to listen only on 127.0.0.1.  (cutting)
+
+    HADOOP-3620. Namenode should synchronously resolve a datanode's network
+    location when the datanode registers. (hairong)
+
+    HADOOP-3860. NNThroughputBenchmark is extended with rename and delete 
+    benchmarks. (shv)
+    
+    HADOOP-3892. Include unix group name in JobConf. (Matei Zaharia via johan)
+
+    HADOOP-3875. Change the time period between heartbeats to be relative to
+    the end of the heartbeat rpc, rather than the start. This causes better
+    behavior if the JobTracker is overloaded. (acmurthy via omalley)
+
+    HADOOP-3853. Move multiple input format (HADOOP-372) extension to 
+    library package. (tomwhite via johan)
+
+    HADOOP-9. Use roulette scheduling for temporary space when the size
+    is not known. (Ari Rabkin via omalley)
+
+    HADOOP-3202. Use recursive delete rather than FileUtil.fullyDelete.
+    (Amareshwari Sriramadasu via omalley)
+
+    HADOOP-3368. Remove common-logging.properties from conf. (Steve Loughran 
+    via omalley)
+
+    HADOOP-3851. Fix spelling mistake in FSNamesystemMetrics. (Steve Loughran 
+    via omalley)
+
+    HADOOP-3780. Remove asynchronous resolution of network topology in the 
+    JobTracker (Amar Kamat via omalley)
+
+    HADOOP-3852. Add ShellCommandExecutor.toString method to make nicer
+    error messages. (Steve Loughran via omalley)
+
+    HADOOP-3844. Include message of local exception in RPC client failures.
+    (Steve Loughran via omalley)
+
+    HADOOP-3935. Split out inner classes from DataNode.java. (johan)
+
+    HADOOP-3905. Create generic interfaces for edit log streams. (shv)
+
+    HADOOP-3062. Add metrics to DataNode and TaskTracker to record network
+    traffic for HDFS reads/writes and MR shuffling. (cdouglas)
+
+    HADOOP-3742. Remove HDFS from the public javadoc and add a javadoc-dev
+    target for generating javadoc for developers. (Sanjay Radia via omalley)
+
+    HADOOP-3944. Improve documentation for public TupleWritable class in 
+    join package. (Chris Douglas via enis)
+
+    HADOOP-2330. Preallocate HDFS transaction log to improve performance.
+    (dhruba and hairong)
+
+    HADOOP-3965. Convert DataBlockScanner into a package private class. (shv)
+
+    HADOOP-3488. Prevent hadoop-daemon from rsync'ing log files (Stefan 
+    Groshupf and Craig Macdonald via omalley)
+
+    HADOOP-3342. Change the kill task actions to require http post instead of 
+    get to prevent accidental crawls from triggering it. (enis via omalley)
+
+    HADOOP-3937. Limit the job name in the job history filename to 50 
+    characters. (Matei Zaharia via omalley)
+
+    HADOOP-3943. Remove unnecessary synchronization in 
+    NetworkTopology.pseudoSortByDistance. (hairong via omalley)
+
+    HADOOP-3498. File globbing alternation should be able to span path
+    components. (tomwhite)
+
+    HADOOP-3361. Implement renames for NativeS3FileSystem.
+    (Albert Chern via tomwhite)
+
+    HADOOP-3605. Make EC2 scripts show an error message if AWS_ACCOUNT_ID is
+    unset. (Al Hoang via tomwhite)
+
+    HADOOP-4147. Remove unused class JobWithTaskContext from class
+    JobInProgress. (Amareshwari Sriramadasu via johan)
+
+    HADOOP-4151. Add a byte-comparable interface that both Text and 
+    BytesWritable implement. (cdouglas via omalley)
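+
+    A rough Java sketch of what a byte-comparable base type looks like; the
+    class name and details below are illustrative, not the exact interface
+    added by this change:
+
+      public abstract class ByteComparable implements Comparable<ByteComparable> {
+        public abstract int getLength();   // length of the serialized form
+        public abstract byte[] getBytes(); // the serialized bytes themselves
+
+        public int compareTo(ByteComparable other) {
+          byte[] a = getBytes(), b = other.getBytes();
+          int len = Math.min(getLength(), other.getLength());
+          for (int i = 0; i < len; i++) {
+            int diff = (a[i] & 0xff) - (b[i] & 0xff); // unsigned byte order
+            if (diff != 0) return diff;
+          }
+          return getLength() - other.getLength();
+        }
+      }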
+
+    HADOOP-4174. Move fs image/edit log methods from ClientProtocol to
+    NamenodeProtocol. (shv via szetszwo)
+
+    HADOOP-4181. Include a .gitignore and saveVersion.sh change to support
+    developing under git. (omalley)
+
+    HADOOP-4186. Factor LineReader out of LineRecordReader. (tomwhite via
+    omalley)
+
+    HADOOP-4184. Break the module dependencies between core, hdfs, and 
+    mapred. (tomwhite via omalley)
+
+    HADOOP-4075. test-patch.sh now spits out ant commands that it runs.
+    (Ramya R via nigel)
+
+    HADOOP-4117. Improve configurability of Hadoop EC2 instances.
+    (tomwhite)
+
+    HADOOP-2411. Add support for larger CPU EC2 instance types.
+    (Chris K Wensel via tomwhite)
+
+    HADOOP-4083. Changed the configuration attribute queue.name to
+    mapred.job.queue.name. (Hemanth Yamijala via acmurthy) 
+
+    HADOOP-4194. Added the JobConf and JobID to job-related methods in
+    JobTrackerInstrumentation for better metrics. (Mac Yang via acmurthy) 
+
+    HADOOP-3975. Change test-patch script to report the working dir
+    modifications that prevent the suite from being run. (Ramya R via cdouglas)
+
+    HADOOP-4124. Added a command-line switch to allow users to set job
+    priorities, also allow it to be manipulated via the web-ui. (Hemanth
+    Yamijala via acmurthy) 
+
+    HADOOP-2165. Augmented JobHistory to include the URIs to the tasks'
+    userlogs. (Vinod Kumar Vavilapalli via acmurthy) 
+
+    HADOOP-4062. Remove the synchronization on the output stream when a
+    connection is closed and also remove an undesirable exception when
+    a client is stopped while there is no pending RPC request. (hairong)
+
+    HADOOP-4227. Remove the deprecated class org.apache.hadoop.fs.ShellCommand.
+    (szetszwo)
+
+    HADOOP-4006. Clean up FSConstants and move some of the constants to
+    better places. (Sanjay Radia via rangadi)
+
+    HADOOP-4279. Trace the seeds of random sequences in append unit tests to
+    make intermittent failures reproducible. (szetszwo via cdouglas)
+
+    HADOOP-4209. Remove the change to the format of task attempt id by 
+    incrementing the task attempt numbers by 1000 when the job restarts.
+    (Amar Kamat via omalley)
+
+    HADOOP-4301. Adds forrest doc for the skip bad records feature.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4354. Separate TestDatanodeDeath.testDatanodeDeath() into 4 tests.
+    (szetszwo)
+
+    HADOOP-3790. Add more unit tests for testing HDFS file append.  (szetszwo)
+
+    HADOOP-4321. Include documentation for the capacity scheduler. (Hemanth 
+    Yamijala via omalley)
+
+    HADOOP-4424. Change menu layout for Hadoop documentation (Boris Shkolnik
+    via cdouglas).
+
+    HADOOP-4438. Update forrest documentation to include missing FsShell
+    commands. (Suresh Srinivas via cdouglas)
+
+    HADOOP-4105.  Add forrest documentation for libhdfs.
+    (Pete Wyckoff via cutting)
+
+    HADOOP-4510. Make getTaskOutputPath public. (Chris Wensel via omalley)
+
+  OPTIMIZATIONS
+
+    HADOOP-3556. Removed lock contention in MD5Hash by replacing the 
+    singleton MessageDigester with an instance per thread using 
+    ThreadLocal. (Iván de Prado via omalley)
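+
+    A minimal sketch of the per-thread digester pattern; the class and
+    method names here are my own, not the actual MD5Hash code:
+
+      import java.security.MessageDigest;
+      import java.security.NoSuchAlgorithmException;
+
+      public class PerThreadDigest {
+        private static final ThreadLocal<MessageDigest> DIGESTER =
+            new ThreadLocal<MessageDigest>() {
+              protected MessageDigest initialValue() {
+                try {
+                  return MessageDigest.getInstance("MD5");
+                } catch (NoSuchAlgorithmException e) {
+                  throw new RuntimeException(e);
+                }
+              }
+            };
+
+        public static byte[] digest(byte[] data) {
+          MessageDigest md = DIGESTER.get(); // no shared lock needed
+          md.reset();
+          return md.digest(data);
+        }
+      }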
+
+    HADOOP-3328. When client is writing data to DFS, only the last 
+    datanode in the pipeline needs to verify the checksum. Saves around
+    30% CPU on intermediate datanodes. (rangadi)
+
+    HADOOP-3863. Use a thread-local string encoder rather than a static one
+    that is protected by a lock. (acmurthy via omalley)
+
+    HADOOP-3864. Prevent the JobTracker from locking up when a job is being
+    initialized. (acmurthy via omalley)
+
+    HADOOP-3816. Faster directory listing in KFS. (Sriram Rao via omalley)
+
+    HADOOP-2130. Pipes submit job should have both blocking and non-blocking
+    versions. (acmurthy via omalley)
+
+    HADOOP-3769. Make the SampleMapper and SampleReducer from
+    GenericMRLoadGenerator public, so they can be used in other contexts. 
+    (Lingyun Yang via omalley)
+
+    HADOOP-3514. Inline the CRCs in intermediate files as opposed to reading
+    it from a different .crc file. (Jothi Padmanabhan via ddas)
+
+    HADOOP-3638. Caches the iFile index files in memory to reduce seeks
+    (Jothi Padmanabhan via ddas)
+
+    HADOOP-4225. FSEditLog.logOpenFile() should persist accessTime 
+    rather than modificationTime. (shv)
+
+    HADOOP-4380. Made several new classes (Child, JVMId, 
+    JobTrackerInstrumentation, QueueManager, ResourceEstimator, 
+    TaskTrackerInstrumentation, and TaskTrackerMetricsInst) in 
+    org.apache.hadoop.mapred  package private instead of public. (omalley)
+
+  BUG FIXES
+
+    HADOOP-3563.  Refactor the distributed upgrade code so that it is 
+    easier to identify datanode and namenode related code. (dhruba)
+
+    HADOOP-3640. Fix the read method in the NativeS3InputStream. (tomwhite via
+    omalley)
+
+    HADOOP-3711. Fixes the Streaming input parsing to properly find the 
+    separator. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3725. Prevent TestMiniMRMapDebugScript from swallowing exceptions.
+    (Steve Loughran via cdouglas)
+
+    HADOOP-3726. Throw exceptions from TestCLI setup and teardown instead of
+    swallowing them. (Steve Loughran via cdouglas)
+
+    HADOOP-3721. Refactor CompositeRecordReader and related mapred.join classes
+    to make them clearer. (cdouglas)
+
+    HADOOP-3720. Re-read the config file when dfsadmin -refreshNodes is invoked
+    so dfs.hosts and dfs.hosts.exclude are observed. (lohit vijayarenu via
+    cdouglas)
+
+    HADOOP-3485. Allow writing to files over fuse.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-3723. The flags to the libhdfs.create call can be treated as
+    a bitmask. (Pete Wyckoff via dhruba)
+
+    HADOOP-3643. Filter out completed tasks when asking for running tasks in
+    the JobTracker web/ui. (Amar Kamat via omalley)
+
+    HADOOP-3777. Ensure that Lzo compressors/decompressors correctly handle the
+    case where native libraries aren't available. (Chris Douglas via acmurthy) 
+
+    HADOOP-3728. Fix SleepJob so that it doesn't depend on temporary files,
+    this ensures we can now run more than one instance of SleepJob
+    simultaneously. (Chris Douglas via acmurthy) 
+
+    HADOOP-3795. Fix saving image files on Namenode with different checkpoint
+    stamps. (Lohit Vijayarenu via mahadev)
+   
+    HADOOP-3624. Improve CreateEditsLog to create a tree directory structure.
+    (Lohit Vijayarenu via mahadev)
+
+    HADOOP-3778. DFSInputStream.seek() did not retry in case of some errors.
+    (Luo Ning via rangadi)
+
+    HADOOP-3661. Make the handling of files deleted through fuse-dfs (moved
+    to Trash) similar to the behaviour of the dfs shell.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-3819. Unset LANG and LC_CTYPE in saveVersion.sh to make it
+    compatible with non-English locales. (Rong-En Fan via cdouglas)
+
+    HADOOP-3848. Cache calls to getSystemDir in the TaskTracker instead of
+    calling it for each task start. (acmurthy via omalley)
+
+    HADOOP-3131. Fix reduce progress reporting for compressed intermediate
+    data. (Matei Zaharia via acmurthy) 
+
+    HADOOP-3796. fuse-dfs configuration is implemented as file system
+    mount options. (Pete Wyckoff via dhruba)
+
+    HADOOP-3836. Fix TestMultipleOutputs to correctly clean up. (Alejandro 
+    Abdelnur via acmurthy)
+
+    HADOOP-3805. Improve fuse-dfs write performance.
+    (Pete Wyckoff via zshao)
+
+    HADOOP-3846. Fix unit test CreateEditsLog to generate paths correctly. 
+    (Lohit Vjayarenu via cdouglas)
+    
+    HADOOP-3904. Fix unit tests using the old dfs package name.
+    (TszWo (Nicholas), SZE via johan)
+
+    HADOOP-3319. Fix some HOD error messages to go to stderr instead of
+    stdout. (Vinod Kumar Vavilapalli via omalley)
+
+    HADOOP-3907. Move INodeDirectoryWithQuota to its own .java file.
+    (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3919. Fix attribute name in hadoop-default for 
+    mapred.jobtracker.instrumentation. (Ari Rabkin via omalley)
+
+    HADOOP-3903. Change the package name for the servlets to be hdfs instead of
+    dfs. (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3773. Change Pipes to set the default map output key and value 
+    types correctly. (Koji Noguchi via omalley)
+
+    HADOOP-3952. Fix compilation error in TestDataJoin referencing dfs package.
+    (omalley)
+
+    HADOOP-3951. Fix package name for FSNamesystem logs and modify other
+    hard-coded Logs to use the class name. (cdouglas)
+
+    HADOOP-3889. Improve error reporting from HftpFileSystem, handling in
+    DistCp. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3946. Fix TestMapRed after hadoop-3664. (tomwhite via omalley)
+
+    HADOOP-3949. Remove duplicate jars from Chukwa. (Jerome Boulon via omalley)
+
+    HADOOP-3933. DataNode sometimes sends up to io.bytes.per.checksum bytes 
+    more than required to client. (Ning Li via rangadi)
+
+    HADOOP-3962. Shell command "fs -count" should support paths with different
+    file systems. (Tsz Wo (Nicholas), SZE via mahadev)
+
+    HADOOP-3957. Fix javac warnings in DistCp and TestCopyFiles. (Tsz Wo
+    (Nicholas), SZE via cdouglas)
+
+    HADOOP-3958. Fix TestMapRed to check the success of test-job. (omalley via
+    acmurthy)
+
+    HADOOP-3985. Fix TestHDFSServerPorts to use random ports.  (Hairong Kuang 
+    via omalley)
+
+    HADOOP-3964. Fix javadoc warnings introduced by FailMon. (dhruba)
+
+    HADOOP-3785. Fix FileSystem cache to be case-insensitive for scheme and
+    authority. (Bill de hOra via cdouglas)
+
+    HADOOP-3506. Fix a rare NPE caused by error handling in S3. (Tom White via
+    cdouglas)
+
+    HADOOP-3705. Fix mapred.join parser to accept InputFormats named with
+    underscore and static, inner classes. (cdouglas)
+
+    HADOOP-4023. Fix javadoc warnings introduced when the HDFS javadoc was 
+    made private. (omalley)
+
+    HADOOP-4030. Remove lzop from the default list of codecs. (Arun Murthy via
+    cdouglas)
+
+    HADOOP-3961. Fix task disk space requirement estimates for virtual
+    input jobs. Delays limiting task placement until after 10% of the maps
+    have finished. (Ari Rabkin via omalley)
+
+    HADOOP-2168. Fix problem with C++ record reader's progress not being
+    reported to framework. (acmurthy via omalley)
+
+    HADOOP-3966. Copy findbugs generated output files to PATCH_DIR while 
+    running test-patch. (Ramya R via lohit)
+
+    HADOOP-4037. Fix the eclipse plugin for versions of kfs and log4j. (nigel
+    via omalley)
+
+    HADOOP-3950. Cause the Mini MR cluster to wait for task trackers to 
+    register before continuing. (enis via omalley)
+
+    HADOOP-3910. Remove unused ClusterTestDFSNamespaceLogging and
+    ClusterTestDFS. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3954. Disable record skipping by default. (Sharad Agarwal via
+    cdouglas)
+
+    HADOOP-4050. Fix TestFairScheduler to use absolute paths for the work
+    directory. (Matei Zaharia via omalley)
+
+    HADOOP-4069. Keep temporary test files from TestKosmosFileSystem under
+    test.build.data instead of /tmp. (lohit via omalley)
+ 
+    HADOOP-4078. Create test files for TestKosmosFileSystem in separate
+    directory under test.build.data. (lohit)
+
+    HADOOP-3968. Fix getFileBlockLocations calls to use FileStatus instead
+    of Path reflecting the new API. (Pete Wyckoff via lohit)
+
+    HADOOP-3963. libhdfs does not exit on its own, instead it returns error 
+    to the caller and behaves as a true library. (Pete Wyckoff via dhruba)
+
+    HADOOP-4100. Removes the cleanupTask scheduling from the Scheduler 
+    implementations and moves it to the JobTracker. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4097. Make hive work well with speculative execution turned on.
+    (Joydeep Sen Sarma via dhruba)
+
+    HADOOP-4113. Changes to libhdfs to not exit on its own, rather return
+    an error code to the caller. (Pete Wyckoff via dhruba)
+
+    HADOOP-4054. Remove duplicate lease removal during edit log loading.
+    (hairong)
+
+    HADOOP-4071. FSNameSystem.isReplicationInProgress should add an
+    underReplicated block to the neededReplication queue using method 
+    "add" not "update". (hairong)
+
+    HADOOP-4154. Fix type warnings in WritableUtils. (szetszwo via omalley)
+
+    HADOOP-4133. Log files generated by Hive should reside in the 
+    build directory. (Prasad Chakka via dhruba)
+
+    HADOOP-4094. Hive now has hive-default.xml and hive-site.xml similar
+    to core hadoop. (Prasad Chakka via dhruba)
+
+    HADOOP-4112. Handles cleanupTask in JobHistory 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3831. Very slow-reading clients sometimes failed while reading.
+    (rangadi)
+
+    HADOOP-4155. Use JobTracker's start time while initializing JobHistory's
+    JobTracker Unique String. (lohit) 
+
+    HADOOP-4099. Fix null pointer when using HFTP from an 0.18 server.
+    (dhruba via omalley)
+
+    HADOOP-3570. Includes user specified libjar files in the client side 
+    classpath path. (Sharad Agarwal via ddas)
+
+    HADOOP-4129. Changed memory limits of TaskTracker and Tasks to be in
+    KiloBytes rather than bytes. (Vinod Kumar Vavilapalli via acmurthy)
+
+    HADOOP-4139. Optimize Hive multi group-by.
+    (Namin Jain via dhruba)
+
+    HADOOP-3911. Add a check to fsck options to make sure -files is not 
+    the first option to resolve conflicts with GenericOptionsParser
+    (lohit)
+
+    HADOOP-3623. Refactor LeaseManager. (szetszwo)
+
+    HADOOP-4125. Handles Reduce cleanup tip on the web ui.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4087. Hive Metastore API for php and python clients.
+    (Prasad Chakka via dhruba)
+
+    HADOOP-4197. Update DATA_TRANSFER_VERSION for HADOOP-3981. (szetszwo)
+
+    HADOOP-4138. Refactor the Hive SerDe library to better structure
+    the interfaces to the serializer and de-serializer.
+    (Zheng Shao via dhruba)
+
+    HADOOP-4195. Close compressor before returning to codec pool.
+    (acmurthy via omalley)
+
+    HADOOP-2403. Escapes some special characters before logging to 
+    history files. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4200. Fix a bug in the test-patch.sh script.
+    (Ramya R via nigel)
+
+    HADOOP-4084. Add explain plan capabilities to Hive Query Language.
+    (Ashish Thusoo via dhruba)
+
+    HADOOP-4121. Preserve cause for exception if the initialization of
+    HistoryViewer for JobHistory fails. (Amareshwari Sri Ramadasu via
+    acmurthy) 
+
+    HADOOP-4213. Fixes NPE in TestLimitTasksPerJobTaskScheduler.
+    (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4077. Setting access and modification time for a file
+    requires write permissions on the file. (dhruba)
+
+    HADOOP-3592. Fix a couple of possible file leaks in FileUtil
+    (Bill de hOra via rangadi)
+
+    HADOOP-4120. Hive interactive shell records the time taken by a 
+    query.  (Raghotham Murthy via dhruba)
+
+    HADOOP-4090. The hive scripts pick up hadoop from HADOOP_HOME
+    and then the path. (Raghotham Murthy via dhruba)
+
+    HADOOP-4242. Remove extra ";" in FSDirectory that blocks compilation
+    in some IDE's. (szetszwo via omalley)
+
+    HADOOP-4249. Fix eclipse path to include the hsqldb.jar. (szetszwo via
+    omalley)
+
+    HADOOP-4247. Move InputSampler into org.apache.hadoop.mapred.lib, so that
+    examples.jar doesn't depend on tools.jar. (omalley)
+
+    HADOOP-4269. Fix the deprecation of LineReader by extending the new class
+    into the old name and deprecating it. Also update the tests to test the 
+    new class. (cdouglas via omalley)
+
+    HADOOP-4280. Fix conversions between seconds in C and milliseconds in 
+    Java for access times for files. (Pete Wyckoff via rangadi)
+
+    HADOOP-4254. -setSpaceQuota command does not convert "TB" extension to
+    terabytes properly. Implementation now uses StringUtils for parsing this.
+    (Raghu Angadi)
+
+    HADOOP-4259. Findbugs should run over tools.jar also. (cdouglas via 
+    omalley)
+
+    HADOOP-4275. Move public method isJobValidName from JobID to a private
+    method in JobTracker. (omalley)
+
+    HADOOP-4173. fix failures in TestProcfsBasedProcessTree and
+    TestTaskTrackerMemoryManager tests. ProcfsBasedProcessTree and
+    memory management in TaskTracker are disabled on Windows.
+    (Vinod K V via rangadi)
+
+    HADOOP-4189. Fixes the history blocksize & intertracker protocol version
+    issues introduced as part of HADOOP-3245. (Amar Kamat via ddas)
+
+    HADOOP-4190. Fixes the backward compatibility issue with Job History.
+    introduced by HADOOP-3245 and HADOOP-2403. (Amar Kamat via ddas)
+
+    HADOOP-4237. Fixes the TestStreamingBadRecords.testNarrowDown testcase.
+    (Sharad Agarwal via ddas)
+
+    HADOOP-4274. Capacity scheduler accidentally modifies the underlying 
+    data structures when browsing the job lists. (Hemanth Yamijala via omalley)
+
+    HADOOP-4309. Fix eclipse-plugin compilation. (cdouglas)
+
+    HADOOP-4232. Fix race condition in JVM reuse when multiple slots become
+    free. (ddas via acmurthy) 
+
+    HADOOP-4302. Fix a race condition in TestReduceFetch that can yield false
+    negatives. (cdouglas)
+
+    HADOOP-3942. Update distcp documentation to include features introduced in
+    HADOOP-3873, HADOOP-3939. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-4319. fuse-dfs dfs_read function returns as many bytes as it is
+    told to read unless end-of-file is reached.  (Pete Wyckoff via dhruba)
+
+    HADOOP-4246. Ensure we have the correct lower bound on the number of
+    retries for fetching map-outputs; also fixed the case where the reducer
+    automatically kills itself when too many unique map-outputs could not be
+    fetched for small jobs. (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-4163. Report FSErrors from map output fetch threads instead of
+    merely logging them. (Sharad Agarwal via cdouglas)
+
+    HADOOP-4261. Adds a setup task for jobs. This is required so that we 
+    don't setup jobs that haven't been inited yet (since init could lead
+    to job failure). Only after the init has successfully happened do we 
+    launch the setupJob task. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4256. Removes Completed and Failed Job tables from 
+    jobqueue_details.jsp. (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4267. Occasional exceptions during shutting down HSQLDB is logged 
+    but not rethrown. (enis) 
+
+    HADOOP-4018. The number of tasks for a single job cannot exceed a 
+    pre-configured maximum value. (dhruba)
+
+    HADOOP-4288. Fixes a NPE problem in CapacityScheduler. 
+    (Amar Kamat via ddas)
+
+    HADOOP-4014. Create hard links with 'fsutil hardlink' on Windows. (shv)
+
+    HADOOP-4393. Merged org.apache.hadoop.fs.permission.AccessControlException
+    and org.apache.hadoop.security.AccessControlIOException into a single
+    class hadoop.security.AccessControlException. (omalley via acmurthy)
+
+    HADOOP-4287. Fixes an issue to do with maintaining counts of running/pending
+    maps/reduces. (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4361. Makes sure that jobs killed from command line are killed
+    fast (i.e., there is a slot to run the cleanup task soon).
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4400. Add "hdfs://" to fs.default.name on quickstart.html.
+    (Jeff Hammerbacher via omalley)
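+
+    A minimal sketch of the corrected setting in client code; the host and
+    port are placeholders and a reachable namenode is assumed:
+
+      import org.apache.hadoop.conf.Configuration;
+      import org.apache.hadoop.fs.FileSystem;
+
+      public class DefaultFsExample {
+        public static void main(String[] args) throws Exception {
+          Configuration conf = new Configuration();
+          conf.set("fs.default.name", "hdfs://localhost:9000"); // scheme included
+          FileSystem fs = FileSystem.get(conf);
+          System.out.println(fs.getUri());
+        }
+      }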
+
+    HADOOP-4378. Fix TestJobQueueInformation to use SleepJob rather than
+    WordCount via TestMiniMRWithDFS. (Sreekanth Ramakrishnan via acmurthy) 
+
+    HADOOP-4376. Fix formatting in hadoop-default.xml for
+    hadoop.http.filter.initializers. (Enis Soztutar via acmurthy) 
+
+    HADOOP-4410. Adds an extra arg to the API FileUtil.makeShellPath to
+    determine whether to canonicalize file paths or not.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4236. Ensure un-initialized jobs are killed correctly on
+    user-demand. (Sharad Agarwal via acmurthy) 
+
+    HADOOP-4373. Fix calculation of Guaranteed Capacity for the
+    capacity-scheduler. (Hemanth Yamijala via acmurthy) 
+
+    HADOOP-4053. Schedulers must be notified when jobs complete. (Amar Kamat via omalley)
+
+    HADOOP-4335. Fix FsShell -ls for filesystems without owners/groups. (David
+    Phillips via cdouglas)
+
+    HADOOP-4426. TestCapacityScheduler broke due to the two commits HADOOP-4053
+    and HADOOP-4373. This patch fixes that. (Hemanth Yamijala via ddas)
+
+    HADOOP-4418. Updates documentation in forrest for Mapred, streaming and pipes.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3155. Ensure that there is only one thread fetching 
+    TaskCompletionEvents on TaskTracker re-init. (Dhruba Borthakur via
+    acmurthy) 
+
+    HADOOP-4425. Fix EditLogInputStream to overload the bulk read method.
+    (cdouglas)
+
+    HADOOP-4427. Adds the new queue/job commands to the manual.
+    (Sreekanth Ramakrishnan via ddas)
+
+    HADOOP-4278. Increase debug logging for unit test TestDatanodeDeath.
+    Fix the case when primary is dead.  (dhruba via szetszwo)
+
+    HADOOP-4423. Keep block length when the block recovery is triggered by
+    append.  (szetszwo)
+
+    HADOOP-4449. Fix dfsadmin usage. (Raghu Angadi via cdouglas)
+
+    HADOOP-4455. Added TestSerDe so that unit tests can run successfully.
+    (Ashish Thusoo via dhruba)
+
+    HADOOP-4457. Fixes an input split logging problem introduced by
+    HADOOP-3245. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-4464. Separate out TestFileCreationClient from TestFileCreation.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-4404. saveFSImage() removes files from a storage directory that do 
+    not correspond to its type. (shv)
+
+    HADOOP-4149. Fix handling of updates to the job priority, by changing the
+    list of jobs to be keyed by the priority, submit time, and job tracker id.
+    (Amar Kamat via omalley)
+
+    HADOOP-4296. Fix job client failures by not retiring a job as soon as it
+    is finished. (dhruba)
+
+    HADOOP-4439. Remove configuration variables that aren't usable yet, in
+    particular mapred.tasktracker.tasks.maxmemory and mapred.task.max.memory.
+    (Hemanth Yamijala via omalley)
+
+    HADOOP-4230. Fix for serde2 interface, limit operator, select * operator,
+    UDF trim functions and sampling. (Ashish Thusoo via dhruba)
+
+    HADOOP-4358. No need to truncate access time in INode. Also fixes NPE 
+    in CreateEditsLog. (Raghu Angadi) 
+
+    HADOOP-4387. TestHDFSFileSystemContract fails on windows nightly builds.
+    (Raghu Angadi)
+
+    HADOOP-4466. Ensure that SequenceFileOutputFormat isn't tied to Writables
+    and can be used with other Serialization frameworks. (Chris Wensel via
+    acmurthy)
+
+    HADOOP-4525. Fix ipc.server.ipcnodelay originally missed in in HADOOP-2232.
+    (cdouglas via Clint Morgan)
+
+    HADOOP-4498. Ensure that JobHistory correctly escapes the job name so that
+    regex patterns work. (Chris Wensel via acmurthy)
+
+    HADOOP-4446. Modify guaranteed capacity labels in capacity scheduler's UI
+    to reflect the information being displayed. (Sreekanth Ramakrishnan via 
+    yhemanth)
+
+    HADOOP-4282. Some user facing URLs are not filtered by user filters.
+    (szetszwo)
+
+    HADOOP-4595. Fixes two race conditions - one to do with updating free slot count,
+    and another to do with starting the MapEventsFetcher thread. (ddas)
+
+    HADOOP-4552. Fix a deadlock in RPC server. (Raghu Angadi)
+
+    HADOOP-4471. Sort running jobs by priority in the capacity scheduler.
+    (Amar Kamat via yhemanth) 
+
+    HADOOP-4500. Fix MultiFileSplit to get the FileSystem from the relevant
+    path rather than the JobClient. (Joydeep Sen Sarma via cdouglas)
+
+Release 0.18.4 - Unreleased
+
+  BUG FIXES
+
+    HADOOP-5114. Remove timeout for accept() in DataNode. This makes accept() 
+    fail in JDK on Windows and causes many tests to fail. (Raghu Angadi)
+
+    HADOOP-5192. Block receiver should not remove a block that's created or
+    being written by other threads. (hairong)
+ 
+    HADOOP-5134. FSNamesystem#commitBlockSynchronization adds under-construction
+    block locations to blocksMap. (Dhruba Borthakur via hairong)
+
+    HADOOP-5412. Simulated DataNode should not write to a block that's being
+    written by another thread. (hairong)
+
+    HADOOP-5465. Fix the problem of blocks remaining under-replicated by
+    providing synchronized modification to the counter xmitsInProgress in
+    DataNode. (hairong)
+
+    HADOOP-5557. Fixes some minor problems in TestOverReplicatedBlocks.
+    (szetszwo)
+
+    HADOOP-5644. Namenode is stuck in safe mode. (Suresh Srinivas via hairong)
+
+    HADOOP-6017. Lease Manager in NameNode does not handle certain characters
+    in filenames. This results in fatal errors in Secondary NameNode and while
+    restarting NameNode. (Tsz Wo (Nicholas), SZE via rangadi)
+
+Release 0.18.3 - 2009-01-27
+
+  IMPROVEMENTS
+
+    HADOOP-4150. Include librecordio in hadoop releases. (Giridharan Kesavan
+    via acmurthy)
+
+    HADOOP-4668. Improve documentation for setCombinerClass to clarify the
+    restrictions on combiners. (omalley)
+
+  BUG FIXES
+
+    HADOOP-4499. DFSClient should invoke checksumOk only once. (Raghu Angadi)
+
+    HADOOP-4597. Calculate mis-replicated blocks when safe-mode is turned
+    off manually. (shv)
+
+    HADOOP-3121. lsr should keep listing the remaining items but not
+    terminate if there is any IOException. (szetszwo)
+
+    HADOOP-4610. Always calculate mis-replicated blocks when safe-mode is 
+    turned off. (shv)
+
+    HADOOP-3883. Limit namenode to assign at most one generation stamp for
+    a particular block within a short period. (szetszwo)
+
+    HADOOP-4556. Block went missing. (hairong)
+
+    HADOOP-4643. NameNode should exclude excessive replicas when counting
+    live replicas for a block. (hairong)
+
+    HADOOP-4703. Should not wait for proxy forever in lease recovering.
+    (szetszwo)
+
+    HADOOP-4647. NamenodeFsck should close the DFSClient it has created.
+    (szetszwo)
+
+    HADOOP-4616. Fuse-dfs can handle bad values from FileSystem.read call.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-4061. Throttle Datanode decommission monitoring in Namenode.
+    (szetszwo)
+
+    HADOOP-4659. Root cause of connection failure is being lost to code that
+    uses it for delaying startup. (Steve Loughran and Hairong via hairong)
+
+    HADOOP-4614. Lazily open segments when merging map spills to avoid using
+    too many file descriptors. (Yuri Pradkin via cdouglas)
+
+    HADOOP-4257. The DFS client should pick only one datanode as the candidate
+    to initiate lease recovery.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-4713. Fix librecordio to handle records larger than 64k. (Christian
+    Kunz via cdouglas)
+
+    HADOOP-4635. Fix a memory leak in fuse dfs. (pete wyckoff via mahadev)
+
+    HADOOP-4714. Report status between merges and make the number of records
+    between progress reports configurable. (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-4726. Fix documentation typos "the the". (Edward J. Yoon via
+    szetszwo)
+
+    HADOOP-4679. Datanode prints tons of log messages: waiting for threadgroup
+    to exit, active threads is XX. (hairong)
+
+    HADOOP-4746. Job output directory should be normalized. (hairong)
+
+    HADOOP-4717. Removal of default port# in NameNode.getUri() causes a
+    map/reduce job failed to prompt temporary output. (hairong)
+
+    HADOOP-4778. Check for zero size block meta file when updating a block.
+    (szetszwo)
+
+    HADOOP-4742. Replica gets deleted by mistake. (Wang Xu via hairong)
+
+    HADOOP-4702. Failed block replication leaves an incomplete block in
+    receiver's tmp data directory. (hairong)
+
+    HADOOP-4613. Fix block browsing on Web UI. (Johan Oskarsson via shv)
+
+    HADOOP-4806. HDFS rename should not use src path as a regular expression.
+    (szetszwo)
+
+    HADOOP-4795. Prevent lease monitor getting into an infinite loop when
+    leases and the namespace tree do not match. (szetszwo)
+
+    HADOOP-4620. Fixes Streaming to handle well the cases of map/reduce with empty
+    input/output. (Ravi Gummadi via ddas)
+
+    HADOOP-4857. Fixes TestUlimit to have exactly 1 map in the jobs spawned.
+    (Ravi Gummadi via ddas)
+
+    HADOOP-4810. Data lost at cluster startup time. (hairong)
+
+    HADOOP-4797. Improve how RPC server reads and writes large buffers. Avoids
+    soft-leak of direct buffers and excess copies in NIO layer. (Raghu Angadi)
+
+    HADOOP-4840. TestNodeCount sometimes fails with NullPointerException.
+    (hairong)
+
+    HADOOP-4904. Fix deadlock while leaving safe mode. (shv)
+
+    HADOOP-1980. 'dfsadmin -safemode enter' should prevent the namenode from
+    leaving safemode automatically. (shv & Raghu Angadi)
+
+    HADOOP-4951. Lease monitor should acquire the LeaseManager lock but not the
+    Monitor lock. (szetszwo)
+
+    HADOOP-4935. processMisReplicatedBlocks() should not clear 
+    excessReplicateMap. (shv)
+
+    HADOOP-4961. Fix ConcurrentModificationException in lease recovery 
+    of empty files. (shv)
+
+    HADOOP-4971. A long (unexpected) delay at datanodes could cause subsequent
+    block reports from many datanodes to arrive at the same time. (Raghu Angadi)
+    
+    HADOOP-4910. NameNode should exclude replicas when choosing excessive
+    replicas to delete to avoid data loss. (hairong)
+
+    HADOOP-4983. Fixes a problem in updating Counters in the status reporting.
+    (Amareshwari Sriramadasu via ddas)
+
+Release 0.18.2 - 2008-11-03
+
+  BUG FIXES
+
+    HADOOP-3614. Fix a bug that Datanode may use an old GenerationStamp to get
+    meta file. (szetszwo)
+
+    HADOOP-4314. Simulated datanodes should not include blocks that are still
+    being written in their block report. (Raghu Angadi)
+
+    HADOOP-4228. dfs datanode metrics, bytes_read and bytes_written, overflow
+    due to incorrect type used. (hairong)
+
+    HADOOP-4395. The FSEditLog loading is incorrect for the case OP_SET_OWNER.
+    (szetszwo)
+
+    HADOOP-4351. FSNamesystem.getBlockLocationsInternal throws
+    ArrayIndexOutOfBoundsException. (hairong)
+
+    HADOOP-4403. Make TestLeaseRecovery and TestFileCreation more robust.
+    (szetszwo)
+
+    HADOOP-4292. Do not support append() for LocalFileSystem. (hairong)
+
+    HADOOP-4399. Make fuse-dfs multi-thread access safe.
+    (Pete Wyckoff via dhruba)
+
+    HADOOP-4369. Use setMetric(...) instead of incrMetric(...) for metrics
+    averages.  (Brian Bockelman via szetszwo)
+
+    HADOOP-4469. Rename and add the ant task jar file to the tar file. (nigel)
+
+    HADOOP-3914. DFSClient sends Checksum Ok only once for a block. 
+    (Christian Kunz via hairong)
+ 
+    HADOOP-4467. SerializationFactory now uses the current context ClassLoader
+    allowing for user supplied Serialization instances. (Chris Wensel via
+    acmurthy)
+
+    HADOOP-4517. Release FSDataset lock before joining ongoing create threads.
+    (szetszwo)
+ 
+    HADOOP-4526. fsck failing with NullPointerException. (hairong)
+
+    HADOOP-4483. Honor the max parameter in DatanodeDescriptor.getBlockArray(..)
+    (Ahad Rana and Hairong Kuang via szetszwo)
+
+    HADOOP-4340. Correctly set the exit code from JobShell.main so that the
+    'hadoop jar' command returns the right code to the user. (acmurthy)
+
+  NEW FEATURES
+
+    HADOOP-2421.  Add jdiff output to documentation, listing all API
+    changes from the prior release.  (cutting)
+
+Release 0.18.1 - 2008-09-17
+
+  IMPROVEMENTS
+
+    HADOOP-3934. Upgrade log4j to 1.2.15. (omalley)
+
+  BUG FIXES
+
+    HADOOP-3995. In case of quota failure on HDFS, rename does not restore
+    source filename. (rangadi)
+
+    HADOOP-3821. Prevent SequenceFile and IFile from duplicating codecs in
+    CodecPool when closed more than once. (Arun Murthy via cdouglas)
+
+    HADOOP-4040. Remove coded default of the IPC idle connection timeout
+    from the TaskTracker, which was causing HDFS client connections to not be 
+    collected. (ddas via omalley)
+
+    HADOOP-4046. Made WritableComparable's constructor protected instead of 
+    private to re-enable class derivation. (cdouglas via omalley)
+
+    HADOOP-3940. Fix in-memory merge condition to wait when there are no map
+    outputs or when the final map outputs are being fetched without contention.
+    (cdouglas)
+
+Release 0.18.0 - 2008-08-19
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2703.  The default options to fsck skips checking files
+    that are being written to. The output of fsck is incompatible
+    with previous release. (lohit vijayarenu via dhruba) 
+
+    HADOOP-2865. FsShell.ls() printout format changed to print file names
+    in the end of the line. (Edward J. Yoon via shv)
+
+    HADOOP-3283. The Datanode has a RPC server. It currently supports
+    two RPCs: the first RPC retrieves the metadata about a block and the
+    second RPC sets the generation stamp of an existing block.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2797. Code related to upgrading to 0.14 (Block CRCs) is 
+    removed. As result, upgrade to 0.18 or later from 0.13 or earlier
+    is not supported. If upgrading from 0.13 or earlier is required,
+    please upgrade to an intermediate version (0.14-0.17) and then
+    to this version. (rangadi)
+
+    HADOOP-544. This issue introduces new classes JobID, TaskID and 
+    TaskAttemptID, which should be used instead of their string counterparts.
+    Functions in JobClient, TaskReport, RunningJob, jobcontrol.Job and 
+    TaskCompletionEvent that use string arguments are deprecated in favor 
+    of the corresponding ones that use ID objects. Applications can use 
+    xxxID.toString() and xxxID.forName() methods to convert/restore objects 
+    to/from strings. (Enis Soztutar via ddas)
+
+    HADOOP-2188. RPC client sends a ping rather than throwing timeouts.
+    RPC server does not throw away old RPCs. If clients and the server are on
+    different versions, they are not able to function well. In addition,
+    the property ipc.client.timeout is removed from the default hadoop
+    configuration. It also removes the metric RpcOpsDiscardedOPsNum. (hairong)
+
+    HADOOP-2181. This issue adds logging for input splits in Jobtracker log 
+    and jobHistory log. Also adds web UI for viewing input splits in job UI 
+    and history UI. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3226. Run combiners multiple times over map outputs as they
+    are merged in both the map and the reduce tasks. (cdouglas via omalley)
+
+    HADOOP-3329.  DatanodeDescriptor objects should not be stored in the
+    fsimage. (dhruba)
+
+    HADOOP-2656.  The Block object has a generation stamp inside it.
+    Existing blocks get a generation stamp of 0. This is needed to support
+    appends. (dhruba)
+
+    HADOOP-3390. Removed deprecated ClientProtocol.abandonFileInProgress().
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3405. Made some map/reduce internal classes non-public:
+    MapTaskStatus, ReduceTaskStatus, JobSubmissionProtocol, 
+    CompletedJobStatusStore. (enis via omalley)
+
+    HADOOP-3265. Removed deprecated API getFileCacheHints().
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3310. The namenode instructs the primary datanode to do lease
+    recovery. The block gets a new  generation stamp.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2909. Improve IPC idle connection management. Property
+    ipc.client.maxidletime is removed from the default configuration,
+    instead it is defined as twice of the ipc.client.connection.maxidletime.
+    A connection with outstanding requests won't be treated as idle.
+    (hairong)
+
+    HADOOP-3459. Change in the output format of dfs -ls to more closely match
+    /bin/ls. New format is: perm repl owner group size date name
+    (Mukund Madhugiri via omalley)
+
+    HADOOP-3113. An fsync invoked on an HDFS file really really
+    persists data! The datanode moves blocks in the tmp directory to 
+    the real block directory on a datanode-restart. (dhruba)
+
+    HADOOP-3452. Change fsck to return non-zero status for a corrupt
+    FileSystem. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3193. Include the address of the client that found the corrupted
+    block in the log. Also include a CorruptedBlocks metric to track the size
+    of the corrupted block map. (cdouglas)
+
+    HADOOP-3512. Separate out the tools into a tools jar. (omalley)
+
+    HADOOP-3598. Ensure that temporary task-output directories are not created
+    if they are not necessary e.g. for Maps with no side-effect files.
+    (acmurthy)
+
+    HADOOP-3665. Modify WritableComparator so that it only creates instances
+    of the keytype if the type does not define a WritableComparator. Calling
+    the superclass compare will throw a NullPointerException. Also define
+    a RawComparator for NullWritable and permit it to be written as a key
+    to SequenceFiles. (cdouglas)
+
+    HADOOP-3673. Avoid deadlock caused by DataNode RPC recoverBlock().
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+  NEW FEATURES
+
+    HADOOP-3074. Provides a UrlStreamHandler for DFS and other FS,
+    relying on FileSystem (taton)
+
+    HADOOP-2585. Name-node imports namespace data from a recent checkpoint
+    accessible via a NFS mount. (shv)
+
+    HADOOP-3061. Writable types for doubles and bytes. (Andrzej
+    Bialecki via omalley)
+
+    HADOOP-2857. Allow libhdfs to set jvm options. (Craig Macdonald
+    via omalley)
+
+    HADOOP-3317. Add default port for HDFS namenode.  The port in
+    "hdfs:" URIs now defaults to 8020, so that one may simply use URIs
+    of the form "hdfs://example.com/dir/file". (cutting)
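+
+    A short sketch of what the default port means for client code; the host
+    below is a placeholder and a running namenode is assumed:
+
+      import org.apache.hadoop.conf.Configuration;
+      import org.apache.hadoop.fs.FileSystem;
+      import org.apache.hadoop.fs.Path;
+
+      public class DefaultPortExample {
+        public static void main(String[] args) throws Exception {
+          // No port in the URI: the client should contact example.com:8020.
+          Path p = new Path("hdfs://example.com/dir/file");
+          FileSystem fs = p.getFileSystem(new Configuration());
+          System.out.println(fs.getUri());
+        }
+      }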
+
+    HADOOP-2019. Adds support for .tar, .tgz and .tar.gz files in 
+    DistributedCache (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3058. Add FSNamesystem status metrics. 
+    (Lohit Vjayarenu via rangadi)
+
+    HADOOP-1915. Allow users to specify counters via strings instead
+    of enumerations. (tomwhite via omalley)
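+
+    A minimal sketch of a mapper using a string-named counter; the group and
+    counter names are made up for illustration:
+
+      import java.io.IOException;
+      import org.apache.hadoop.io.LongWritable;
+      import org.apache.hadoop.io.Text;
+      import org.apache.hadoop.mapred.MapReduceBase;
+      import org.apache.hadoop.mapred.Mapper;
+      import org.apache.hadoop.mapred.OutputCollector;
+      import org.apache.hadoop.mapred.Reporter;
+
+      public class CountingMapper extends MapReduceBase
+          implements Mapper<LongWritable, Text, Text, LongWritable> {
+        public void map(LongWritable key, Text value,
+                        OutputCollector<Text, LongWritable> out, Reporter reporter)
+            throws IOException {
+          reporter.incrCounter("MyApp", "RecordsSeen", 1); // no enum needed
+          out.collect(value, new LongWritable(1));
+        }
+      }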
+
+    HADOOP-2065. Delay invalidating corrupt replicas of a block until it 
+    is removed from the under-replicated state. If all replicas are found to 
+    be corrupt, retain all copies and mark the block as corrupt.
+    (Lohit Vjayarenu via rangadi)
+
+    HADOOP-3221. Adds org.apache.hadoop.mapred.lib.NLineInputFormat, which 
+    splits files into splits each of N lines. N can be specified by 
+    configuration property "mapred.line.input.format.linespermap", which
+    defaults to 1. (Amareshwari Sriramadasu via ddas) 
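+
+    A short driver-side sketch using the property quoted above; the value 10
+    is just an example:
+
+      import org.apache.hadoop.mapred.JobConf;
+      import org.apache.hadoop.mapred.lib.NLineInputFormat;
+
+      public class NLineSetup {
+        public static void configure(JobConf job) {
+          job.setInputFormat(NLineInputFormat.class);
+          job.setInt("mapred.line.input.format.linespermap", 10); // 10 lines per map
+        }
+      }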
+
+    HADOOP-3336. Direct a subset of annotated FSNamesystem calls for audit
+    logging. (cdouglas)
+
+    HADOOP-3400. A new API FileSystem.deleteOnExit() that facilitates
+    handling of temporary files in HDFS. (dhruba)
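+
+    A minimal sketch of the new call for a scratch file; the path is a
+    placeholder:
+
+      import org.apache.hadoop.conf.Configuration;
+      import org.apache.hadoop.fs.FSDataOutputStream;
+      import org.apache.hadoop.fs.FileSystem;
+      import org.apache.hadoop.fs.Path;
+
+      public class ScratchFileExample {
+        public static void main(String[] args) throws Exception {
+          FileSystem fs = FileSystem.get(new Configuration());
+          Path scratch = new Path("/tmp/app-scratch");
+          FSDataOutputStream out = fs.create(scratch);
+          out.writeBytes("intermediate data\n");
+          out.close();
+          fs.deleteOnExit(scratch); // removed when the FileSystem is closed
+        }
+      }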
+
+    HADOOP-4.  Add fuse-dfs to contrib, permitting one to mount an
+    HDFS filesystem on systems that support FUSE, e.g., Linux.
+    (Pete Wyckoff via cutting)
+
+    HADOOP-3246. Add FTPFileSystem.  (Ankur Goel via cutting)
+
+    HADOOP-3250. Extend FileSystem API to allow appending to files.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
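+
+    A minimal sketch of the new append call, assuming an existing file and a
+    cluster with appends enabled:
+
+      import org.apache.hadoop.conf.Configuration;
+      import org.apache.hadoop.fs.FSDataOutputStream;
+      import org.apache.hadoop.fs.FileSystem;
+      import org.apache.hadoop.fs.Path;
+
+      public class AppendExample {
+        public static void main(String[] args) throws Exception {
+          FileSystem fs = FileSystem.get(new Configuration());
+          FSDataOutputStream out = fs.append(new Path("/logs/events"));
+          out.writeBytes("one more record\n");
+          out.close();
+        }
+      }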
+
+    HADOOP-3177. Implement Syncable interface for FileSystem.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-1328. Implement user counters in streaming. (tomwhite via
+    omalley)
+
+    HADOOP-3187. Quotas for namespace management. (Hairong Kuang via ddas)
+
+    HADOOP-3307. Support for Archives in Hadoop. (Mahadev Konar via ddas)
+
+    HADOOP-3460. Add SequenceFileAsBinaryOutputFormat to permit direct
+    writes of serialized data. (Koji Noguchi via cdouglas)
+
+    HADOOP-3230. Add ability to get counter values from command
+    line. (tomwhite via omalley)
+
+    HADOOP-930. Add support for native S3 files.  (tomwhite via cutting)
+
+    HADOOP-3502. Quota API needs documentation in Forrest. (hairong)
+
+    HADOOP-3413. Allow SequenceFile.Reader to use serialization
+    framework. (tomwhite via omalley)
+
+    HADOOP-3541. Import of the namespace from a checkpoint documented 
+    in hadoop user guide. (shv)
+
+  IMPROVEMENTS
+
+    HADOOP-3677. Simplify generation stamp upgrade by making it a 
+    local upgrade on datanodes. Deleted the distributed upgrade.
+    (rangadi)
+   
+    HADOOP-2928. Remove deprecated FileSystem.getContentLength().
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3130. Make the connect timeout smaller for getFile.
+    (Amar Ramesh Kamat via ddas)
+
+    HADOOP-3160. Remove deprecated exists() from ClientProtocol and 
+    FSNamesystem (Lohit Vjayarenu via rangadi)
+
+    HADOOP-2910. Throttle IPC Clients during bursts of requests or
+    server slowdown. Clients retry connection for up to 15 minutes
+    when socket connection times out. (hairong)
+
+    HADOOP-3295. Allow TextOutputFormat to use configurable separators.
+    (Zheng Shao via cdouglas).
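+
+    A one-line sketch of setting the separator; the property name here is my
+    assumption, not quoted from the change:
+
+      import org.apache.hadoop.mapred.JobConf;
+
+      public class SeparatorSetup {
+        public static void configure(JobConf job) {
+          job.set("mapred.textoutputformat.separator", "|"); // assumed key name
+        }
+      }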
+
+    HADOOP-3308. Improve QuickSort by excluding values eq the pivot from the
+    partition. (cdouglas)
+
+    HADOOP-2461. Trim property names in configuration.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2799. Deprecate o.a.h.io.Closable in favor of java.io.Closable.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3345. Enhance the hudson-test-patch target to cleanup messages,
+    fix minor defects, and add eclipse plugin and python unit tests. (nigel)
+
+    HADOOP-3144. Improve robustness of LineRecordReader by defining a maximum
+    line length (mapred.linerecordreader.maxlength), thereby avoiding reading
+    too far into the following split. (Zheng Shao via cdouglas)
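+
+    A short sketch of capping the line length with the property named above;
+    1 MB is an arbitrary example value:
+
+      import org.apache.hadoop.mapred.JobConf;
+
+      public class MaxLineLengthSetup {
+        public static void configure(JobConf job) {
+          job.setInt("mapred.linerecordreader.maxlength", 1024 * 1024); // 1 MB cap
+        }
+      }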
+
+    HADOOP-3334. Move lease handling from FSNamesystem into a separate class.
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3332. Reduces the amount of logging in Reducer's shuffle phase.
+    (Devaraj Das)
+
+    HADOOP-3355. Enhances Configuration class to accept hex numbers for getInt
+    and getLong. (Amareshwari Sriramadasu via ddas)
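+
+    A minimal sketch of the hex support; the property name is made up for
+    illustration:
+
+      import org.apache.hadoop.conf.Configuration;
+
+      public class HexConfExample {
+        public static void main(String[] args) {
+          Configuration conf = new Configuration();
+          conf.set("example.bit.mask", "0xff");
+          System.out.println(conf.getInt("example.bit.mask", 0)); // prints 255
+        }
+      }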
+
+    HADOOP-3350. Add an argument to distcp to permit the user to limit the
+    number of maps. (cdouglas)
+
+    HADOOP-3013. Add corrupt block reporting to fsck.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-3377. Remove TaskRunner::replaceAll and replace with equivalent
+    String::replace. (Brice Arnould via cdouglas)
+
+    HADOOP-3398. Minor improvement to a utility function that participates
+    in backoff calculation. (cdouglas)
+
+    HADOOP-3381. Clear references when directories are deleted so that the 
+    effect of memory leaks is not multiplied. (rangadi)
+
+    HADOOP-2867. Adds the task's CWD to its LD_LIBRARY_PATH. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3232. DU class runs the 'du' command in a separate thread so
+    that it does not block the user. The DataNode misses heartbeats on large
+    nodes otherwise. (Johan Oskarsson via rangadi)
+
+    HADOOP-3035. During block transfers between datanodes, the receiving
+    datanode, now can report corrupt replicas received from src node to
+    the namenode. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3434. Retain the cause of the bind failure in Server::bind.
+    (Steve Loughran via cdouglas)
+
+    HADOOP-3429. Increases the size of the buffers used for the communication
+    for Streaming jobs. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3486. Change default for initial block report to 0 seconds
+    and document it. (Sanjay Radia via omalley)
+
+    HADOOP-3448. Improve the text in the assertion making sure the
+    layout versions are consistent in the data node. (Steve Loughran
+    via omalley)
+
+    HADOOP-2095. Improve the Map-Reduce shuffle/merge by cutting down
+    buffer-copies; changed intermediate sort/merge to use the new IFile format
+    rather than SequenceFiles and compression of map-outputs is now
+    implemented by compressing the entire file rather than SequenceFile
+    compression. Shuffle also has been changed to use a simple byte-buffer
+    manager rather than the InMemoryFileSystem. 
+    Configuration changes to hadoop-default.xml:
+      deprecated mapred.map.output.compression.type 
+    (acmurthy)
+
+    HADOOP-236. JobTracker now refuses connection from a task tracker with a 
+    different version number. (Sharad Agarwal via ddas)
+
+    HADOOP-3427. Improves the shuffle scheduler. It now waits for notifications
+    from shuffle threads when it has scheduled enough, before scheduling more.
+    (ddas)
+
+    HADOOP-2393. Moves the handling of dir deletions in the tasktracker to
+    a separate thread. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3501. Deprecate InMemoryFileSystem. (cutting via omalley)
+
+    HADOOP-3366. Stall the shuffle while in-memory merge is in progress.
+    (acmurthy) 
+
+    HADOOP-2916. Refactor src structure, but leave package structure alone.
+    (Raghu Angadi via mukund) 
+
+    HADOOP-3492. Add forrest documentation for user archives.
+    (Mahadev Konar via hairong)
+
+    HADOOP-3467. Improve documentation for FileSystem::deleteOnExit.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3379. Documents stream.non.zero.exit.status.is.failure for Streaming.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3096. Improves documentation about the Task Execution Environment in 
+    the Map-Reduce tutorial. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2984. Add forrest documentation for DistCp. (cdouglas)
+
+    HADOOP-3406. Add forrest documentation for Profiling.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2762. Add forrest documentation for controls of memory limits on 
+    hadoop daemons and Map-Reduce tasks. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3535. Fix documentation and name of IOUtils.close to
+    reflect that it should only be used in cleanup contexts. (omalley)
+
+    HADOOP-3593. Updates the mapred tutorial. (ddas)
+
+    HADOOP-3547. Documents the way in which native libraries can be distributed
+    via the DistributedCache. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3606. Updates the Streaming doc. (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3532. Add jdiff reports to the build scripts. (omalley)
+
+    HADOOP-3100. Develop tests to test the DFS command line interface. (mukund)
+
+    HADOOP-3688. Fix up HDFS docs. (Robert Chansler via hairong)
+
+  OPTIMIZATIONS
+
+    HADOOP-3274. The default constructor of BytesWritable creates an empty 
+    byte array. (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-3272. Remove redundant copy of Block object in BlocksMap.
+    (Lohit Vjayarenu via shv)
+
+    HADOOP-3164. Reduce DataNode CPU usage by using FileChannel.transferTo().
+    On Linux DataNode takes 5 times less CPU while serving data. Results may
+    vary on other platforms. (rangadi)
+
+    HADOOP-3248. Optimization of saveFSImage. (Dhruba via shv)
+
+    HADOOP-3297. Fetch more task completion events from the job
+    tracker and task tracker. (ddas via omalley)
+
+    HADOOP-3364. Faster image and log edits loading. (shv)
+
+    HADOOP-3369. Fast block processing during name-node startup. (shv)
+
+    HADOOP-1702. Reduce buffer copies when data is written to DFS. 
+    DataNodes take 30% less CPU while writing data. (rangadi)
+
+    HADOOP-3095. Speed up split generation in the FileInputSplit,
+    especially for non-HDFS file systems. Deprecates
+    InputFormat.validateInput. (tomwhite via omalley)
+
+    HADOOP-3552. Add forrest documentation for Hadoop commands.
+    (Sharad Agarwal via cdouglas)
+
+  BUG FIXES
+
+    HADOOP-2905. 'fsck -move' triggers NPE in NameNode. 
+    (Lohit Vjayarenu via rangadi)
+
+    Increment ClientProtocol.versionID missed by HADOOP-2585. (shv)
+
+    HADOOP-3254. Restructure internal namenode methods that process
+    heartbeats to use well-defined BlockCommand object(s) instead of 
+    using the base java Object. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3176.  Change lease record when an open-for-write file 
+    gets renamed. (dhruba)
+
+    HADOOP-3269.  Fix a case when namenode fails to restart
+    while processing a lease record.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3282. Port issues in TestCheckpoint resolved. (shv)
+
+    HADOOP-3268. file:// URLs issue in TestUrlStreamHandler under Windows.
+    (taton)
+
+    HADOOP-3127. Deleting files in trash should really remove them.
+    (Brice Arnould via omalley)
+
+    HADOOP-3300. Fix locking of explicit locks in NetworkTopology.
+    (tomwhite via omalley)
+
+    HADOOP-3270. Constant DatanodeCommands are stored in static final
+    immutable variables for better code clarity.  
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2793. Fix broken links for worst performing shuffle tasks in
+    the job history page. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3313. Avoid unnecessary calls to System.currentTimeMillis
+    in RPC::Invoker. (cdouglas)
+
+    HADOOP-3318. Recognize "Darwin" as an alias for "Mac OS X" to
+    support Soylatte. (Sam Pullara via omalley)
+
+    HADOOP-3301. Fix misleading error message when S3 URI hostname
+    contains an underscore. (tomwhite via omalley)
+
+    HADOOP-3338. Fix Eclipse plugin to compile after HADOOP-544 was
+    committed. Updated all references to use the new JobID representation.
+    (taton via nigel)
+
+    HADOOP-3337. Loading FSEditLog was broken by HADOOP-3283 since it 
+    changed Writable serialization of DatanodeInfo. This patch handles it.
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3101. Prevent JobClient from throwing an exception when printing
+    usage. (Edward J. Yoon via cdouglas)
+
+    HADOOP-3119. Update javadoc for Text::getBytes to better describe its
+    behavior. (Tim Nelson via cdouglas)
+
+    HADOOP-2294. Fix documentation in libhdfs to refer to the correct free
+    function. (Craig Macdonald via cdouglas)
+
+    HADOOP-3335. Prevent the libhdfs build from deleting the wrong
+    files on make clean. (cutting via omalley)
+
+    HADOOP-2930. Make {start,stop}-balancer.sh work even if hadoop-daemon.sh
+    is not in the PATH. (Spiros Papadimitriou via hairong)
+
+    HADOOP-3085. Catch Exception in metrics util classes to ensure that
+    misconfigured metrics don't prevent others from updating. (cdouglas)
+
+    HADOOP-3299. CompositeInputFormat should configure the sub-input
+    formats. (cdouglas via omalley)
+
+    HADOOP-3309. Lower io.sort.mb and fs.inmemory.size.mb for MiniMRDFSSort
+    unit test so it passes on Windows. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3348. TestUrlStreamHandler should set URLStreamFactory after
+    DataNodes are initialized. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3371. Ignore InstanceAlreadyExistsException from
+    MBeanUtil::registerMBean. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3349. A file rename was incorrectly changing the name inside a
+    lease record. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3365. Removes an unnecessary copy of the key from SegmentDescriptor
+    to MergeQueue. (Devaraj Das)
+
+    HADOOP-3388. Fix for TestDatanodeBlockScanner to handle blocks with
+    generation stamps in them.  (dhruba)
+
+    HADOOP-3203. Fixes TaskTracker::localizeJob to pass correct file sizes
+    for the jarfile and the jobfile. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3391. Fix a findbugs warning introduced by HADOOP-3248 (rangadi)
+
+    HADOOP-3393. Fix datanode shutdown to call DataBlockScanner::shutdown and
+    close its log, even if the scanner thread is not running. (lohit vijayarenu
+    via cdouglas)
+
+    HADOOP-3399. A debug message was logged at info level. (rangadi)
+
+    HADOOP-3396. TestDatanodeBlockScanner occasionally fails. 
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3339. Some of the failures on the 3rd datanode in the DFS write 
+    pipeline are not detected properly. This could lead to hard failure of the
+    client's write operation. (rangadi)
+
+    HADOOP-3409. Namenode should save the root inode into fsimage. (hairong)
+
+    HADOOP-3296. Fix task cache to work for more than two levels in the cache
+    hierarchy. This also adds a new counter to track cache hits at levels
+    greater than two. (Amar Kamat via cdouglas)
+
+    HADOOP-3375. Lease paths were sometimes not removed from 
+    LeaseManager.sortedLeasesByPath. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3424. Values returned by getPartition should be checked to
+    make sure they are in the range 0 to #reduces - 1 (cdouglas via
+    omalley)
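+
+    A small sketch of a partitioner that stays within the checked range; the
+    class is illustrative, not part of the change:
+
+      import org.apache.hadoop.io.Text;
+      import org.apache.hadoop.mapred.JobConf;
+      import org.apache.hadoop.mapred.Partitioner;
+
+      public class ModPartitioner implements Partitioner<Text, Text> {
+        public void configure(JobConf job) { }
+
+        public int getPartition(Text key, Text value, int numPartitions) {
+          // Mask the sign bit so the result is always in [0, numPartitions - 1].
+          return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
+        }
+      }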
+
+    HADOOP-3408. Change FSNamesystem to send its metrics as integers to
+    accommodate collectors that don't support long values. (lohit vijayarenu
+    via cdouglas)
+
+    HADOOP-3403. Fixes a problem in the JobTracker to do with handling of lost
+    tasktrackers. (Arun Murthy via ddas)
+
+    HADOOP-1318. Completed maps are not failed if the number of reducers is
+    zero. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3351. Fixes the history viewer tool to not do huge StringBuffer
+    allocations. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3419. Fixes TestFsck to wait for updates to happen before
+    checking results to make the test more reliable. (Lohit Vijaya
+    Renu via omalley)
+
+    HADOOP-3259. Makes failure to read system properties due to a
+    security manager non-fatal. (Edward Yoon via omalley)
+
+    HADOOP-3451. Update libhdfs to use FileSystem::getFileBlockLocations
+    instead of removed getFileCacheHints. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3401. Update FileBench to set the new
+    "mapred.work.output.dir" property to work post-3041. (cdouglas via omalley)
+
+    HADOOP-2669. DFSClient locks pendingCreates appropriately. (dhruba)
+ 
+    HADOOP-3410. Fix KFS implementation to return correct file
+    modification time.  (Sriram Rao via cutting)
+
+    HADOOP-3340. Fix DFS metrics for BlocksReplicated, HeartbeatsNum, and
+    BlockReportsAverageTime. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3435. Remove the assumption in the scripts that bash is at
+    /bin/bash and fix the test patch to require bash instead of sh.
+    (Brice Arnould via omalley)
+
+    HADOOP-3471. Fix spurious errors from TestIndexedSort and add additional
+    logging to let failures be reproducible. (cdouglas)
+
+    HADOOP-3443. Avoid copying map output across partitions when renaming a
+    single spill. (omalley via cdouglas)
+
+    HADOOP-3454. Fix Text::find to search only valid byte ranges. (Chad Whipkey
+    via cdouglas)
+
+    HADOOP-3417. Removes the static configuration variable,
+    commandLineConfig from JobClient. Moves the cli parsing from
+    JobShell to GenericOptionsParser.  Thus removes the class
+    org.apache.hadoop.mapred.JobShell.  (Amareshwari Sriramadasu via
+    ddas)
+
+    HADOOP-2132. Only RUNNING/PREP jobs can be killed. (Jothi Padmanabhan 
+    via ddas)
+
+    HADOOP-3476. Code cleanup in fuse-dfs.
+    (Peter Wyckoff via dhruba)
+
+    HADOOP-2427. Ensure that the cwd of completed tasks is cleaned-up
+    correctly on task-completion. (Amareshwari Sri Ramadasu via acmurthy) 
+
+    HADOOP-2565. Remove DFSPath cache of FileStatus. 
+    (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3326. Cleanup the local-fs and in-memory merge in the ReduceTask by
+    spawning only one thread each for the on-disk and in-memory merge.
+    (Sharad Agarwal via acmurthy)
+
+    HADOOP-3493. Fix TestStreamingFailure to use FileUtil.fullyDelete to
+    ensure correct cleanup. (Lohit Vijayarenu via acmurthy) 
+
+    HADOOP-3455. Fix NPE in ipc.Client in case of connection failure and
+    improve its synchronization. (hairong)
+
+    HADOOP-3240. Fix a testcase to not create files in the current directory.
+    Instead the file is created in the test directory (Mahadev Konar via ddas)
+
+    HADOOP-3496.  Fix failure in TestHarFileSystem.testArchives due to change
+    in HADOOP-3095.  (tomwhite)
+
+    HADOOP-3135. Get the system directory from the JobTracker instead of from
+    the conf. (Subramaniam Krishnan via ddas)
+
+    HADOOP-3503. Fix a race condition when client and namenode start
+    simultaneous recovery of the same block.  (dhruba & Tsz Wo
+    (Nicholas), SZE)
+
+    HADOOP-3440. Fixes DistributedCache to not create symlinks for paths which
+    don't have fragments even when createSymLink is true. 
+    (Abhijit Bagri via ddas) 
+
+    HADOOP-3463. Hadoop-daemons script should cd to $HADOOP_HOME. (omalley)
+
+    HADOOP-3489. Fix NPE in SafeModeMonitor. (Lohit Vijayarenu via shv)
+
+    HADOOP-3509. Fix NPE in FSNamesystem.close. (Tsz Wo (Nicholas), SZE via 
+    shv)
+
+    HADOOP-3491. Name-node shutdown causes InterruptedException in 
+    ResolutionMonitor. (Lohit Vijayarenu via shv)
+
+    HADOOP-3511. Fixes namenode image to not set the root's quota to an
+    invalid value when the quota was not saved in the image. (hairong)
+
+    HADOOP-3516. Ensure the JobClient in HadoopArchives is initialized
+    with a configuration. (Subramaniam Krishnan via omalley)
+
+    HADOOP-3513. Improve NNThroughputBenchmark log messages. (shv)
+
+    HADOOP-3519.  Fix NPE in DFS FileSystem rename.  (hairong via tomwhite)
+    
+    HADOOP-3528. The FilesCreated and files_deleted metrics
+    do not match. (Lohit via Mahadev)
+
+    HADOOP-3418. When a directory is deleted, any leases that point to files
+    in the subdirectory are removed. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3542. Disables the creation of the _logs directory for the archives
+    directory. (Mahadev Konar via ddas)
+
+    HADOOP-3544. Fixes a documentation issue for hadoop archives.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3517. Fixes a problem in the reducer due to which the last InMemory
+    merge may be missed. (Arun Murthy via ddas)
+
+    HADOOP-3548. Fixes build.xml to copy all *.jar files to the dist.
+    (Owen O'Malley via ddas)
+
+    HADOOP-3363. Fix unformatted storage detection in FSImage. (shv)
+
+    HADOOP-3560. Fixes a problem to do with split creation in archives.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3545. Fixes an overflow problem in archives.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3561. Prevent the trash from deleting its parent directories.
+    (cdouglas)
+
+    HADOOP-3575. Fix the clover ant target after package refactoring.
+    (Nigel Daley via cdouglas)
+
+    HADOOP-3539.  Fix the tool path in the bin/hadoop script under
+    cygwin. (Tsz Wo (Nicholas), Sze via omalley)
+
+    HADOOP-3520.  TestDFSUpgradeFromImage triggers a race condition in the
+    Upgrade Manager. Fixed. (dhruba)
+
+    HADOOP-3586. Provide deprecated, backwards compatible semantics for the
+    combiner to be run once and only once on each record. (cdouglas)
+
+    HADOOP-3533. Add deprecated methods to provide API compatibility
+    between 0.18 and 0.17. Remove the deprecated methods in trunk. (omalley)
+
+    HADOOP-3580. Fixes a problem to do with specifying a har as an input to 
+    a job. (Mahadev Konar via ddas)
+
+    HADOOP-3333. Don't assign a task to a tasktracker that it failed to  
+    execute earlier (used to happen in the case of lost tasktrackers where
+    the tasktracker would reinitialize and bind to a different port). 
+    (Jothi Padmanabhan and Arun Murthy via ddas)
+
+    HADOOP-3534. Log IOExceptions that happen in closing the name
+    system when the NameNode shuts down. (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3546. TaskTracker re-initialization gets stuck in cleaning up.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3576. Fix NullPointerException when renaming a directory
+    to its subdirectory. (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3320. Fix NullPointerException in NetworkTopology.getDistance().
+    (hairong)
+
+    HADOOP-3569. KFS input stream read() now correctly reads 1 byte
+    instead of 4. (Sriram Rao via omalley)
+
+    HADOOP-3599. Fix JobConf::setCombineOnceOnly to modify the instance rather
+    than a parameter. (Owen O'Malley via cdouglas)
+
+    HADOOP-3590. Null pointer exception in JobTracker when the task tracker is 
+    not yet resolved. (Amar Ramesh Kamat via ddas)
+
+    HADOOP-3603. Fix MapOutputCollector to spill when io.sort.spill.percent is
+    1.0 and to detect spills when emitted records write no data. (cdouglas)
+
+    HADOOP-3615. Set DatanodeProtocol.versionID to the correct value.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3559. Fix the libhdfs test script and config to work with the
+    current semantics. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3480.  Need to update Eclipse template to reflect current trunk.
+    (Brice Arnould via tomwhite)
+  
+    HADOOP-3588. Fixed usability issues with archives. (mahadev)
+
+    HADOOP-3635. Uncaught exception in DataBlockScanner.
+    (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3639. Exception when closing DFSClient while multiple files are
+    open. (Benjamin Gufler via hairong)
+
+    HADOOP-3572. SetQuotas usage interface has some minor bugs. (hairong)
+
+    HADOOP-3649. Fix bug in removing blocks from the corrupted block map.
+    (Lohit Vijayarenu via shv)
+
+    HADOOP-3604. Work around a JVM synchronization problem observed while
+    retrieving the address of direct buffers from compression code by obtaining
+    a lock during this call. (Arun C Murthy via cdouglas)
+
+    HADOOP-3683. Fix dfs metrics to count file listings rather than files
+    listed. (lohit vijayarenu via cdouglas)
+
+    HADOOP-3597. Fix SortValidator to use filesystems other than the default as
+    input. Validation job still runs on default fs.
+    (Jothi Padmanabhan via cdouglas)
+
+    HADOOP-3693. Fix archives, distcp and native library documentation to
+    conform to style guidelines. (Amareshwari Sriramadasu via cdouglas)
+
+    HADOOP-3653. Fix test-patch target to properly account for Eclipse
+    classpath jars. (Brice Arnould via nigel)
+
+    HADOOP-3692. Fix documentation for Cluster setup and Quick start guides. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3691. Fix streaming and tutorial docs. (Jothi Padmanabhan via ddas)
+
+    HADOOP-3630. Fix NullPointerException in CompositeRecordReader from empty
+    sources (cdouglas)
+
+    HADOOP-3706. Fix a ClassLoader issue in the mapred.join Parser that
+    prevents it from loading user-specified InputFormats.
+    (Jingkei Ly via cdouglas)
+
+    HADOOP-3718. Fix KFSOutputStream::write(int) to output a byte instead of
+    an int, per the OutputStream contract. (Sriram Rao via cdouglas)
+
+    HADOOP-3647. Add debug logs to help track down a very occasional,
+    hard-to-reproduce bug in shuffle/merge on the reducer. (acmurthy)
+
+    HADOOP-3716. Prevent listStatus in KosmosFileSystem from returning
+    null for valid, empty directories. (Sriram Rao via cdouglas)
+
+    HADOOP-3752. Fix audit logging to record rename events. (cdouglas)
+
+    HADOOP-3737. Fix CompressedWritable to call Deflater::end to release
+    compressor memory. (Grant Glouser via cdouglas)
+
+    HADOOP-3670. Fixes JobTracker to clear out split bytes when no longer 
+    required. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3755. Update gridmix to work with HOD 0.4 (Runping Qi via cdouglas)
+  
+    HADOOP-3743. Fix -libjars, -files, -archives options to work even if 
+    user code does not implement the Tool interface. (Amareshwari Sriramadasu via mahadev)
+
+    HADOOP-3774. Fix typos in shell output. (Tsz Wo (Nicholas), SZE via
+    cdouglas)
+
+    HADOOP-3762. Fixed FileSystem cache to work with the default port. (cutting
+    via omalley)
+
+    HADOOP-3798. Fix tests compilation. (Mukund Madhugiri via omalley)
+
+    HADOOP-3794. Return modification time instead of zero for KosmosFileSystem.
+    (Sriram Rao via cdouglas)
+
+    HADOOP-3806. Remove debug statement to stdout from QuickSort. (cdouglas)
+
+    HADOOP-3776. Fix NPE at NameNode when datanode reports a block after it is
+    deleted at NameNode. (rangadi)
+
+    HADOOP-3537. Disallow adding a datanode to a network topology when its
+    network location is not resolved. (hairong)
+
+    HADOOP-3571. Fix bug in block removal used in lease recovery. (shv)
+
+    HADOOP-3645. MetricsTimeVaryingRate returns wrong value for
+    metric_avg_time. (Lohit Vijayarenu via hairong)
+
+    HADOOP-3521. Restored the cast to float for sending Counters' values
+    to Hadoop metrics, which was removed by HADOOP-544. (acmurthy)
+
+    HADOOP-3820. Fixes two problems in the gridmix-env: a syntax error and a
+    wrong default definition of USE_REAL_DATASET. (Arun Murthy via ddas)
+
+    HADOOP-3724. Fixes two problems related to storing and recovering lease
+    in the fsimage. (dhruba)
+    
+    HADOOP-3827.  Fixed compression of empty map-outputs. (acmurthy) 
+
+    HADOOP-3865. Remove reference to FSNamesystem from metrics preventing
+    garbage collection. (Lohit Vijayarenu via cdouglas)
+
+    HADOOP-3884.  Fix so that Eclipse plugin builds against recent
+    Eclipse releases.  (cutting)
+
+    HADOOP-3837. Streaming jobs report progress status. (dhruba)
+
+    HADOOP-3897. Fix a NPE in secondary namenode. (Lohit Vijayarenu via 
+    cdouglas)
+
+    HADOOP-3901. Fix bin/hadoop to correctly set classpath under cygwin.
+    (Tsz Wo (Nicholas) Sze via omalley)
+
+    HADOOP-3947. Fix a problem in tasktracker reinitialization. 
+    (Amareshwari Sriramadasu via ddas)
+
+Release 0.17.3 - Unreleased
+
+  IMPROVEMENTS
+
+    HADOOP-4164. Chinese translation of the documentation. (Xuebing Yan via 
+    omalley)
+
+  BUG FIXES
+
+    HADOOP-4277. Checksum verification was mistakenly disabled for
+    LocalFileSystem. (Raghu Angadi)
+
+    HADOOP-4271. Checksum input stream can sometimes return invalid 
+    data to the user. (Ning Li via rangadi)
+
+    HADOOP-4318. DistCp should use absolute paths for cleanup.  (szetszwo)
+
+    HADOOP-4326. ChecksumFileSystem does not override create(...) correctly.
+    (szetszwo)
+
+Release 0.17.2 - 2008-08-11
+
+  BUG FIXES
+
+    HADOOP-3678. Avoid spurious exceptions logged at DataNode when clients
+    read from DFS. (rangadi)
+
+    HADOOP-3707. NameNode keeps a count of number of blocks scheduled
+    to be written to a datanode and uses it to avoid allocating more
+    blocks than a datanode can hold. (rangadi)
+
+    HADOOP-3760. Fix a bug with HDFS file close() mistakenly introduced
+    by HADOOP-3681. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3681. DFSClient can get into an infinite loop while closing
+    a file if there are some errors. (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3002. Hold off block removal while in safe mode. (shv)
+
+    HADOOP-3685. Unbalanced replication target. (hairong)
+
+    HADOOP-3758. Shutdown datanode on version mismatch instead of retrying
+    continuously, preventing excessive logging at the namenode.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-3633. Correct exception handling in DataXceiveServer, and throttle
+    the number of xceiver threads in a data-node. (shv)
+
+    HADOOP-3370. Ensure that the TaskTracker.runningJobs data-structure is
+    correctly cleaned-up on task completion. (Zheng Shao via acmurthy) 
+
+    HADOOP-3813. Fix task-output clean-up on HDFS to use the recursive 
+    FileSystem.delete rather than the FileUtil.fullyDelete. (Amareshwari
+    Sri Ramadasu via acmurthy)  
+
+    HADOOP-3859. Allow the maximum number of xceivers in the data node to
+    be configurable. (Johan Oskarsson via omalley)
+
+    HADOOP-3931. Fix a corner case in the map-side sort that causes some values
+    to be counted as too large, causing premature spills to disk. Some values
+    would also bypass the combiner incorrectly. (cdouglas via omalley)
+
+Release 0.17.1 - 2008-06-23
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-3565. Fix the Java serialization, which is not enabled by
+    default, to clear the state of the serializer between objects.
+    (tomwhite via omalley)
+
+  IMPROVEMENTS
+
+    HADOOP-3522. Improve documentation on reduce pointing out that
+    input keys and values will be reused. (omalley)
+
+    HADOOP-3487. Balancer uses thread pools for managing its threads,
+    thereby providing better resource management. (hairong)
+
+  BUG FIXES
+
+    HADOOP-2159. Namenode stuck in safemode. The counter blockSafe should
+    not be decremented for invalid blocks. (hairong)
+
+    HADOOP-3472. MapFile.Reader getClosest() function returns incorrect results
+    when 'before' is true. (Todd Lipcon via Stack)
+
+    HADOOP-3442. Limit recursion depth on the stack for QuickSort to prevent
+    StackOverflowErrors. To avoid O(n*n) cases, change to HeapSort when the
+    partitioning depth exceeds a multiple of log(n). (cdouglas)
+
+    HADOOP-3477. Fix build to not package contrib/*/bin twice in
+    distributions.  (Adam Heath via cutting)
+
+    HADOOP-3475. Fix MapTask to correctly size the accounting allocation of
+    io.sort.mb. (cdouglas)
+
+    HADOOP-3550. Fix the serialization data structures in MapTask where the
+    value lengths are incorrectly calculated. (cdouglas)
+
+    HADOOP-3526. Fix contrib/data_join framework by cloning values retained
+    in the reduce. (Spyros Blanas via cdouglas)
+
+    HADOOP-1979. Speed up fsck by adding a buffered stream. (Lohit
+    Vijaya Renu via omalley)
+
+Release 0.17.0 - 2008-05-18
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2786.  Move HBase out of the Hadoop core.
+
+    HADOOP-2345.  New HDFS transactions to support appending 
+    to files.  Disk layout version changed from -11 to -12. (dhruba)
+
+    HADOOP-2192. Error messages from "dfs mv" command improved.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-1902. "dfs du" command without any arguments operates on the
+    current working directory.  (Mahadev Konar via dhruba)
+
+    HADOOP-2873.  Fixed bad disk format introduced by HADOOP-2345.
+    Disk layout version changed from -12 to -13. See changelist 630992
+    (dhruba)
+
+    HADOOP-1985.  This addresses rack-awareness for Map tasks and for 
+    HDFS in a uniform way. (ddas)
+
+    HADOOP-1986.  Add support for a general serialization mechanism for
+    Map Reduce. (tomwhite)
+
+    HADOOP-771. FileSystem.delete() takes an explicit parameter that
+    specifies whether a recursive delete is intended.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2470. Remove getContentLength(String), open(String, long, long)
+    and isDir(String) from ClientProtocol. ClientProtocol version changed
+    from 26 to 27. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-2822. Remove deprecated code for classes InputFormatBase and 
+    PhasedFileSystem. (Amareshwari Sriramadasu via enis)
+
+    HADOOP-2116. Changes the layout of the task execution directory. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2828. The following deprecated methods in Configuration.java
+    have been removed:
+        getObject(String name)
+        setObject(String name, Object value)
+        get(String name, Object defaultValue)
+        set(String name, Object value)
+        Iterator entries()
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2824. Removes one deprecated constructor from MiniMRCluster.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2823. Removes deprecated methods getColumn(), getLine() from
+    org.apache.hadoop.record.compiler.generated.SimpleCharStream. 
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3060. Removes one unused constructor argument from MiniMRCluster.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-2854. Remove deprecated o.a.h.ipc.Server::getUserInfo().
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2563. Remove deprecated FileSystem::listPaths.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2818.  Remove deprecated methods in Counters.
+    (Amareshwari Sriramadasu via tomwhite)
+
+    HADOOP-2831. Remove deprecated o.a.h.dfs.INode::getAbsoluteName()
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2839. Remove deprecated FileSystem::globPaths.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2634. Deprecate ClientProtocol::exists.
+    (lohit vijayarenu via cdouglas)
+
+    HADOOP-2410.  Make EC2 cluster nodes more independent of each other.
+    Multiple concurrent EC2 clusters are now supported, and nodes may be
+    added to a cluster on the fly with new nodes starting in the same EC2
+    availability zone as the cluster.  Ganglia monitoring and large
+    instance sizes have also been added.  (Chris K Wensel via tomwhite)
+
+    HADOOP-2826. Deprecated FileSplit.getFile(), LineRecordReader.readLine().
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3239. getFileInfo() returns null for non-existing files instead
+    of throwing FileNotFoundException. (Lohit Vijayarenu via shv)
+
+    HADOOP-3266. Removed HOD changes from CHANGES.txt, as they are now inside 
+    src/contrib/hod  (Hemanth Yamijala via ddas)
+
+    HADOOP-3280. Separate the configuration of the virtual memory size
+    (mapred.child.ulimit) from the jvm heap size, so that 64 bit
+    streaming applications are supported even when running with 32 bit
+    jvms. (acmurthy via omalley)
+
+  NEW FEATURES
+
+    HADOOP-1398.  Add HBase in-memory block cache.  (tomwhite)
+
+    HADOOP-2178.  Job History on DFS. (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2063. A new parameter to dfs -get command to fetch a file 
+    even if it is corrupted.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2219. A new command "dfs -count" that counts the number of
+    files and directories.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2906. Add an OutputFormat capable of using keys, values, and
+    config params to map records to different output files.
+    (Runping Qi via cdouglas)
+
+    HADOOP-2346. Utilities to support timeout while writing to sockets.
+    DFSClient and DataNode sockets have 10min write timeout. (rangadi)
+    
+    HADOOP-2951.  Add a contrib module that provides a utility to
+    build or update Lucene indexes using Map/Reduce.  (Ning Li via cutting)
+
+    HADOOP-1622.  Allow multiple jar files for map reduce.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2055. Allows users to set PathFilter on the FileInputFormat.
+    (Alejandro Abdelnur via ddas)
+
+    HADOOP-2551. More environment variables like HADOOP_NAMENODE_OPTS
+    for better control of HADOOP_OPTS for each component. (rangadi)
+
+    HADOOP-3001. Add job counters that measure the number of bytes
+    read and written to HDFS, S3, KFS, and local file systems. (omalley)
+
+    HADOOP-3048.  A new Interface and a default implementation to convert 
+    and restore serializations of objects to/from strings. (enis)
+
+  IMPROVEMENTS
+
+    HADOOP-2655. Copy on write for data and metadata files in the 
+    presence of snapshots. Needed for supporting appends to HDFS
+    files. (dhruba) 
+
+    HADOOP-1967.  When a Path specifies the same scheme as the default
+    FileSystem but no authority, the default FileSystem's authority is
+    used.  Also add warnings for old-format FileSystem names, accessor
+    methods for fs.default.name, and check for null authority in HDFS.
+    (cutting)
+
+    HADOOP-2895. Let the profiling string be configurable.
+    (Martin Traverso via cdouglas)
+
+    HADOOP-910. Enables Reduces to do merges for the on-disk map output files 
+    in parallel with their copying. (Amar Kamat via ddas)
+
+    HADOOP-730. Use rename rather than copy for local renames. (cdouglas)
+
+    HADOOP-2810. Updated the Hadoop Core logo. (nigel)
+
+    HADOOP-2057.  Streaming should optionally treat a non-zero exit status
+    of a child process as a failed task.  (Rick Cox via tomwhite)
+
+    HADOOP-2765. Enables specifying ulimits for streaming/pipes tasks (ddas)
+
+    HADOOP-2888. Make gridmix scripts more readily configurable and amenable
+    to automated execution. (Mukund Madhugiri via cdouglas)
+
+    HADOOP-2908.  A document that describes the DFS Shell command. 
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2981.  Update README.txt to reflect the upcoming use of
+    cryptography. (omalley)
+
+    HADOOP-2804.  Add support to publish CHANGES.txt as HTML when running
+    the Ant 'docs' target. (nigel)
+
+    HADOOP-2559. Change DFS block placement to allocate the first replica
+    locally, the second off-rack, and the third intra-rack from the
+    second. (lohit vijayarenu via cdouglas)
+
+    HADOOP-2939. Make the automated patch testing process an executable 
+    Ant target, test-patch. (nigel)
+
+    HADOOP-2239. Add HsftpFileSystem to permit transferring files over ssl.
+    (cdouglas)
+
+    HADOOP-2886.  Track individual RPC metrics.
+    (girish vaitheeswaran via dhruba)
+
+    HADOOP-2373. Improvement in safe-mode reporting. (shv)
+
+    HADOOP-3091. Modify FsShell command -put to accept multiple sources.
+    (Lohit Vijaya Renu via cdouglas)
+
+    HADOOP-3092. Show counter values from job -status command.
+    (Tom White via ddas)
+
+    HADOOP-1228.  Ant task to generate Eclipse project files.  (tomwhite)
+
+    HADOOP-3093. Adds Configuration.getStrings(name, default-value) and
+    the corresponding setStrings. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3106. Adds documentation in forrest for debugging.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3099. Add an option to distcp to preserve user, group, and
+    permission information. (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-2841. Unwrap AccessControlException and FileNotFoundException
+    from RemoteException for DFSClient. (shv)
+
+    HADOOP-3152.  Make index interval configurable when using
+    MapFileOutputFormat for a map-reduce job.  (Rong-En Fan via cutting)
+
+    HADOOP-3143. Decrease number of slaves from 4 to 3 in TestMiniMRDFSSort,
+    as Hudson generates false negatives under the current load.
+    (Nigel Daley via cdouglas)
+
+    HADOOP-3174. Illustrative example for MultipleFileInputFormat. (Enis
+    Soztutar via acmurthy)  
+
+    HADOOP-2993. Clarify the usage of JAVA_HOME in the Quick Start guide.
+    (acmurthy via nigel)
+
+    HADOOP-3124. Make DataNode socket write timeout configurable. (rangadi)
+
+  OPTIMIZATIONS
+
+    HADOOP-2790.  Fixed inefficient method hasSpeculativeTask by removing
+    repetitive calls to get the current time and by deferring the check of
+    whether speculation is enabled at all. (omalley)
+
+    HADOOP-2758. Reduce buffer copies in DataNode when data is read from
+    HDFS, without negatively affecting read throughput. (rangadi)
+
+    HADOOP-2399. Input key and value to combiner and reducer are reused.
+    (Owen O'Malley via ddas). 
+
+    HADOOP-2423.  Code optimization in FSNamesystem.mkdirs.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2606. ReplicationMonitor selects data-nodes to replicate directly
+    from needed replication blocks instead of looking up the blocks for
+    each live data-node. (shv)
+
+    HADOOP-2148. Eliminate redundant data-node blockMap lookups. (shv)
+
+    HADOOP-2027. Return the number of bytes in each block in a file
+    via a single rpc to the namenode to speed up job planning. 
+    (Lohit Vijaya Renu via omalley)
+
+    HADOOP-2902.  Replace uses of "fs.default.name" with calls to the
+    accessor methods added in HADOOP-1967.  (cutting)
+
+    HADOOP-2119.  Optimize scheduling of jobs with large numbers of
+    tasks by replacing static arrays with lists of runnable tasks. 
+    (Amar Kamat via omalley)
+
+    HADOOP-2919.  Reduce the number of memory copies done during the
+    map output sorting. Also adds two config variables:
+    io.sort.spill.percent - the percentage of io.sort.mb that should
+                            cause a spill (default 80%)
+    io.sort.record.percent - the percent of io.sort.mb that should
+                             hold key/value indexes (default 5%)
+    (cdouglas via omalley)
+
+    HADOOP-3140. Doesn't add a task to the commit queue if the task hasn't
+    generated any output. (Amar Kamat via ddas)
+
+    HADOOP-3168. Reduce the amount of logging in streaming to an
+    exponentially increasing number of records (up to 10,000
+    records/log). (Zheng Shao via omalley)
+ 
+  BUG FIXES
+
+    HADOOP-2195. '-mkdir' behaviour is now closer to Linux shell in case of
+    errors. (Mahadev Konar via rangadi)
+    
+    HADOOP-2190. Bring the behaviour of '-ls' and '-du' closer to Linux shell
+    commands in case of errors. (Mahadev Konar via rangadi)
+    
+    HADOOP-2193. 'fs -rm' and 'fs -rmr' show error message when the target
+    file does not exist. (Mahadev Konar via rangadi)
+            
+    HADOOP-2738. Text is not subclassable because set(Text) and compareTo(Object)
+    access the other instance's private members directly. (jimk)
+
+    HADOOP-2779.  Remove the references to HBase in the build.xml. (omalley)
+
+    HADOOP-2194. dfs cat on a non-existent file throws FileNotFoundException.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2767. Fix for NetworkTopology erroneously skipping the last leaf 
+    node on a rack. (Hairong Kuang and Mark Butler via dhruba)
+
+    HADOOP-1593. FsShell works with paths in non-default FileSystem.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2191. du and dus command on non-existent directory gives 
+    appropriate error message.  (Mahadev Konar via dhruba)
+
+    HADOOP-2832. Remove tabs from code of DFSClient for better
+    indentation. (dhruba)
+
+    HADOOP-2844. distcp closes file handles for sequence files.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2727. Fix links in Web UI of the hadoop daemons and some docs
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2871. Fixes a problem to do with file: URI in the JobHistory init.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2800.  Deprecate the SetFile.Writer constructor, not the whole class.
+    (Johan Oskarsson via tomwhite)
+
+    HADOOP-2891.  DFSClient.close() closes all open files. (dhruba)
+
+    HADOOP-2845.  Fix dfsadmin disk utilization report on Solaris.
+    (Martin Traverso via tomwhite)
+
+    HADOOP-2912. MiniDFSCluster restart should wait for namenode to exit
+    safemode. This was causing TestFsck to fail.  (Mahadev Konar via dhruba)
+
+    HADOOP-2820. The following classes in streaming are removed : 
+    StreamLineRecordReader StreamOutputFormat StreamSequenceRecordReader.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2819. The following methods in JobConf are removed:
+    getInputKeyClass(), setInputKeyClass, getInputValueClass(),
+    setInputValueClass(Class theClass), setSpeculativeExecution and
+    getSpeculativeExecution(). (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2817. Removes deprecated mapred.tasktracker.tasks.maximum and 
+    ClusterStatus.getMaxTasks(). (Amareshwari Sri Ramadasu via ddas) 
+
+    HADOOP-2821. Removes deprecated ShellUtil and ToolBase classes from
+    the util package. (Amareshwari Sri Ramadasu via ddas) 
+
+    HADOOP-2934. The namenode was encountering an NPE while loading
+    leases from the fsimage. Fixed. (dhruba)
+
+    HADOOP-2938. Some fs commands did not glob paths.
+    (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-2943. Compression of intermediate map output causes failures
+    in the merge. (cdouglas)
+
+    HADOOP-2870.  DataNode and NameNode close all connections while
+    shutting down. (Hairong Kuang via dhruba)
+
+    HADOOP-2973. Fix TestLocalDFS for Windows platform.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2971. Call select multiple times in SocketIOWithTimeout if it
+    returns early. (rangadi)
+
+    HADOOP-2955. Fix TestCrcCorruption test failures caused by HADOOP-2758
+    (rangadi)
+
+    HADOOP-2657. A flush call on the DFSOutputStream flushes the last
+    partial CRC chunk too.  (dhruba)
+
+    HADOOP-2974. IPC unit tests used "0.0.0.0" to connect to server, which
+    is not always supported. (rangadi)
+
+    HADOOP-2996. Fixes uses of StringBuffer in StreamUtils class.
+    (Dave Brosius via ddas)
+
+    HADOOP-2995. Fixes StreamBaseRecordReader's getProgress to return a 
+    floating point number. (Dave Brosius via ddas)
+
+    HADOOP-2972. Fix for an NPE in FSDataset.invalidate.
+    (Mahadev Konar via dhruba)
+
+    HADOOP-2994. Code cleanup for DFSClient: remove redundant 
+    conversions from string to string.  (Dave Brosius via dhruba)
+
+    HADOOP-3009. TestFileCreation sometimes fails because restarting
+    minidfscluster sometimes creates datanodes with ports that are
+    different from their original instance. (dhruba)
+
+    HADOOP-2992. Distributed Upgrade framework works correctly with
+    more than one upgrade object.  (Konstantin Shvachko via dhruba)
+
+    HADOOP-2679. Fix a typo in libhdfs.  (Jason via dhruba)
+
+    HADOOP-2976. When a lease expires, the Namenode ensures that 
+    blocks of the file are adequately replicated. (dhruba)
+
+    HADOOP-2901. Fixes the creation of info servers in the JobClient
+    and JobTracker. Removes the creation from JobClient and removes
+    additional info server from the JobTracker. Also adds the command
+    line utility to view the history files (HADOOP-2896), and fixes
+    bugs in JSPs to do with analysis - HADOOP-2742, HADOOP-2792.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2890. If different datanodes report the same block but
+    with different sizes to the namenode, the namenode picks the
+    replica(s) with the largest size as the only valid replica(s). (dhruba)
+
+    HADOOP-2825. Deprecated MapOutputLocation.getFile() is removed.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2806. Fixes a streaming document.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3008. SocketIOWithTimeout throws InterruptedIOException if the
+    thread is interrupted while it is waiting. (rangadi)
+    
+    HADOOP-3006. Fix wrong packet size reported by DataNode when a block
+    is being replicated. (rangadi)
+
+    HADOOP-3029. Datanode prints log message "firstbadlink" only if 
+    it detects a bad connection to another datanode in the pipeline. (dhruba)
+
+    HADOOP-3030. Release reserved space for file in InMemoryFileSystem if
+    checksum reservation fails. (Devaraj Das via cdouglas)
+
+    HADOOP-3036. Fix findbugs warnings in UpgradeUtilities. (Konstantin
+    Shvachko via cdouglas)
+
+    HADOOP-3025. ChecksumFileSystem supports the delete method with 
+    the recursive flag. (Mahadev Konar via dhruba)
+
+    HADOOP-3012. dfs -mv file to user home directory throws exception if 
+    the user home directory does not exist. (Mahadev Konar via dhruba)
+    
+    HADOOP-3066. Should not require superuser privilege to query whether HDFS
+    is in safe mode. (jimk)
+
+    HADOOP-3040. If the input line starts with the separator char, the key
+    is set as empty. (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3080. Removes flush calls from JobHistory.
+    (Amareshwari Sriramadasu via ddas) 
+
+    HADOOP-3086. Adds the testcase missed during the commit of HADOOP-3040.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3046. Fix the raw comparators for Text and BytesWritable
+    to use the provided length rather than recompute it. (omalley)
+
+    HADOOP-3094. Fix BytesWritable.toString to avoid extending the sign bit
+    (Owen O'Malley via cdouglas)
+
+    HADOOP-3067. DFSInputStream's position read does not close the sockets.
+    (rangadi)
+
+    HADOOP-3073. close() on SocketInputStream or SocketOutputStream should
+    close the underlying channel. (rangadi)
+
+    HADOOP-3087. Fixes a problem to do with refreshing of loadHistory.jsp.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3065. Better logging message if the rack location of a datanode
+    cannot be determined.  (Devaraj Das via dhruba)
+
+    HADOOP-3064. Commas in a file path should not be treated as delimiters.
+    (Hairong Kuang via shv)
+
+    HADOOP-2997. Adds a test for a non-writable serializer. Also fixes a problem
+    introduced by HADOOP-2399. (Tom White via ddas)
+
+    HADOOP-3114. Fix TestDFSShell on Windows. (Lohit Vijaya Renu via cdouglas)
+
+    HADOOP-3118.  Fix Namenode NPE while loading fsimage after a cluster 
+    upgrade from older disk format. (dhruba)
+
+    HADOOP-3161. Fix FileUtil.HardLink.getLinkCount on Mac OS. (nigel
+    via omalley)
+
+    HADOOP-2927. Fix TestDU to accurately calculate the expected file size.
+    (shv via nigel)
+
+    HADOOP-3123. Fix the native library build scripts to work on Solaris.
+    (tomwhite via omalley)
+
+    HADOOP-3089.  Streaming should accept stderr from task before
+    first key arrives.  (Rick Cox via tomwhite)
+
+    HADOOP-3146. The DFSOutputStream.flush method is renamed to
+    DFSOutputStream.fsync.  (dhruba)
+
+    HADOOP-3165. -put/-copyFromLocal did not treat input file "-" as stdin.
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3041. Deprecate JobConf.setOutputPath and JobConf.getOutputPath.
+    Deprecate OutputFormatBase. Add FileOutputFormat. Existing output formats
+    extending OutputFormatBase, now extend FileOutputFormat. Add the following
+    APIs in FileOutputFormat: setOutputPath, getOutputPath, getWorkOutputPath.
+    (Amareshwari Sriramadasu via nigel)
+
+    HADOOP-3083. The fsimage does not store leases. This would have to be
+    reworked in the next release to support appends. (dhruba)
+
+    HADOOP-3166. Fix an ArrayIndexOutOfBoundsException in the spill thread
+    and make exception handling more promiscuous to catch this condition.
+    (cdouglas)
+
+    HADOOP-3050. DataNode sends one and only one block report after
+    it registers with the namenode. (Hairong Kuang)
+
+    HADOOP-3044. NNBench sets the right configuration for the mapper.
+    (Hairong Kuang)
+
+    HADOOP-3178. Fix GridMix scripts for small and medium jobs
+    to handle input paths differently. (Mukund Madhugiri via nigel)
+
+    HADOOP-1911. Fix an infinite loop in DFSClient when all replicas of a
+    block are bad (cdouglas)
+
+    HADOOP-3157. Fix path handling in DistributedCache and TestMiniMRLocalFS.
+    (Doug Cutting via rangadi) 
+
+    HADOOP-3018. Fix the Eclipse plug-in contrib with respect to removed
+    deprecated methods. (taton)
+
+    HADOOP-3183. Fix TestJobShell to use 'ls' instead of java.io.File::exists
+    since cygwin symlinks are unsupported.
+    (Mahadev konar via cdouglas)
+
+    HADOOP-3175. Fix FsShell.CommandFormat to handle "-" in arguments.
+    (Edward J. Yoon via rangadi)
+
+    HADOOP-3220. Safemode message corrected. (shv)
+
+    HADOOP-3208. Fix WritableDeserializer to set the Configuration on
+    deserialized Writables. (Enis Soztutar via cdouglas)
+
+    HADOOP-3224. 'dfs -du /dir' does not return correct size.
+    (Lohit Vijayarenu via rangadi)
+
+    HADOOP-3223. Fix typo in help message for -chmod. (rangadi)
+
+    HADOOP-1373. checkPath() should ignore case when it compares authority.
+    (Edward J. Yoon via rangadi)
+
+    HADOOP-3204. Fixes a problem to do with ReduceTask's LocalFSMerger not
+    catching Throwable.  (Amar Ramesh Kamat via ddas)
+
+    HADOOP-3229. Report progress when collecting records from the mapper and
+    the combiner. (Doug Cutting via cdouglas)
+
+    HADOOP-3225. Unwrapping methods of RemoteException should initialize
+    detailedMessage field. (Mahadev Konar, shv, cdouglas)
+
+    HADOOP-3247. Fix gridmix scripts to use the correct globbing syntax and
+    change maxentToSameCluster to run the correct number of jobs.
+    (Runping Qi via cdouglas)
+
+    HADOOP-3242. Fix the RecordReader of SequenceFileAsBinaryInputFormat to
+    correctly read from the start of the split and not the beginning of the
+    file. (cdouglas via acmurthy) 
+
+    HADOOP-3256. Encodes the job name used in the filename for history files.
+    (Arun Murthy via ddas)
+
+    HADOOP-3162. Ensure that comma-separated input paths are treated correctly
+    as multiple input paths. (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-3263. Ensure that the job-history log file always follows the
+    pattern of hostname_timestamp_jobid_username_jobname even if username
+    and/or jobname are not specified. This helps to avoid wrong assumptions
+    made about the job-history log filename in jobhistory.jsp. (acmurthy) 
+
+    HADOOP-3251. Fixes getFilesystemName in JobTracker and LocalJobRunner to
+    use FileSystem.getUri instead of FileSystem.getName. (Arun Murthy via ddas)
+
+    HADOOP-3237. Fixes TestDFSShell.testErrOutPut on Windows platform.
+    (Mahadev Konar via ddas)
+
+    HADOOP-3279. TaskTracker checks for SUCCEEDED task status in addition to 
+    COMMIT_PENDING status when it fails maps due to lost map.
+    (Devaraj Das)
+
+    HADOOP-3286. Prevent collisions in gridmix output dirs by increasing the
+    granularity of the timestamp. (Runping Qi via cdouglas)
+
+    HADOOP-3285. Fix input split locality when the splits align to
+    fs blocks. (omalley)
+
+    HADOOP-3372. Fix heap management in streaming tests. (Arun Murthy via
+    cdouglas)
+
+    HADOOP-3031. Fix javac warnings in test classes. (cdouglas)
+
+    HADOOP-3382. Fix memory leak when files are not cleanly closed (rangadi)
+
+    HADOOP-3322. Fix to push MetricsRecord for rpc metrics. (Eric Yang via
+    mukund)
+
+Release 0.16.4 - 2008-05-05
+
+  BUG FIXES
+
+    HADOOP-3138. DFS mkdirs() should not throw an exception if the directory
+    already exists. (rangadi via mukund)
+
+    HADOOP-3294. Fix distcp to check the destination length and retry the copy
+    if it doesn't match the src length. (Tsz Wo (Nicholas), SZE via mukund)
+
+    HADOOP-3186. Fix incorrect permission checking for mv and renameTo
+    in HDFS. (Tsz Wo (Nicholas), SZE via mukund)
+
+Release 0.16.3 - 2008-04-16
+
+  BUG FIXES
+
+    HADOOP-3010. Fix ConcurrentModificationException in ipc.Server.Responder.
+    (rangadi)
+
+    HADOOP-3154. Catch all Throwables from the SpillThread in MapTask, rather
+    than IOExceptions only. (ddas via cdouglas)
+
+    HADOOP-3159. Avoid file system cache being overwritten whenever
+    configuration is modified. (Tsz Wo (Nicholas), SZE via hairong)
+
+    HADOOP-3139. Remove the consistency check for the FileSystem cache in
+    closeAll() that causes spurious warnings and a deadlock.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3195. Fix TestFileSystem to be deterministic.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-3069. Primary name-node should not truncate image when transferring
+    it from the secondary. (shv)
+
+    HADOOP-3182. Change permissions of the job-submission directory to 777
+    from 733 to ensure sharing of HOD clusters works correctly. (Tsz Wo
+    (Nicholas), Sze and Amareshwari Sri Ramadasu via acmurthy) 
+
+Release 0.16.2 - 2008-04-02
+
+  BUG FIXES
+
+    HADOOP-3011. Prohibit distcp from overwriting directories on the
+    destination filesystem with files. (cdouglas)
+
+    HADOOP-3033. The BlockReceiver thread in the datanode writes data to 
+    the block file, changes file position (if needed) and flushes all by
+    itself. The PacketResponder thread does not flush block file. (dhruba)
+
+    HADOOP-2978. Fixes the JobHistory log format for counters.
+    (Runping Qi via ddas)
+
+    HADOOP-2985. Fixes LocalJobRunner to tolerate null job output path.
+    Also makes the _temporary a constant in MRConstants.java.
+    (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3003. FileSystem cache key is updated after a 
+    FileSystem object is created. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-3042. Updates the Javadoc in JobConf.getOutputPath to reflect 
+    the actual temporary path. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3007. Tolerate mirror failures while DataNode is replicating
+    blocks as it used to before. (rangadi)
+
+    HADOOP-2944. Fixes a "Run on Hadoop" wizard NPE when creating a
+    Location from the wizard. (taton)
+
+    HADOOP-3049. Fixes a problem in MultiThreadedMapRunner to do with
+    catching RuntimeExceptions. (Alejandro Abdelnur via ddas)
+
+    HADOOP-3039. Fixes a problem to do with exceptions in tasks not
+    killing jobs. (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3027. Fixes a problem to do with adding a shutdown hook in
+    FileSystem.  (Amareshwari Sriramadasu via ddas)
+
+    HADOOP-3056. Fix distcp when the target is an empty directory by
+    making sure the directory is created first. (cdouglas and acmurthy 
+    via omalley)
+
+    HADOOP-3070. Protect the trash emptier thread from null pointer
+    exceptions. (Koji Noguchi via omalley)
+
+    HADOOP-3084. Fix HftpFileSystem to work for zero-length files.
+    (cdouglas)
+
+    HADOOP-3107. Fix NPE when fsck invokes getListings. (dhruba)
+
+    HADOOP-3104. Limit MultithreadedMapRunner to have a fixed length queue
+    between the RecordReader and the map threads. (Alejandro Abdelnur via
+    omalley)
+
+    HADOOP-2833. Do not use "Dr. Who" as the default user in JobClient. 
+    A valid user name is required. (Tsz Wo (Nicholas), SZE via rangadi)
+
+    HADOOP-3128. Throw RemoteException in setPermissions and setOwner of 
+    DistributedFileSystem.  (shv via nigel)
+
+Release 0.16.1 - 2008-03-13
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-2869. Deprecate SequenceFile.setCompressionType in favor of
+    SequenceFile.createWriter, SequenceFileOutputFormat.setCompressionType,
+    and JobConf.setMapOutputCompressionType. (Arun C Murthy via cdouglas)
+    Configuration changes to hadoop-default.xml:
+      deprecated io.seqfile.compression.type
+
+  IMPROVEMENTS
+
+    HADOOP-2371. User guide for file permissions in HDFS.
+    (Robert Chansler via rangadi)
+
+    HADOOP-3098. Allow more characters in user and group names while
+    using -chown and -chgrp commands. (rangadi)
+    
+  BUG FIXES
+
+    HADOOP-2789. Race condition in IPC Server Responder that could close
+    connections early. (Raghu Angadi)
+    
+    HADOOP-2785. minor. Fix a typo in Datanode block verification 
+    (Raghu Angadi)
+    
+    HADOOP-2788. minor. Fix help message for chgrp shell command (Raghu Angadi).
+    
+    HADOOP-1188. fstime file is updated when a storage directory containing
+    namespace image becomes inaccessible. (shv)
+
+    HADOOP-2787. An application can set a configuration variable named
+    dfs.umask to set the umask that is used by DFS.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2780. The default socket buffer size for DataNodes is 128K.
+    (dhruba)
+
+    HADOOP-2716. Superuser privileges for the Balancer.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2754. Filter out .crc files from local file system listing.
+    (Hairong Kuang via shv)
+
+    HADOOP-2733. Fix compiler warnings in test code.
+    (Tsz Wo (Nicholas), SZE via cdouglas)
+
+    HADOOP-2725. Modify distcp to avoid leaving partially copied files at
+    the destination after encountering an error. (Tsz Wo (Nicholas), SZE
+    via cdouglas)
+
+    HADOOP-2391. Cleanup job output directory before declaring a job as
+    SUCCESSFUL. (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2808. Minor fix to FileUtil::copy to respect the overwrite
+    formal parameter. (cdouglas)
+
+    HADOOP-2683. Moving UGI out of the RPC Server.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2814. Fix for NPE in datanode in unit test TestDataTransferProtocol.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2811. Dump of counters in job history does not add a comma between
+    groups. (runping via omalley)
+
+    HADOOP-2735. Enables setting TMPDIR for tasks. 
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2843. Fix protections on map-side join classes to enable derivation.
+    (cdouglas via omalley)
+
+    HADOOP-2840. Fix gridmix scripts to correctly invoke the java sort through
+    the proper jar. (Mukund Madhugiri via cdouglas)
+
+    HADOOP-2769.  TestNNThroughputBenchmark should not use a fixed port for
+    the namenode http port. (omalley)
+
+    HADOOP-2852. Update gridmix benchmark to avoid an artificially long tail.
+    (cdouglas)
+
+    HADOOP-2894. Fix a problem to do with tasktrackers failing to connect to
+    JobTracker upon reinitialization. (Owen O'Malley via ddas).
+
+    HADOOP-2903.  Fix exception generated by Metrics while using pushMetric().
+    (girish vaitheeswaran via dhruba)
+
+    HADOOP-2904.  Fix to RPC metrics to log the correct host name. 
+    (girish vaitheeswaran via dhruba)
+
+    HADOOP-2918.  Improve error logging so that DFS write failures with
+    "No lease on file" can be diagnosed. (dhruba)
+
+    HADOOP-2923.  Add SequenceFileAsBinaryInputFormat, which was
+    missed in the commit for HADOOP-2603. (cdouglas via omalley)
+
+    HADOOP-2931. IOException thrown by DFSOutputStream had wrong stack
+    trace in some cases. (Michael Bieniosek via rangadi)
+
+    HADOOP-2883. Write failures and data corruptions on HDFS files.
+    The write timeout is back to what it was in the 0.15 release. Also, the
+    datanodes flush the block file buffered output stream before
+    sending a positive ack for the packet back to the client. (dhruba)
+
+    HADOOP-2756. NPE in DFSClient while closing DFSOutputStreams 
+    under load. (rangadi)
+
+    HADOOP-2958. Fixed FileBench, which broke due to the check for existence
+    of the output directory introduced in HADOOP-2391, and a trivial bug in
+    GenericMRLoadGenerator where min/max word lengths were identical since
+    they were looking at the same config variables. (Chris Douglas via
+    acmurthy)
+
+    HADOOP-2915. Fixed FileSystem.CACHE so that a username is included
+    in the cache key. (Tsz Wo (Nicholas), SZE via nigel)
+
+    HADOOP-2813. TestDU unit test uses its own directory to run its 
+    sequence of tests.  (Mahadev Konar via dhruba)
+
+Release 0.16.0 - 2008-02-07
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-1245.  Use the mapred.tasktracker.tasks.maximum value
+    configured on each tasktracker when allocating tasks, instead of
+    the value configured on the jobtracker. InterTrackerProtocol
+    version changed from 5 to 6. (Michael Bieniosek via omalley)
+
+    HADOOP-1843. Removed code from Configuration and JobConf deprecated by 
+    HADOOP-785 and a minor fix to Configuration.toString. Specifically the 
+    important change is that mapred-default.xml is no longer supported and 
+    Configuration no longer supports the notion of default/final resources.
+    (acmurthy) 
+
+    HADOOP-1302.  Remove deprecated abacus code from the contrib directory.
+    This also fixes a configuration bug in AggregateWordCount, so that the
+    job now works.  (enis)
+
+    HADOOP-2288.  Enhance FileSystem API to support access control.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2184.  RPC Support for user permissions and authentication.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2185.  RPC Server uses any available port if the specified
+    port is zero. Otherwise it uses the specified port. Also combines
+    the configuration attributes for the servers' bind address and
+    port from "x.x.x.x" and "y" to "x.x.x.x:y". 
+    Deprecated configuration variables:
+      dfs.info.bindAddress
+      dfs.info.port
+      dfs.datanode.bindAddress
+      dfs.datanode.port
+      dfs.datanode.info.bindAdress
+      dfs.datanode.info.port
+      dfs.secondary.info.bindAddress
+      dfs.secondary.info.port
+      mapred.job.tracker.info.bindAddress
+      mapred.job.tracker.info.port
+      mapred.task.tracker.report.bindAddress
+      tasktracker.http.bindAddress
+      tasktracker.http.port
+    New configuration variables (post HADOOP-2404):
+      dfs.secondary.http.address
+      dfs.datanode.address
+      dfs.datanode.http.address
+      dfs.http.address
+      mapred.job.tracker.http.address
+      mapred.task.tracker.report.address
+      mapred.task.tracker.http.address
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-2401.  Only the current leaseholder can abandon a block for
+    a HDFS file.  ClientProtocol version changed from 20 to 21.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2381.  Support permission information in FileStatus. Client
+    Protocol version changed from 21 to 22.  (Raghu Angadi via dhruba)
+
+    HADOOP-2110. Block report processing creates fewer transient objects.
+    Datanode Protocol version changed from 10 to 11.  
+    (Sanjay Radia via dhruba)
+    
+    HADOOP-2567.  Add FileSystem#getHomeDirectory(), which returns the
+    user's home directory in a FileSystem as a fully-qualified path.
+    FileSystem#getWorkingDirectory() is also changed to return a
+    fully-qualified path, which can break applications that attempt
+    to, e.g., pass LocalFileSystem#getWorkingDir().toString() directly
+    to java.io methods that accept file names. (cutting)
+
+    HADOOP-2514.  Change trash feature to maintain a per-user trash
+    directory, named ".Trash" in the user's home directory.  The
+    "fs.trash.root" parameter is no longer used.  Full source paths
+    are also no longer reproduced within the trash.
+
+    HADOOP-2012. Periodic data verification on Datanodes.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1707. The DFSClient does not use a local disk file to cache
+    writes to a HDFS file. Changed Data Transfer Version from 7 to 8.
+    (dhruba)
+
+    HADOOP-2652. Fix permission issues for HftpFileSystem. This is an 
+    incompatible change since distcp may not be able to copy files 
+    from cluster A (compiled with this patch) to cluster B (compiled 
+    with previous versions). (Tsz Wo (Nicholas), SZE via dhruba)
+
+  NEW FEATURES
+
+    HADOOP-1857.  Ability to run a script when a task fails to capture stack
+    traces. (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2299.  Definition of a login interface.  A simple implementation for
+    Unix users and groups. (Hairong Kuang via dhruba)
+
+    HADOOP-1652.  A utility to balance data among datanodes in a HDFS cluster.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-2085.  A library to support map-side joins of consistently 
+    partitioned and sorted data sets. (Chris Douglas via omalley)
+
+    HADOOP-2336. Shell commands to modify file permissions. (rangadi)
+
+    HADOOP-1298. Implement file permissions for HDFS.
+    (Tsz Wo (Nicholas) & taton via cutting)
+
+    HADOOP-2447. HDFS can be configured to limit the total number of 
+    objects (inodes and blocks) in the file system. (dhruba)
+
+    HADOOP-2487. Added an option to get statuses for all submitted/run jobs.
+    This information can be used to develop tools for analysing jobs.
+    (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-1873. Implement user permissions for Map/Reduce framework.
+    (Hairong Kuang via shv)
+
+    HADOOP-2532.  Add to MapFile a getClosest method that returns the key
+    that comes just before if the key is not present.  (stack via tomwhite)
+   
+    HADOOP-1883. Add versioning to Record I/O. (Vivek Ratan via ddas)
+
+    HADOOP-2603.  Add SequenceFileAsBinaryInputFormat, which reads
+    sequence files as BytesWritable/BytesWritable regardless of the
+    key and value types used to write the file. (cdouglas via omalley)
+
+    HADOOP-2367. Add ability to profile a subset of map/reduce tasks and fetch
+    the result to the local filesystem of the submitting application. Also
+    includes a general IntegerRanges extension to Configuration for setting
+    positive, ranged parameters. (Owen O'Malley via cdouglas)
+
+  IMPROVEMENTS
+
+    HADOOP-2045.  Change committer list on website to a table, so that
+    folks can list their organization, timezone, etc.  (cutting)
+
+    HADOOP-2058.  Facilitate creating new datanodes dynamically in
+    MiniDFSCluster. (Hairong Kuang via dhruba)
+
+    HADOOP-1855.  fsck verifies block placement policies and reports
+    violations.  (Konstantin Shvachko via dhruba)
+
+    HADOOP-1604.  A system administrator can finalize namenode upgrades
+    without running the cluster. (Konstantin Shvachko via dhruba)
+
+    HADOOP-1839.  Link-ify the Pending/Running/Complete/Killed grid in
+    jobdetails.jsp to help quickly narrow down and see categorized TIPs' 
+    details via jobtasks.jsp. (Amar Kamat via acmurthy)
+
+    HADOOP-1210.  Log counters in job history. (Owen O'Malley via ddas)
+
+    HADOOP-1912. Datanode has two new commands COPY and REPLACE. These are
+    needed for supporting data rebalance.  (Hairong Kuang via dhruba)
+
+    HADOOP-2086. This patch adds the ability to add dependencies to a job
+    (run via JobControl) after construction.  (Adrian Woodhead via ddas)
+
+    HADOOP-1185. Support changing the logging level of a server without 
+    restarting the server.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2134.  Remove developer-centric requirements from overview.html and
+    keep it end-user focussed, specifically sections related to subversion and
+    building Hadoop. (Jim Kellerman via acmurthy)
+
+    HADOOP-1989. Support simulated DataNodes. This helps creating large virtual
+    clusters for testing purposes.  (Sanjay Radia via dhruba)
+    
+    HADOOP-1274. Support different numbers of mappers and reducers per
+    TaskTracker to allow administrators to better configure and utilize
+    heterogeneous clusters.
+    Configuration changes to hadoop-default.xml:
+      add mapred.tasktracker.map.tasks.maximum (default value of 2)
+      add mapred.tasktracker.reduce.tasks.maximum (default value of 2)
+      remove mapred.tasktracker.tasks.maximum (deprecated for 0.16.0)
+    (Amareshwari Sri Ramadasu via acmurthy) 
+
+    HADOOP-2104. Adds a description to the ant targets. This makes the 
+    output of "ant -projecthelp" sensible. (Chris Douglas via ddas)
+
+    HADOOP-2127. Added a pipes sort example to benchmark trivial pipes
+    application versus trivial java application. (omalley via acmurthy)
+
+    HADOOP-2113. A new shell command "dfs -text" to view the contents of
+    a gzipped file or SequenceFile. (Chris Douglas via dhruba)
+
+    HADOOP-2207.  Add a "package" target for contrib modules that
+    permits each to determine what files are copied into release
+    builds.  (stack via cutting)
+
+    HADOOP-1984. Makes the backoff for failed fetches exponential. 
+    Earlier, it was a random backoff from an interval. 
+    (Amar Kamat via ddas)
+
+    HADOOP-1327.  Include website documentation for streaming. (Rob Weltman
+    via omalley)
+
+    HADOOP-2000.  Rewrite NNBench to measure namenode performance accurately.
+    It now uses the map-reduce framework for load generation.
+    (Mukund Madhugiri via dhruba)
+
+    HADOOP-2248. Speeds up the framework w.r.t Counters. Also has API
+    updates to the Counters part. (Owen O'Malley via ddas)
+
+    HADOOP-2326. The initial block report at Datanode startup time has
+    a random backoff period.  (Sanjay Radia via dhruba)
+
+    HADOOP-2432. HDFS includes the name of the file while throwing 
+    "File does not exist"  exception. (Jim Kellerman via dhruba)
+
+    HADOOP-2457. Added a 'forrest.home' property to the 'docs' target in
+    build.xml. (acmurthy) 
+
+    HADOOP-2149.  A new benchmark for three name-node operations: file create,
+    open, and block report, to evaluate the name-node performance 
+    for optimizations or new features. (Konstantin Shvachko via shv)
+
+    HADOOP-2466. Change FileInputFormat.computeSplitSize to a protected
+    non-static method to allow sub-classes to provide alternate
+    implementations. (Alejandro Abdelnur via acmurthy) 
+
+    HADOOP-2425. Change TextOutputFormat to handle Text specifically for better
+    performance. Make NullWritable implement Comparable. Make TextOutputFormat
+    treat NullWritable like null. (omalley)
+
+    HADOOP-1719. Improves the utilization of shuffle copier threads.
+    (Amar Kamat via ddas)
+ 
+    HADOOP-2390. Added documentation for user-controls for intermediate
+    map-outputs & final job-outputs and native-hadoop libraries. (acmurthy) 
+ 
+    HADOOP-1660. Add the cwd of the map/reduce task to the java.library.path
+    of the child-jvm to support loading of native libraries distributed via
+    the DistributedCache. (acmurthy)
+ 
+    HADOOP-2285. Speeds up TextInputFormat. Also includes updates to the
+    Text API. (Owen O'Malley via cdouglas)
+
+    HADOOP-2233. Adds a generic load generator for modeling MR jobs. (cdouglas)
+
+    HADOOP-2369. Adds a set of scripts for simulating a mix of user map/reduce
+    workloads. (Runping Qi via cdouglas)
+
+    HADOOP-2547. Removes use of a 'magic number' in build.xml. 
+    (Hrishikesh via nigel)
+
+    HADOOP-2268. Fix org.apache.hadoop.mapred.jobcontrol classes to use the
+    List/Map interfaces rather than concrete ArrayList/HashMap classes
+    internally. (Adrian Woodhead via acmurthy)
+
+    HADOOP-2406. Add a benchmark for measuring read/write performance through
+    the InputFormat interface, particularly with compression. (cdouglas)
+
+    HADOOP-2131. Allow finer-grained control over speculative-execution. Now
+    users can set it for maps and reduces independently.
+    Configuration changes to hadoop-default.xml:
+      deprecated mapred.speculative.execution
+      add mapred.map.tasks.speculative.execution
+      add mapred.reduce.tasks.speculative.execution
+    (Amareshwari Sri Ramadasu via acmurthy) 
+      
+    HADOOP-1965. Interleave sort/spill in the map-task along with calls to the
+    Mapper.map method. This is done by splitting the 'io.sort.mb' buffer into
+    two and using one half for collecting map-outputs and the other half for
+    sort/spill. (Amar Kamat via acmurthy)
+    
+    HADOOP-2464. Unit tests for chmod, chown, and chgrp using DFS.
+    (Raghu Angadi)
+
+    HADOOP-1876. Persist statuses of completed jobs in HDFS so that the
+    JobClient can query and get information about decommissioned jobs and also
+    across JobTracker restarts.
+    Configuration changes to hadoop-default.xml:
+      add mapred.job.tracker.persist.jobstatus.active (default value of false)
+      add mapred.job.tracker.persist.jobstatus.hours (default value of 0)
+      add mapred.job.tracker.persist.jobstatus.dir (default value of
+                                                    /jobtracker/jobsInfo)
+    (Alejandro Abdelnur via acmurthy) 
+
+    HADOOP-2077. Added version and build information to STARTUP_MSG for all
+    hadoop daemons to aid error-reporting, debugging etc. (acmurthy) 
+
+    HADOOP-2398. Additional instrumentation for NameNode and RPC server.
+    Add support for accessing instrumentation statistics via JMX.
+    (Sanjay Radia via dhruba)
+
+    HADOOP-2449. A return of the non-MR version of NNBench.
+    (Sanjay Radia via shv)
+
+    HADOOP-1989. Remove 'datanodecluster' command from bin/hadoop.
+    (Sanjay Radia via shv)
+
+    HADOOP-1742. Improve JavaDoc documentation for ClientProtocol, DFSClient,
+    and FSNamesystem. (Konstantin Shvachko)
+
+    HADOOP-2298. Add Ant target for a binary-only distribution.
+    (Hrishikesh via nigel)
+
+    HADOOP-2509. Add Ant target for Rat report (Apache license header
+    reports).  (Hrishikesh via nigel)
+
+    HADOOP-2469.  WritableUtils.clone should take a Configuration
+    instead of a JobConf. (stack via omalley)
+
+    HADOOP-2659. Introduce superuser permissions for admin operations.
+    (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2596. Added a SequenceFile.createWriter api which allows the user
+    to specify the blocksize, replication factor and the buffersize to be
+    used for the underlying HDFS file. (Alejandro Abdelnur via acmurthy) 
+
+    HADOOP-2431. Test HDFS File Permissions. (Hairong Kuang via shv)
+
+    HADOOP-2232. Add an option to disable Nagle's algorithm in the IPC stack.
+    (Clint Morgan via cdouglas)
+
+    HADOOP-2342. Created a micro-benchmark for measuring 
+    local-file versus hdfs reads. (Owen O'Malley via nigel)
+
+    HADOOP-2529. First version of HDFS User Guide. (Raghu Angadi)
+
+    HADOOP-2690. Add jar-test target to build.xml, separating compilation
+    and packaging of the test classes. (Enis Soztutar via cdouglas)
+
+  OPTIMIZATIONS
+
+    HADOOP-1898.  Release the lock protecting the last time of the last stack
+    dump while the dump is happening. (Amareshwari Sri Ramadasu via omalley)
+
+    HADOOP-1900. Makes the heartbeat and task event queries interval 
+    dependent on the cluster size.  (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2208. Counter update frequency (from TaskTracker to JobTracker) is 
+    capped at 1 minute.  (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2284. Reduce the number of progress updates during the sorting in 
+    the map task. (Amar Kamat via ddas)
+
+  BUG FIXES
+
+    HADOOP-2583.  Fixes a bug in the Eclipse plug-in UI to edit locations.
+    Plug-in version is now synchronized with Hadoop version.
+
+    HADOOP-2100.  Remove faulty check for existence of $HADOOP_PID_DIR and let
+    'mkdir -p' check & create it. (Michael Bieniosek via acmurthy)
+
+    HADOOP-1642.  Ensure jobids generated by LocalJobRunner are unique to
+    avoid collisions and hence job-failures. (Doug Cutting via acmurthy) 
+
+    HADOOP-2096.  Close open file-descriptors held by streams while localizing
+    job.xml in the JobTracker and while displaying it on the webui in 
+    jobconf.jsp. (Amar Kamat via acmurthy)
+
+    HADOOP-2098.  Log start & completion of empty jobs to JobHistory, which
+    also ensures that we close the file-descriptor of the job's history log 
+    opened during job-submission. (Amar Kamat via acmurthy)
+
+    HADOOP-2112.  Adding back changes to build.xml lost while reverting
+    HADOOP-1622 i.e. http://svn.apache.org/viewvc?view=rev&revision=588771.
+    (acmurthy)
+
+    HADOOP-2089.  Fixes the command line argument handling to handle multiple
+    -cacheArchive in Hadoop streaming.  (Lohit Vijayarenu via ddas)
+
+    HADOOP-2071.  Fix StreamXmlRecordReader to use a BufferedInputStream
+    wrapped over the DFSInputStream since mark/reset aren't supported by
+    DFSInputStream anymore. (Lohit Vijayarenu via acmurthy)
+
+    HADOOP-1348.  Allow XML comments inside configuration files. 
+    (Rajagopal Natarajan and Enis Soztutar via enis)
+
+    HADOOP-1952.  Improve handling of invalid, user-specified classes while
+    configuring streaming jobs such as combiner, input/output formats etc.
+    Now invalid options are caught, logged and jobs are failed early. (Lohit
+    Vijayarenu via acmurthy)
+
+    HADOOP-2151. FileSystem.globPaths validates the list of Paths that
+    it returns.  (Lohit Vijayarenu via dhruba)
+
+    HADOOP-2121. Cleanup DFSOutputStream when the stream encountered errors
+    when Datanodes became full.  (Raghu Angadi via dhruba)
+
+    HADOOP-1130. The FileSystem.closeAll() method closes all existing
+    DFSClients.  (Chris Douglas via dhruba)
+
+    HADOOP-2204. DFSTestUtil.waitReplication was not waiting for all replicas
+    to get created, thus causing unit test failure.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2078. A zero-size file may have no blocks associated with it.
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-2212. ChecksumFileSystem.getSumBufferSize might throw 
+    java.lang.ArithmeticException. The fix is to initialize bytesPerChecksum
+    to 0.  (Michael Bieniosek via ddas)
+
+    HADOOP-2216.  Fix jobtasks.jsp to ensure that it first collects the
+    taskids which satisfy the filtering criteria and then uses that list to
+    print out only the required task-reports; previously it was oblivious to
+    the filtering and hence used the wrong index into the array of task-reports. 
+    (Amar Kamat via acmurthy)
+
+    HADOOP-2272.  Fix findbugs target to reflect changes made to the location
+    of the streaming jar file by HADOOP-2207.  (Adrian Woodhead via nigel)
+
+    HADOOP-2244.  Fixes the MapWritable.readFields to clear the instance 
+    field variable every time readFields is called. (Michael Stack via ddas).
+
+    HADOOP-2245.  Fixes LocalJobRunner to include a jobId in the mapId. Also,  
+    adds a testcase for JobControl. (Adrian Woodhead via ddas).
+
+    HADOOP-2275. Fix erroneous detection of corrupted file when namenode 
+    fails to allocate any datanodes for newly allocated block.
+    (Dhruba Borthakur via dhruba)
+
+    HADOOP-2256. Fix a bug in the namenode that could cause it to encounter
+    an infinite loop while deleting excess replicas that were created by 
+    block rebalancing.  (Hairong Kuang via dhruba)
+
+    HADOOP-2209. SecondaryNamenode process exits if it encounters exceptions 
+    that it cannot handle.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-2314. Prevent TestBlockReplacement from occasionally getting
+    into an infinite loop.  (Hairong Kuang via dhruba)
+
+    HADOOP-2300. This fixes a bug where mapred.tasktracker.tasks.maximum
+    would be ignored even if it was set in hadoop-site.xml.
+    (Amareshwari Sri Ramadasu via ddas)
+
+    HADOOP-2349.  Improve code layout in file system transaction logging code.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2368.  Fix unit tests on Windows.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2363.  This fix allows running multiple instances of the unit test
+    in parallel. The bug was introduced by HADOOP-2185, which changed
+    port-rolling behaviour.  (Konstantin Shvachko via dhruba)
+
+    HADOOP-2271.  Fix chmod task to be non-parallel. (Adrian Woodhead via
+    omalley)
+
+    HADOOP-2313.  Fail the build if building libhdfs fails. (nigel via omalley)
+
+    HADOOP-2359.  Remove warning for interrupted exception when closing down
+    minidfs. (dhruba via omalley)
+
+    HADOOP-1841. Prevent slow clients from consuming threads in the NameNode. 
+    (dhruba)
+    
+    HADOOP-2323. JobTracker.close() should not print stack traces for
+    normal exit.  (jimk via cutting)
+
+    HADOOP-2376. Prevents sort example from overriding the number of maps.
+    (Owen O'Malley via ddas)
+
+    HADOOP-2434. FSDatasetInterface read interface causes HDFS reads to occur 
+    in 1 byte chunks, causing performance degradation.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2459. Fix package target so that src/docs/build files are not
+    included in the release.  (nigel)
+
+    HADOOP-2215.  Fix documentation in cluster_setup.html &
+    mapred_tutorial.html to reflect that mapred.tasktracker.tasks.maximum has
+    been superseded by mapred.tasktracker.{map|reduce}.tasks.maximum. 
+    (Amareshwari Sri Ramadasu via acmurthy)
+
+    HADOOP-2352. Remove AC_CHECK_LIB for libz and liblzo to ensure that
+    libhadoop.so doesn't have a dependency on them. (acmurthy) 
+
+    HADOOP-2453. Fix the configuration for wordcount-simple example in Hadoop 
+    Pipes which currently produces an XML parsing error. (Amareshwari Sri
+    Ramadasu via acmurthy)
+
+    HADOOP-2476. Unit test failure while reading permission bits of local
+    file system (on Windows) fixed.  (Raghu Angadi via dhruba)
+
+    HADOOP-2247.  Fine-tune the strategies for killing mappers and reducers
+    due to failures while fetching map-outputs. Now the map-completion times
+    and number of currently running reduces are taken into account by the
+    JobTracker before killing the mappers, while the progress made by the
+    reducer and the number of fetch-failures vis-a-vis the total number of
+    fetch-attempts are taken into account before the reducer kills itself.
+    (Amar Kamat via acmurthy)
+    
+    HADOOP-2452. Fix eclipse plug-in build.xml to refer to the right
+    location where hadoop-*-core.jar is generated. (taton)
+
+    HADOOP-2492. Additional debugging in the rpc server to better 
+    diagnose ConcurrentModificationException. (dhruba)
+
+    HADOOP-2344. Enhance the utility for executing shell commands to read the
+    stdout/stderr streams while waiting for the command to finish (to free up
+    the buffers). Also, this patch throws away stderr of the DF utility.
+    @deprecated 
+      org.apache.hadoop.fs.ShellCommand for org.apache.hadoop.util.Shell
+      org.apache.hadoop.util.ShellUtil for 
+        org.apache.hadoop.util.Shell.ShellCommandExecutor
+    (Amar Kamat via acmurthy)
+
+    HADOOP-2511. Fix a javadoc warning in org.apache.hadoop.util.Shell
+    introduced by HADOOP-2344. (acmurthy) 
+
+    HADOOP-2442. Fix TestLocalFileSystemPermission.testLocalFSsetOwner
+    to work on more platforms. (Raghu Angadi via nigel)
+
+    HADOOP-2488. Fix a regression in random read performance.
+    (Michael Stack via rangadi)
+
+    HADOOP-2523. Fix TestDFSShell.testFilePermissions on Windows.
+    (Raghu Angadi via nigel)
+
+    HADOOP-2535. Removed support for deprecated mapred.child.heap.size and
+    fixed some indentation issues in TaskRunner.
+    Configuration changes to hadoop-default.xml:
+      remove mapred.child.heap.size
+    (acmurthy)
+
+    HADOOP-2512. Fix error stream handling in Shell. Use exit code to
+    detect shell command errors in RawLocalFileSystem. (Raghu Angadi)
+
+    HADOOP-2446. Fixes TestHDFSServerPorts and TestMRServerPorts so they
+    do not rely on statically configured ports and cleanup better. (nigel)
+
+    HADOOP-2537. Make build process compatible with Ant 1.7.0.
+    (Hrishikesh via nigel)
+
+    HADOOP-1281. Ensure running tasks of completed map TIPs (e.g. speculative
+    tasks) are killed as soon as the TIP completed. (acmurthy)
+
+    HADOOP-2571. Suppress a spurious warning in test code. (cdouglas)
+
+    HADOOP-2481. NNBench reports its progress periodically.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-2601. Start name-node on a free port for TestNNThroughputBenchmark.
+    (Konstantin Shvachko)
+
+    HADOOP-2494.  Set +x on contrib/*/bin/* in packaged tar bundle.
+    (stack via tomwhite)
+
+    HADOOP-2605. Remove bogus leading slash in task-tracker report bindAddress.
+    (Konstantin Shvachko)
+    
+    HADOOP-2620. Trivial. 'bin/hadoop fs -help' did not list chmod, chown, and
+    chgrp. (Raghu Angadi)
+
+    HADOOP-2614. The DFS WebUI accesses are configured to be from the user
+    specified by dfs.web.ugi.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2543. Implement a "no-permission-checking" mode for smooth
+    upgrade from a pre-0.16 install of HDFS.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-290. A DataNode log message now prints the target of a replication
+    request correctly. (dhruba)
+
+    HADOOP-2538. Redirect to a warning, if plaintext parameter is true but 
+    the filter parameter is not given in TaskLogServlet.  
+    (Michael Bieniosek via enis)
+
+    HADOOP-2582. Prevent 'bin/hadoop fs -copyToLocal' from creating
+    zero-length files when the src does not exist.
+    (Lohit Vijayarenu via cdouglas)
+
+    HADOOP-2189. Incrementing user counters should count as progress. (ddas)
+
+    HADOOP-2649. The NameNode periodically computes replication work for
+    the datanodes. The periodicity of this computation is now configurable.
+    (dhruba)
+
+    HADOOP-2549. Correct disk size computation so that data-nodes could switch 
+    to other local drives if the current one is full. (Hairong Kuang via shv)
+
+    HADOOP-2633. Fsck should call name-node methods directly rather than 
+    through rpc. (Tsz Wo (Nicholas), SZE via shv)
+
+    HADOOP-2687. Modify a few log message generated by dfs client to be
+    logged only at INFO level. (stack via dhruba)
+
+    HADOOP-2402. Fix BlockCompressorStream to ensure it buffers data before
+    sending it down to the compressor so that each write call doesn't
+    compress. (Chris Douglas via acmurthy) 
+
+    HADOOP-2645. The Metrics initialization code does not throw
+    exceptions when servers are restarted by MiniDFSCluster.
+    (Sanjay Radia via dhruba)
+
+    HADOOP-2691. Fix a race condition that was causing the DFSClient
+    to erroneously remove a good datanode from a pipeline that actually
+    had another datanode that was bad. (dhruba)
+
+    HADOOP-1195. All code in FSNamesystem checks the return value
+    of getDataNode for null before using it. (dhruba)
+
+    HADOOP-2640. Fix a bug in MultiFileSplitInputFormat that was always
+    returning 1 split in some circumstances. (Enis Soztutar via nigel)
+
+    HADOOP-2626. Fix paths with special characters to work correctly
+    with the local filesystem.  (Thomas Friol via cutting)
+
+    HADOOP-2646. Fix SortValidator to work with fully-qualified 
+    working directories.  (Arun C Murthy via nigel)
+
+    HADOOP-2092. Added a ping mechanism to the pipes' task to periodically
+    check if the parent Java task is running, and exit if the parent isn't
+    alive and responding. (Amareshwari Sri Ramadasu via acmurthy) 
+
+    HADOOP-2714. TestDecommission failed on windows because the replication
+    request was timing out. (dhruba)
+
+    HADOOP-2576. Namenode performance degradation over time triggered by
+    large heartbeat interval. (Raghu Angadi)
+
+    HADOOP-2713. TestDatanodeDeath failed on windows because the replication
+    request was timing out. (dhruba)
+
+    HADOOP-2639. Fixes incorrect maintenance of the values for
+    runningMapTasks/runningReduceTasks. (Amar Kamat and Arun Murthy 
+    via ddas)
+
+    HADOOP-2723. Fixed the check for whether to do user task
+    profiling. (Amareshwari Sri Ramadasu via omalley)
+
+    HADOOP-2734. Link forrest docs to new http://hadoop.apache.org
+    (Doug Cutting via nigel)
+
+    HADOOP-2641. Added Apache license headers to 95 files. (nigel)
+
+    HADOOP-2732. Fix bug in path globbing.  (Hairong Kuang via nigel)
+
+    HADOOP-2404. Fix backwards compatibility with hadoop-0.15 configuration
+    files that was broken by HADOOP-2185. (omalley)
+
+    HADOOP-2755. Fix fsck performance degradation because of permissions 
+    issue.  (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-2768. Fix performance regression caused by HADOOP-1707.
+    (dhruba borthakur via nigel)
+
+    HADOOP-3108. Fix NPE in setPermission and setOwner. (shv)
+
+Release 0.15.3 - 2008-01-18
+
+  BUG FIXES
+
+    HADOOP-2562. globPaths supports {ab,cd}.  (Hairong Kuang via dhruba)
+
+    HADOOP-2540. fsck reports missing blocks incorrectly. (dhruba)
+
+    HADOOP-2570. "work" directory created unconditionally, and symlinks
+    created from the task cwds.
+
+    HADOOP-2574. Fixed mapred_tutorial.xml to correct minor errors with the
+    WordCount examples. (acmurthy) 
+
+Release 0.15.2 - 2008-01-02
+
+  BUG FIXES
+
+    HADOOP-2246.  Moved the changelog for HADOOP-1851 from the NEW FEATURES 
+    section to the INCOMPATIBLE CHANGES section. (acmurthy)
+
+    HADOOP-2238.  Fix TaskGraphServlet so that it sets the content type of 
+    the response appropriately.  (Paul Saab via enis)
+
+    HADOOP-2129.  Fix so that distcp works correctly when source is
+    HDFS but not the default filesystem.  HDFS paths returned by the
+    listStatus() method are now fully-qualified.  (cutting)
+
+    HADOOP-2378.  Fixes a problem where the last task completion event would
+    get created after the job completes. (Alejandro Abdelnur via ddas)
+
+    HADOOP-2228.  Checks whether a job with a certain jobId is already running
+    and then tries to create the JobInProgress object. 
+    (Johan Oskarsson via ddas)
+
+    HADOOP-2422.  dfs -cat multiple files fail with 'Unable to write to 
+    output stream'.  (Raghu Angadi via dhruba)
+
+    HADOOP-2460.  When the namenode encounters ioerrors on writing a
+    transaction log, it stops writing new transactions to that one.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-2227.  Use the LocalDirAllocator uniformly for handling all of the
+    temporary storage required for a given task. It also implies that
+    mapred.local.dir.minspacestart is handled by checking if there is enough
+    free-space on any one of the available disks. (Amareshwari Sri Ramadasu
+    via acmurthy)
+
+    HADOOP-2437.  Fix the LocalDirAllocator to choose the seed for the
+    round-robin disk selections randomly. This helps in spreading data across
+    multiple partitions much better. (acmurthy)
+
+    HADOOP-2486. When the list of files from the InMemoryFileSystem is obtained
+    for merging, this patch will ensure that only those files whose checksums
+    have also been created (renamed) are returned. (ddas)
+
+    HADOOP-2456. Hardcode English locale to prevent NumberFormatException
+    from occurring when starting the NameNode with certain locales.
+    (Matthias Friedrich via nigel)
+
+  IMPROVEMENTS
+
+    HADOOP-2160.  Remove project-level, non-user documentation from
+    releases, since it's now maintained in a separate tree.  (cutting)
+
+    HADOOP-1327.  Add user documentation for streaming.  (cutting)
+
+    HADOOP-2382.  Add hadoop-default.html to subversion. (cutting)
+
+    HADOOP-2158. hdfsListDirectory calls FileSystem.listStatus instead
+    of FileSystem.listPaths. This reduces the number of RPC calls on the
+    namenode, thereby improving scalability.  (Christian Kunz via dhruba)
+
+Release 0.15.1 - 2007-11-27
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-713.  Reduce CPU usage on namenode while listing directories.
+    FileSystem.listPaths does not return the size of the entire subtree.
+    Introduced a new API ClientProtocol.getContentLength that returns the
+    size of the subtree. (Dhruba Borthakur via dhruba)
+
+  IMPROVEMENTS
+
+    HADOOP-1917.  Addition of guides/tutorial for better overall
+    documentation for Hadoop. Specifically: 
+    * quickstart.html is targeted towards first-time users and helps them 
+      setup a single-node cluster and play with Hadoop. 
+    * cluster_setup.html helps admins to configure and setup non-trivial
+      hadoop clusters.
+    * mapred_tutorial.html is a comprehensive Map-Reduce tutorial. 
+    (acmurthy) 
+
+  BUG FIXES
+
+    HADOOP-2174.  Removed the unnecessary Reporter.setStatus call from
+    FSCopyFilesMapper.close which led to a NPE since the reporter isn't valid
+    in the close method. (Chris Douglas via acmurthy) 
+
+    HADOOP-2172.  Restore performance of random access to local files
+    by caching positions of local input streams, avoiding a system
+    call. (cutting)
+
+    HADOOP-2205.  Regenerate the Hadoop website since some of the changes made
+    by HADOOP-1917 weren't correctly copied over to the trunk/docs directory. 
+    Also fixed a couple of minor typos and broken links. (acmurthy)
+
+Release 0.15.0 - 2007-11-02
+
+  INCOMPATIBLE CHANGES
+
+    HADOOP-1708.  Make files appear in namespace as soon as they are
+    created.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-999.  A HDFS Client immediately informs the NameNode of a new
+    file creation.  ClientProtocol version changed from 14 to 15.
+    (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-932.  File locking interfaces and implementations (that were
+    earlier deprecated) are removed.  Client Protocol version changed 
+    from 15 to 16.  (Raghu Angadi via dhruba)
+
+    HADOOP-1621.  FileStatus is now a concrete class and FileSystem.listPaths
+    is deprecated and replaced with listStatus. (Chris Douglas via omalley)
+
+    HADOOP-1656.  The blockSize of a file is stored persistently in the file
+    inode. (Dhruba Borthakur via dhruba)
+
+    HADOOP-1838.  The blocksize of files created with an earlier release is
+    set to the default block size.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-785.  Add support for 'final' Configuration parameters,
+    removing support for 'mapred-default.xml', and changing
+    'hadoop-site.xml' to not override other files.  Now folks should
+    generally use 'hadoop-site.xml' for all configurations.  Values
+    with a 'final' tag may not be overridden by subsequently loaded
+    configuration files, e.g., by jobs.  (Arun C. Murthy via cutting)
+
+    HADOOP-1846. DatanodeReport in ClientProtocol can report live 
+    datanodes, dead datanodes or all datanodes. Client Protocol version
+    changed from 17 to 18.  (Hairong Kuang via dhruba)
+
+    HADOOP-1851.  Permit specification of map output compression type
+    and codec, independent of the final output's compression
+    parameters.  (Arun C Murthy via cutting)
+
+    HADOOP-1819.  Jobtracker cleanups, including binding ports before
+    clearing state directories, so that inadvertently starting a
+    second jobtracker doesn't trash one that's already running. Removed
+    method JobTracker.getTracker() because the static variable, which
+    stored the value, caused initialization problems.
+    (omalley via cutting)
+
+  NEW FEATURES
+
+    HADOOP-89.  A client can access file data even before the creator
+    has closed the file. Introduce a new command "tail" from dfs shell.
+    (Dhruba Borthakur via dhruba)
+
+    HADOOP-1636.  Allow configuration of the number of jobs kept in
+    memory by the JobTracker.  (Michael Bieniosek via omalley)
+
+    HADOOP-1667.  Reorganize CHANGES.txt into sections to make it
+    easier to read.  Also remove numbering, to make merging easier.
+    (cutting)
+
+    HADOOP-1610.  Add metrics for failed tasks.
+    (Devaraj Das via tomwhite)
+
+    HADOOP-1767.  Add "bin/hadoop job -list" sub-command. (taton via cutting)
+
+    HADOOP-1351.  Add "bin/hadoop job [-fail-task|-kill-task]" sub-commands
+    to terminate a particular task-attempt. (Enis Soztutar via acmurthy)
+
+    HADOOP-1880. SleepJob : An example job that sleeps at each map and 
+    reduce task. (enis)
+
+    HADOOP-1809. Add a link in web site to #hadoop IRC channel. (enis)
+
+    HADOOP-1894. Add percentage graphs and mapred task completion graphs 
+    to Web User Interface. Users not using Firefox may install a plugin to 
+    their browsers to see svg graphics. (enis)
+
+    HADOOP-1914. Introduce a new NamenodeProtocol to allow secondary 
+    namenodes and rebalancing processes to communicate with a primary 
+    namenode.  (Hairong Kuang via dhruba)
+
+    HADOOP-1963.  Add a FileSystem implementation for the Kosmos
+    Filesystem (KFS).  (Sriram Rao via cutting)
+
+    HADOOP-1822.  Allow the specialization and configuration of socket
+    factories. Provide a StandardSocketFactory, and a SocksSocketFactory to
+    allow the use of SOCKS proxies. (taton).
+
+    HADOOP-1968. FileSystem supports wildcard input syntax "{ }".
+    (Hairong Kuang via dhruba)
+
+    HADOOP-2566. Add globStatus method to the FileSystem interface
+    and deprecate globPath and listPath. (Hairong Kuang via hairong)
+
+  OPTIMIZATIONS
+
+    HADOOP-1910.  Reduce the number of RPCs that DistributedFileSystem.create()
+    makes to the namenode. (Raghu Angadi via dhruba)
+
+    HADOOP-1565.  Reduce memory usage of NameNode by replacing 
+    TreeMap in HDFS Namespace with ArrayList.  
+    (Dhruba Borthakur via dhruba)
+
+    HADOOP-1743.  Change DFS INode from a nested class to standalone
+    class, with specialized subclasses for directories and files, to
+    save memory on the namenode.  (Konstantin Shvachko via cutting)
+
+    HADOOP-1759.  Change file name in INode from String to byte[],
+    saving memory on the namenode. (Konstantin Shvachko via cutting)
+
+    HADOOP-1766.  Save memory in namenode by having BlockInfo extend
+    Block, and replace many uses of Block with BlockInfo.
+    (Konstantin Shvachko via cutting)
+
+    HADOOP-1687.  Save memory in namenode by optimizing BlockMap
+    representation.  (Konstantin Shvachko via cutting)
+
+    HADOOP-1774. Remove use of INode.parent in Block CRC upgrade.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1788.  Increase the buffer size on the Pipes command socket.
+    (Amareshwari Sri Ramadasu and Christian Kunz via omalley)
+
+  BUG FIXES
+
+    HADOOP-1946.  The Datanode code does not need to invoke du on
+    every heartbeat.  (Hairong Kuang via dhruba)
+
+    HADOOP-1935. Fix a NullPointerException in internalReleaseCreate.
+    (Dhruba Borthakur)
+
+    HADOOP-1933. The nodes listed in include and exclude files 
+    are always listed in the datanode report.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1953. The job tracker should wait between calls to try and delete 
+    the system directory. (Owen O'Malley via devaraj)
+
+    HADOOP-1932. TestFileCreation fails with message saying filestatus.dat
+    is of incorrect size.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-1573. Support for 0 reducers in PIPES. 
+    (Owen O'Malley via devaraj)
+
+    HADOOP-1500. Fix typographical errors in the DFS WebUI.
+    (Nigel Daley via dhruba)
+
+    HADOOP-1076. Periodic checkpoint can continue even if an earlier
+    checkpoint encountered an error.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-1887. The Namenode encounters an ArrayIndexOutOfBoundsException
+    while listing a directory that had a file that was
+    being actively written to.  (Dhruba Borthakur via dhruba)
+
+    HADOOP-1904. The Namenode encounters an exception because the
+    list of blocks per datanode-descriptor was corrupted.
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-1762. The Namenode fsimage does not contain a list of
+    Datanodes.  (Raghu Angadi via dhruba)
+
+    HADOOP-1890. Removed debugging prints introduced by HADOOP-1774.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1763. Too many lost task trackers on large clusters due to
+    insufficient number of RPC handler threads on the JobTracker.
+    (Devaraj Das)
+
+    HADOOP-1463.  HDFS report correct usage statistics for disk space
+    used by HDFS.  (Hairong Kuang via dhruba)
+
+    HADOOP-1692.  In DFS ant task, don't cache the Configuration.
+    (Chris Douglas via cutting)
+
+    HADOOP-1726.  Remove lib/jetty-ext/ant.jar. (omalley)
+
+    HADOOP-1772.  Fix hadoop-daemon.sh script to get correct hostname
+    under Cygwin.  (Tsz Wo (Nicholas), SZE via cutting)
+
+    HADOOP-1749.  Change TestDFSUpgrade to sort files, fixing sporadic
+    test failures.  (Enis Soztutar via cutting)
+
+    HADOOP-1748.  Fix tasktracker to be able to launch tasks when log
+    directory is relative.  (omalley via cutting)
+
+    HADOOP-1775.  Fix a NullPointerException and an
+    IllegalArgumentException in MapWritable.
+    (Jim Kellerman via cutting)
+
+    HADOOP-1795.  Fix so that jobs can generate output file names with
+    special characters.  (Frédéric Bertin via cutting)
+
+    HADOOP-1810.  Fix incorrect value type in MRBench (SmallJobs)
+    (Devaraj Das via tomwhite)
+
+    HADOOP-1806.  Fix ant task to compile again, also fix default
+    builds to compile ant tasks.  (Chris Douglas via cutting)
+
+    HADOOP-1758.  Fix escape processing in librecordio to not be
+    quadratic.  (Vivek Ratan via cutting)
+
+    HADOOP-1817.  Fix MultiFileSplit to read and write the split
+    length, so that it is not always zero in map tasks.
+    (Thomas Friol via cutting)
+
+    HADOOP-1853.  Fix contrib/streaming to accept multiple -cacheFile
+    options.  (Prachi Gupta via cutting)
+
+    HADOOP-1818. Fix MultiFileInputFormat so that it does not return 
+    empty splits when numPaths < numSplits.  (Thomas Friol via enis)
+
+    HADOOP-1840. Fix race condition which leads to task's diagnostic
+    messages getting lost. (acmurthy) 
+
+    HADOOP-1885. Fix race condition in MiniDFSCluster shutdown.
+    (Chris Douglas via nigel)
+
+    HADOOP-1889.  Fix path in EC2 scripts for building your own AMI.
+    (tomwhite)
+
+    HADOOP-1892.  Fix a NullPointerException in the JobTracker when
+    trying to fetch a task's diagnostic messages from the JobClient.
+    (Amar Kamat via acmurthy)
+
+    HADOOP-1897.  Completely remove about.html page from the web site.
+    (enis)
+
+    HADOOP-1907.  Fix null pointer exception when getting task diagnostics
+    in JobClient. (Christian Kunz via omalley)
+
+    HADOOP-1882.  Remove spurious asterisks from decimal number displays.
+    (Raghu Angadi via cutting)
+
+    HADOOP-1783.  Make S3 FileSystem return Paths fully-qualified with
+    scheme and host.  (tomwhite)
+
+    HADOOP-1925.  Make pipes' autoconf script look for libsocket and libnsl, so
+    that it can compile under Solaris. (omalley)
+
+    HADOOP-1940.  TestDFSUpgradeFromImage must shut down its MiniDFSCluster.
+    (Chris Douglas via nigel)
+
+    HADOOP-1930.  Fix the blame for failed fetches on the right host. (Arun C.
+    Murthy via omalley)
+
+    HADOOP-1934.  Fix the platform name on Mac to use underscores rather than
+    spaces. (omalley)
+
+    HADOOP-1959.  Use "/" instead of File.separator in the StatusHttpServer.
+    (jimk via omalley)
+
+    HADOOP-1626.  Improve dfsadmin help messages.
+    (Lohit Vijayarenu via dhruba)
+
+    HADOOP-1695.  The SecondaryNamenode waits for the Primary NameNode to
+    start up.  (Dhruba Borthakur)
+
+    HADOOP-1983.  Have Pipes flush the command socket when progress is sent
+    to prevent timeouts during long computations. (omalley)
+
+    HADOOP-1875.  Non-existent directories or read-only directories are
+    filtered from dfs.client.buffer.dir.  (Hairong Kuang via dhruba)
+
+    HADOOP-1992.  Fix the performance degradation in the sort validator. 
+    (acmurthy via omalley)
+
+    HADOOP-1874.  Move task-outputs' promotion/discard to a separate thread
+    distinct from the main heartbeat-processing thread. The main upside being 
+    that we do not lock-up the JobTracker during HDFS operations, which
+    otherwise may lead to lost tasktrackers if the NameNode is unresponsive.
+    (Devaraj Das via acmurthy)
+
+    HADOOP-2026. Namenode prints out one log line for "Number of transactions"
+    at most once every minute. (Dhruba Borthakur)
+
+    HADOOP-2022.  Ensure that status information for successful tasks is correctly
+    recorded at the JobTracker, so that, for example, one may view correct
+    information via taskdetails.jsp. This bug was introduced by HADOOP-1874.
+    (Amar Kamat via acmurthy)
+                                
+    HADOOP-2031.  Correctly maintain the taskid which takes the TIP to 
+    completion, failing which the case of lost tasktrackers isn't handled
+    properly i.e. the map TIP is incorrectly left marked as 'complete' and it
+    is never rescheduled elsewhere, leading to hung reduces.
+    (Devaraj Das via acmurthy)
+
+    HADOOP-2018. The source datanode of a data transfer waits for
+    a response from the target datanode before closing the data stream.
+    (Hairong Kuang via dhruba)
+                                
+    HADOOP-2023. Disable TestLocalDirAllocator on Windows.
+    (Hairong Kuang via nigel)
+
+    HADOOP-2016.  Ignore status-updates from FAILED/KILLED tasks at the 
+    TaskTracker. This fixes a race-condition which caused the tasks to wrongly 
+    remain in the RUNNING state even after being killed by the JobTracker and
+    thus handicapping the cleanup of the task's output sub-directory. (acmurthy)
+
+    HADOOP-1771. Fix a NullPointerException in streaming caused by an 
+    IOException in MROutputThread. (lohit vijayarenu via nigel)
+
+    HADOOP-2028. Fix distcp so that the log dir does not need to be 
+    specified and the destination does not need to exist.
+    (Chris Douglas via nigel)
+
+    HADOOP-2044. The namenode protects all lease manipulations using a 
+    sortedLease lock.  (Dhruba Borthakur)
+
+    HADOOP-2051. The TaskCommit thread should not die for exceptions other
+    than the InterruptedException. This behavior is there for the other long
+    running threads in the JobTracker. (Arun C Murthy via ddas)
+
+    HADOOP-1973. The FileSystem object would be accessed on the JobTracker
+    through an RPC in the InterTrackerProtocol. The check for the object being
+    null was missing and hence an NPE would sometimes be thrown. This patch
+    fixes that problem.  (Amareshwari Sri Ramadasu via ddas) 
+
+    HADOOP-2033.  The SequenceFile.Writer.sync method was a no-op, which caused
+    very uneven splits for applications like distcp that count on them.
+    (omalley)
+
+    HADOOP-2070.  Added a flush method to pipes' DownwardProtocol and call
+    that before waiting for the application to finish to ensure all buffered
+    data is flushed. (Owen O'Malley via acmurthy)
+
+    HADOOP-2080.  Fixed calculation of the checksum file size when the values
+    are large. (omalley)
+
+    HADOOP-2048.  Change error handling in distcp so that each map copies
+    as much as possible before reporting the error. Also report progress on
+    every copy. (Chris Douglas via omalley)
+
+    HADOOP-2073.  Change size of VERSION file after writing contents to it.
+    (Konstantin Shvachko via dhruba)
+ 
+    HADOOP-2102.  Fix the deprecated ToolBase to pass its Configuration object
+    to the superseding ToolRunner to ensure it picks up the appropriate
+    configuration resources. (Dennis Kubes and Enis Soztutar via acmurthy) 
+ 
+    HADOOP-2103.  Fix minor javadoc bugs introduced by HADOOP-2046. (Nigel
+    Daley via acmurthy) 
+
+  IMPROVEMENTS
+
+    HADOOP-1908. Restructure data node code so that block sending and 
+    receiving are separated from data transfer header handling.
+    (Hairong Kuang via dhruba)
+
+    HADOOP-1921. Save the configuration of completed/failed jobs and make them
+    available via the web-ui. (Amar Kamat via devaraj)
+
+    HADOOP-1266. Remove dependency of package org.apache.hadoop.net on 
+    org.apache.hadoop.dfs.  (Hairong Kuang via dhruba)
+
+    HADOOP-1779. Replace INodeDirectory.getINode() by a getExistingPathINodes()
+    to allow the retrieval of all existing INodes along a given path in a
+    single lookup. This facilitates removal of the 'parent' field in the
+    inode. (Christophe Taton via dhruba)
+
+    HADOOP-1756. Add toString() to some Writable-s. (ab)
+
+    HADOOP-1727.  New classes: MapWritable and SortedMapWritable.
+    (Jim Kellerman via ab)
+
+    HADOOP-1651.  Improve progress reporting.
+    (Devaraj Das via tomwhite)
+
+    HADOOP-1595.  dfsshell can wait for a file to achieve its intended
+    replication target. (Tsz Wo (Nicholas), SZE via dhruba)
+
+    HADOOP-1693.  Remove un-needed log fields in DFS replication classes,
+    since the log may be accessed statically. (Konstantin Shvachko via cutting)
+
+    HADOOP-1231.  Add generics to Mapper and Reducer interfaces.
+    (tomwhite via cutting)
+
+    HADOOP-1436.  Improved command-line APIs, so that all tools need
+    not subclass ToolBase, and generic parameter parser is public.
+    (Enis Soztutar via cutting)
+
+    HADOOP-1703.  DFS-internal code cleanups, removing several uses of
+    the obsolete UTF8.  (Christophe Taton via cutting)
+
+    HADOOP-1731.  Add Hadoop's version to contrib jar file names.
+    (cutting)
+
+    HADOOP-1689.  Make shell scripts more portable.  All shell scripts
+    now explicitly depend on bash, but do not require that bash be
+    installed in a particular location, as long as it is on $PATH.
+    (cutting)
+
+    HADOOP-1744.  Remove many uses of the deprecated UTF8 class from
+    the HDFS namenode.  (Christophe Taton via cutting)
+
+    HADOOP-1654.  Add IOUtils class, containing generic io-related
+    utility methods.   (Enis Soztutar via cutting)
+
+    HADOOP-1158.  Change JobTracker to record map-output transmission
+    errors and use them to trigger speculative re-execution of tasks.
+    (Arun C Murthy via cutting)
+
+    HADOOP-1601.  Change GenericWritable to use ReflectionUtils for
+    instance creation, avoiding classloader issues, and to implement
+    Configurable.  (Enis Soztutar via cutting)
+
+    HADOOP-1750.  Log standard output and standard error when forking
+    task processes.  (omalley via cutting)
+
+    HADOOP-1803.  Generalize build.xml to make files in all
+    src/contrib/*/bin directories executable.  (stack via cutting)
+
+    HADOOP-1739.  Let OS always choose the tasktracker's umbilical
+    port.  Also switch default address for umbilical connections to
+    loopback.  (cutting)
+
+    HADOOP-1812. Let OS choose ports for IPC and RPC unit tests. (cutting)
+
+    HADOOP-1825.  Create $HADOOP_PID_DIR when it does not exist.
+    (Michael Bieniosek via cutting)
+
+    HADOOP-1425.  Replace uses of ToolBase with the Tool interface.
+    (Enis Soztutar via cutting)
+
+    HADOOP-1569.  Reimplement DistCP to use the standard FileSystem/URI
+    code in Hadoop so that you can copy from and to all of the supported file 
+    systems.(Chris Douglas via omalley)
+
+    HADOOP-1018.  Improve documentation w.r.t. handling of lost heartbeats between 
+    TaskTrackers and JobTracker. (acmurthy)
+
+    HADOOP-1718.  Add ant targets for measuring code coverage with clover.
+    (simonwillnauer via nigel)
+
+    HADOOP-1592.  Log error messages to the client console when tasks
+    fail.  (Amar Kamat via cutting)
+
+    HADOOP-1879.  Remove some unneeded casts.  (Nilay Vaish via cutting)
+
+    HADOOP-1878.  Add space between priority links on job details
+    page. (Thomas Friol via cutting)
+
+    HADOOP-120.  In ArrayWritable, prevent creation with null value
+    class, and improve documentation.  (Cameron Pope via cutting)
+
+    HADOOP-1926. Add a random text writer example/benchmark so that we can
+    benchmark compression codecs on random data. (acmurthy via omalley)
+
+    HADOOP-1906. Warn the user if they have an obsolete mapred-default.xml
+    file in their configuration directory. (acmurthy via omalley)
+
+    HADOOP-1971.  Warn when job does not specify a jar. (enis via cutting)
+
+    HADOOP-1942. Increase the concurrency of transaction logging to 
+    edits log. Reduce the number of syncs by double-buffering the changes
+    to the transaction log. (Dhruba Borthakur)
+
+    HADOOP-2046.  Improve mapred javadoc.  (Arun C. Murthy via cutting)
+
+    HADOOP-2105.  Improve overview.html to clarify supported platforms, 
+    software pre-requisites for hadoop, how to install them on various 
+    platforms and a better general description of hadoop and its utility. 
+    (Jim Kellerman via acmurthy) 
+
+
+Release 0.14.4 - 2007-11-26
+
+  BUG FIXES
+
+    HADOOP-2140.  Add missing Apache Licensing text at the front of several
+    C and C++ files.
+
+    HADOOP-2169.  Fix the DT_SONAME field of libhdfs.so to set it to the
+    correct value of 'libhdfs.so', currently it is set to the absolute path of
+    libhdfs.so. (acmurthy) 
+
+    HADOOP-2001.  Make the job priority updates and job kills synchronized on
+    the JobTracker. Deadlock was seen in the JobTracker because of the lack of
+    this synchronization.  (Arun C Murthy via ddas)
+
+
+Release 0.14.3 - 2007-10-19
+
+  BUG FIXES
+
+    HADOOP-2053. Fixed a dangling reference to a memory buffer in the map 
+    output sorter. (acmurthy via omalley)
+
+    HADOOP-2036. Fix a NullPointerException in JvmMetrics class. (nigel)
+
+    HADOOP-2043. Release 0.14.2 was compiled with Java 1.6 rather than
+    Java 1.5.  (cutting)
+
+
+Release 0.14.2 - 2007-10-09
+
+  BUG FIXES
+
+    HADOOP-1948. Removed spurious error message during block crc upgrade.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1862.  Reduces are getting stuck trying to find map outputs. 
+    (Arun C. Murthy via ddas)
+ 
+    HADOOP-1977. Fixed handling of ToolBase cli options in JobClient.
+    (enis via omalley)
+
+    HADOOP-1972.  Fix LzoCompressor to ensure the user has actually asked
+    to finish compression. (arun via omalley)
+
+    HADOOP-1970.  Fix deadlock in progress reporting in the task. (Vivek
+    Ratan via omalley)
+
+    HADOOP-1978.  Name-node removes edits.new after a successful startup.
+    (Konstantin Shvachko via dhruba)
+
+    HADOOP-1955.  The Namenode tries to not pick the same source Datanode for
+    a replication request if the earlier replication request for the same
+    block and that source Datanode had failed.
+    (Raghu Angadi via dhruba)
+
+    HADOOP-1961.  The -get option to dfs-shell works when a single filename
+    is specified.  (Raghu Angadi via dhruba)
+
+    HADOOP-1997.  TestCheckpoint closes the edits file after writing to it,
+    otherwise the rename of this file on Windows fails.
+    (Konstantin Shvachko via dhruba)
+
+Release 0.14.1 - 2007-09-04
+
+  BUG FIXES
+
+    HADOOP-1740.  Fix null pointer exception in sorting map outputs. (Devaraj
+    Das via omalley)
+
+    HADOOP-1790.  Fix tasktracker to work correctly on multi-homed
+    boxes.  (Torsten Curdt via cutting)
+
+    HADOOP-1798.  Fix jobtracker to correctly account for failed
+    tasks.  (omalley via cutting)
+
+
+Release 0.14.0 - 2007-08-17
+
+  INCOMPATIBLE CHANGES
+
+  1. HADOOP-1134.
+     CONFIG/API - dfs.block.size must now be a multiple of
+       io.byte.per.checksum, otherwise new files can not be written.
+     LAYOUT - DFS layout version changed from -6 to -7, which will require an
+       upgrade from previous versions.
+     PROTOCOL - Datanode RPC protocol version changed from 7 to 8.
+
+  2. HADOOP-1283
+     API - deprecated file locking API.
+
+  3. HADOOP-894
+     PROTOCOL - changed ClientProtocol to fetch parts of block locations.
+
+  4. HADOOP-1336
+     CONFIG - Enable speculative execution by default.
+
+  5. HADOOP-1197
+     API - deprecated method for Configuration.getObject, because
+       Configurations should only contain strings.
+
+  6. HADOOP-1343
+     API - deprecate Configuration.set(String,Object) so that only strings are
+       put in Configurations.
+
+  7. HADOOP-1207
+     CLI - Fix FsShell 'rm' command to continue when a non-existent file is
+       encountered.
+
+  8. HADOOP-1473
+     CLI/API - Job, TIP, and Task id formats have changed and are now unique
+       across job tracker restarts.
+
+  9. HADOOP-1400
+     API - JobClient constructor now takes a JobConf object instead of a
+       Configuration object.
+
+  NEW FEATURES and BUG FIXES
+
+  1. HADOOP-1197.  In Configuration, deprecate getObject() and add
+     getRaw(), which skips variable expansion. (omalley via cutting)
+
+  2. HADOOP-1343.  In Configuration, deprecate set(String,Object) and
+     implement Iterable. (omalley via cutting)
+
+  3. HADOOP-1344.  Add RunningJob#getJobName(). (Michael Bieniosek via cutting)
+
+  4. HADOOP-1342.  In aggregators, permit one to limit the number of
+     unique values per key.  (Runping Qi via cutting)
+
+  5. HADOOP-1340.  Set the replication factor of the MD5 file in the filecache
+     to be the same as the replication factor of the original file.
+     (Dhruba Borthakur via tomwhite.)
+
+  6. HADOOP-1355.  Fix null pointer dereference in 
+     TaskLogAppender.append(LoggingEvent).  (Arun C Murthy via tomwhite.)
+
+  7. HADOOP-1357.  Fix CopyFiles to correctly avoid removing "/".
+     (Arun C Murthy via cutting)
+
+  8. HADOOP-234.  Add pipes facility, which permits writing MapReduce
+     programs in C++.
+
+  9. HADOOP-1359.  Fix a potential NullPointerException in HDFS.
+     (Hairong Kuang via cutting)
+
+ 10. HADOOP-1364.  Fix inconsistent synchronization in SequenceFile.
+     (omalley via cutting)
+
+ 11. HADOOP-1379.  Add findbugs target to build.xml.
+     (Nigel Daley via cutting)
+
+ 12. HADOOP-1364.  Fix various inconsistent synchronization issues.
+     (Devaraj Das via cutting)
+
+ 13. HADOOP-1393.  Remove a potential unexpected negative number from
+     uses of random number generator. (omalley via cutting)
+
+ 14. HADOOP-1387.  A number of "performance" code-cleanups suggested
+     by findbugs.  (Arun C Murthy via cutting)
+
+ 15. HADOOP-1401.  Add contrib/hbase javadoc to tree.  (stack via cutting)
+
+ 16. HADOOP-894.  Change HDFS so that the client only retrieves a limited
+     number of block locations per request from the namenode.
+     (Konstantin Shvachko via cutting)
+
+ 17. HADOOP-1406.  Plug a leak in MapReduce's use of metrics.
+     (David Bowen via cutting)
+
+ 18. HADOOP-1394.  Implement "performance" code-cleanups in HDFS
+     suggested by findbugs.  (Raghu Angadi via cutting)
+
+ 19. HADOOP-1413.  Add example program that uses Knuth's dancing links
+     algorithm to solve pentomino problems.  (omalley via cutting)
+
+ 20. HADOOP-1226.  Change HDFS so that paths it returns are always
+     fully qualified.  (Dhruba Borthakur via cutting)
+
+ 21. HADOOP-800.  Improvements to HDFS web-based file browser.
+     (Enis Soztutar via cutting)
+
+ 22. HADOOP-1408.  Fix a compiler warning by adding a class to replace
+     a generic.  (omalley via cutting)
+
+ 23. HADOOP-1376.  Modify RandomWriter example so that it can generate
+     data for the Terasort benchmark.  (Devaraj Das via cutting)
+
+ 24. HADOOP-1429.  Stop logging exceptions during normal IPC server
+     shutdown.  (stack via cutting)
+
+ 25. HADOOP-1461.  Fix the synchronization of the task tracker to
+     avoid lockups in job cleanup.  (Arun C Murthy via omalley)
+
+ 26. HADOOP-1446.  Update the TaskTracker metrics while the task is
+     running. (Devaraj via omalley)
+
+ 27. HADOOP-1414.  Fix a number of issues identified by FindBugs as
+     "Bad Practice".  (Dhruba Borthakur via cutting)
+
+ 28. HADOOP-1392.  Fix "correctness" bugs identified by FindBugs in
+     fs and dfs packages.  (Raghu Angadi via cutting)
+
+ 29. HADOOP-1412.  Fix "dodgy" bugs identified by FindBugs in fs and
+     io packages.  (Hairong Kuang via cutting)
+
+ 30. HADOOP-1261.  Remove redundant events from HDFS namenode's edit
+     log when a datanode restarts.  (Raghu Angadi via cutting)
+
+ 31. HADOOP-1336.  Re-enable speculative execution by
+     default. (omalley via cutting)
+
+ 32. HADOOP-1311.  Fix a bug in BytesWritable#set() where start offset
+     was ignored.  (Dhruba Borthakur via cutting)
+
+ 33. HADOOP-1450.  Move checksumming closer to user code, so that
+     checksums are created before data is stored in large buffers and
+     verified after data is read from large buffers, to better catch
+     memory errors.  (cutting)
+
+ 34. HADOOP-1447.  Add support in contrib/data_join for text inputs.
+     (Senthil Subramanian via cutting)
+
+ 35. HADOOP-1456.  Fix TestDecommission assertion failure by setting
+     the namenode to ignore the load on datanodes while allocating
+     replicas.  (Dhruba Borthakur via tomwhite)
+
+ 36. HADOOP-1396.  Fix FileNotFoundException on DFS block.
+     (Dhruba Borthakur via tomwhite)
+
+ 37. HADOOP-1467.  Remove redundant counters from WordCount example.
+     (Owen O'Malley via tomwhite)
+
+ 38. HADOOP-1139.  Log HDFS block transitions at INFO level, to better
+     enable diagnosis of problems.  (Dhruba Borthakur via cutting)
+
+ 39. HADOOP-1269.  Finer grained locking in HDFS namenode.
+     (Dhruba Borthakur via cutting)
+
+ 40. HADOOP-1438.  Improve HDFS documentation, correcting typos and
+     making images appear in PDF.  Also update copyright date for all
+     docs.  (Luke Nezda via cutting)
+
+ 41. HADOOP-1457.  Add counters for monitoring task assignments.
+     (Arun C Murthy via tomwhite)
+
+ 42. HADOOP-1472.  Fix so that timed-out tasks are counted as failures
+     rather than as killed.  (Arun C Murthy via cutting)
+
+ 43. HADOOP-1234.  Fix a race condition in file cache that caused
+     tasktracker to not be able to find cached files.
+     (Arun C Murthy via cutting)
+
+ 44. HADOOP-1482.  Fix secondary namenode to roll info port.
+     (Dhruba Borthakur via cutting)
+
+ 45. HADOOP-1300.  Improve removal of excess block replicas to be
+     rack-aware.  Attempts are now made to keep replicas on more
+     racks.  (Hairong Kuang via cutting)
+
+ 46. HADOOP-1417.  Disable a few FindBugs checks that generate a lot
+     of spurious warnings.  (Nigel Daley via cutting)
+
+ 47. HADOOP-1320.  Rewrite RandomWriter example to bypass reduce.
+     (Arun C Murthy via cutting)
+
+ 48. HADOOP-1449.  Add some examples to contrib/data_join.
+     (Senthil Subramanian via cutting)
+
+ 49. HADOOP-1459.  Fix so that, in HDFS, getFileCacheHints() returns
+     hostnames instead of IP addresses.  (Dhruba Borthakur via cutting)
+
+ 50. HADOOP-1493.  Permit specification of "java.library.path" system
+     property in "mapred.child.java.opts" configuration property.
+     (Enis Soztutar via cutting)
+
+ 51. HADOOP-1372.  Use LocalDirAllocator for HDFS temporary block
+     files, so that disk space, writability, etc. is considered.
+     (Dhruba Borthakur via cutting)
+
+ 52. HADOOP-1193.  Pool allocation of compression codecs.  This
+     eliminates a memory leak that could cause OutOfMemoryException,
+     and also substantially improves performance.
+     (Arun C Murthy via cutting)
+
+ 53. HADOOP-1492.  Fix a NullPointerException handling version
+     mismatch during datanode registration.
+     (Konstantin Shvachko via cutting)
+
+ 54. HADOOP-1442.  Fix handling of zero-length input splits.
+     (Senthil Subramanian via cutting)
+
+ 55. HADOOP-1444.  Fix HDFS block id generation to check pending
+     blocks for duplicates. (Dhruba Borthakur via cutting)
+
+ 56. HADOOP-1207.  Fix FsShell's 'rm' command to not stop when one of
+     the named files does not exist.  (Tsz Wo Sze via cutting)
+
+ 57. HADOOP-1475.  Clear tasktracker's file cache before it
+     re-initializes, to avoid confusion.  (omalley via cutting)
+
+ 58. HADOOP-1505.  Remove spurious stacktrace in ZlibFactory
+     introduced in HADOOP-1093.  (Michael Stack via tomwhite)
+
+ 59. HADOOP-1484.  Permit one to kill jobs from the web ui.  Note that
+     this is disabled by default.  One must set
+     "webinterface.private.actions" to enable this.
+     (Enis Soztutar via cutting)
+
+ 60. HADOOP-1003.  Remove flushing of namenode edit log from primary
+     namenode lock, increasing namenode throughput.
+     (Dhruba Borthakur via cutting)
+
+ 61. HADOOP-1023.  Add links to searchable mail archives.
+     (tomwhite via cutting)
+
+ 62. HADOOP-1504.  Fix terminate-hadoop-cluster script in contrib/ec2
+     to only terminate Hadoop instances, and not other instances
+     started by the same user.  (tomwhite via cutting)
+
+ 63. HADOOP-1462.  Improve task progress reporting.  Progress reports
+     are no longer blocking since i/o is performed in a separate
+     thread.  Reporting during sorting and more is also more
+     consistent.  (Vivek Ratan via cutting)
+
+ 64. [ intentionally blank ]
+
+ 65. HADOOP-1453.  Remove some unneeded calls to FileSystem#exists()
+     when opening files, reducing the namenode load somewhat.
+     (Raghu Angadi via cutting)
+
+ 66. HADOOP-1489.  Fix text input truncation bug due to mark/reset.
+     Add a unittest. (Bwolen Yang via cutting)
+
+ 67. HADOOP-1455.  Permit specification of arbitrary job options on
+     pipes command line.  (Devaraj Das via cutting)
+
+ 68. HADOOP-1501.  Better randomize sending of block reports to
+     namenode, to reduce load spikes.  (Dhruba Borthakur via cutting)
+
+ 69. HADOOP-1147.  Remove @author tags from Java source files.
+
+ 70. HADOOP-1283.  Convert most uses of UTF8 in the namenode to be
+     String.  (Konstantin Shvachko via cutting)
+
+ 71. HADOOP-1511.  Speedup hbase unit tests.  (stack via cutting)
+
+ 72. HADOOP-1517.  Remove some synchronization in namenode to permit
+     finer grained locking previously added.  (Konstantin Shvachko via cutting)
+
+ 73. HADOOP-1512.  Fix failing TestTextInputFormat on Windows.
+     (Senthil Subramanian via nigel)
+
+ 74. HADOOP-1518.  Add a session id to job metrics, for use by HOD.
+     (David Bowen via cutting)
+
+ 75. HADOOP-1292.  Change 'bin/hadoop fs -get' to first copy files to
+     a temporary name, then rename them to their final name, so that
+     failures don't leave partial files.  (Tsz Wo Sze via cutting)
+
+ 76. HADOOP-1377.  Add support for modification time to FileSystem and
+     implement in HDFS and local implementations.  Also, alter access
+     to file properties to be through a new FileStatus interface.
+     (Dhruba Borthakur via cutting)
+
+ 77. HADOOP-1515.  Add MultiFileInputFormat, which can pack multiple,
+     typically small, input files into each split.  (Enis Soztutar via cutting)
+
+ 78. HADOOP-1514.  Make reducers report progress while waiting for map
+     outputs, so they're not killed.  (Vivek Ratan via cutting)
+
+ 79. HADOOP-1508.  Add an Ant task for FsShell operations.  Also add
+     new FsShell commands "touchz", "test" and "stat".
+     (Chris Douglas via cutting)
+
+ 80. HADOOP-1028.  Add log messages for server startup and shutdown.
+     (Tsz Wo Sze via cutting)
+
+ 81. HADOOP-1485.  Add metrics for monitoring shuffle.
+     (Devaraj Das via cutting)
+
+ 82. HADOOP-1536.  Remove file locks from libhdfs tests.
+     (Dhruba Borthakur via nigel)
+
+ 83. HADOOP-1520.  Add appropriate synchronization to FSEditsLog.
+     (Dhruba Borthakur via nigel)
+
+ 84. HADOOP-1513.  Fix a race condition in directory creation. 
+     (Devaraj via omalley)
+
+ 85. HADOOP-1546.  Remove spurious column from HDFS web UI.
+     (Dhruba Borthakur via cutting)
+
+ 86. HADOOP-1556.  Make LocalJobRunner delete working files at end of
+     job run.  (Devaraj Das via tomwhite)
+
+ 87. HADOOP-1571.  Add contrib lib directories to root build.xml
+     javadoc classpath.  (Michael Stack via tomwhite)
+
+ 88. HADOOP-1554.  Log killed tasks to the job history and display them on the
+     web/ui. (Devaraj Das via omalley)
+
+ 89. HADOOP-1533.  Add persistent error logging for distcp. The logs are stored
+    into a specified hdfs directory. (Senthil Subramanian via omalley)
+
+ 90. HADOOP-1286.  Add support to HDFS for distributed upgrades, which
+     permits coordinated upgrade of datanode data.
+     (Konstantin Shvachko via cutting)
+
+ 91. HADOOP-1580.  Improve contrib/streaming so that subprocess exit
+     status is displayed for errors.  (John Heidemann via cutting)
+
+ 92. HADOOP-1448.  In HDFS, randomize lists of non-local block
+     locations returned to client, so that load is better balanced.
+     (Hairong Kuang via cutting)
+
+ 93. HADOOP-1578.  Fix datanode to send its storage id to namenode
+     during registration.  (Konstantin Shvachko via cutting)
+
+ 94. HADOOP-1584.  Fix a bug in GenericWritable which limited it to
+     128 types instead of 256.  (Espen Amble Kolstad via cutting)
+
+ 95. HADOOP-1473.  Make job ids unique across jobtracker restarts.
+     (omalley via cutting)
+
+ 96. HADOOP-1582.  Fix libhdfs to return 0 instead of -1 at
+     end-of-file, per C conventions.  (Christian Kunz via cutting)
+
+ 97. HADOOP-911.  Fix a multithreading bug in libhdfs.
+     (Christian Kunz)
+
+ 98. HADOOP-1486.  Fix so that fatal exceptions in namenode cause it
+     to exit.  (Dhruba Borthakur via cutting)
+
+ 99. HADOOP-1470.  Factor checksum generation and validation out of
+     ChecksumFileSystem so that it can be reused by FileSystems with
+     built-in checksumming.  (Hairong Kuang via cutting)
+
+100. HADOOP-1590.  Use relative URLs in jobtracker JSP pages, so that the
+     webapp can be used in non-root contexts.  (Thomas Friol via cutting)
+
+101. HADOOP-1596.  Fix the parsing of taskids by streaming and improve the
+     error reporting. (omalley)
+
+102. HADOOP-1535.  Fix the user-controlled grouping to the reduce function.
+     (Vivek Ratan via omalley)
+
+103. HADOOP-1585.  Modify GenericWritable to declare the classes as subtypes
+     of Writable.  (Espen Amble Kolstad via omalley)
+
+104. HADOOP-1576.  Fix errors in count of completed tasks when
+     speculative execution is enabled.  (Arun C Murthy via cutting)
+
+105. HADOOP-1598.  Fix license headers: add missing headers and update old ones.
+     (Enis Soztutar via cutting)
+
+106. HADOOP-1547.  Provide examples for aggregate library.
+     (Runping Qi via tomwhite)
+
+107. HADOOP-1570.  Permit jobs to enable and disable the use of
+     hadoop's native library.  (Arun C Murthy via cutting)
+
+108. HADOOP-1433.  Add job priority.  (Johan Oskarsson via tomwhite)
+
+109. HADOOP-1597.  Add status reports and post-upgrade options to HDFS
+     distributed upgrade.  (Konstantin Shvachko via cutting)
+
+110. HADOOP-1524.  Permit user task logs to appear as they're
+     created.  (Michael Bieniosek via cutting)
+
+111. HADOOP-1599.  Fix distcp bug on Windows.  (Senthil Subramanian via cutting)
+
+112. HADOOP-1562.  Add JVM metrics, including GC and logging stats.
+     (David Bowen via cutting)
+
+113. HADOOP-1613.  Fix "DFS Health" page to display correct time of
+     last contact.  (Dhruba Borthakur via cutting)
+
+114. HADOOP-1134.  Add optimized checksum support to HDFS.  Checksums
+     are now stored with each block, rather than as parallel files.
+     This reduces the namenode's memory requirements and increases
+     data integrity.  (Raghu Angadi via cutting)
+
+115. HADOOP-1400.  Make JobClient retry requests, so that clients can
+     survive jobtracker problems.  (omalley via cutting)
+
+116. HADOOP-1564.  Add unit tests for HDFS block-level checksums.
+     (Dhruba Borthakur via cutting)
+
+117. HADOOP-1620.  Reduce the number of abstract FileSystem methods,
+     simplifying implementations.  (cutting)
+
+118. HADOOP-1625.  Fix a "could not move files" exception in datanode.
+     (Raghu Angadi via cutting)
+
+119. HADOOP-1624.  Fix an infinite loop in datanode. (Raghu Angadi via cutting)
+
+120. HADOOP-1084.  Switch mapred file cache to use file modification
+     time instead of checksum to detect file changes, as checksums are
+     no longer easily accessed.  (Arun C Murthy via cutting)
+
+130. HADOOP-1623.  Fix an infinite loop when copying directories.
+     (Dhruba Borthakur via cutting)
+
+131. HADOOP-1603.  Fix a bug in namenode initialization where
+     default replication is sometimes reset to one on restart.
+     (Raghu Angadi via cutting)
+
+132. HADOOP-1635.  Remove hardcoded keypair name and fix launch-hadoop-cluster
+     to support later versions of ec2-api-tools.  (Stu Hood via tomwhite)
+
+133. HADOOP-1638.  Fix contrib EC2 scripts to support NAT addressing.
+     (Stu Hood via tomwhite) 
+
+134. HADOOP-1632.  Fix an IllegalArgumentException in fsck.
+     (Hairong Kuang via cutting)
+
+135. HADOOP-1619.  Fix FSInputChecker to not attempt to read past EOF.
+     (Hairong Kuang via cutting)
+
+136. HADOOP-1640.  Fix TestDecommission on Windows.
+     (Dhruba Borthakur via cutting)
+
+137. HADOOP-1587.  Fix TestSymLink to get required system properties.
+     (Devaraj Das via omalley)
+
+138. HADOOP-1628.  Add block CRC protocol unit tests. (Raghu Angadi via omalley)
+
+139. HADOOP-1653.  FSDirectory code-cleanups. FSDirectory.INode
+     becomes a static class.  (Christophe Taton via dhruba)
+
+140. HADOOP-1066.  Restructure documentation to make it more
+     user-friendly.  (Connie Kleinjans and Jeff Hammerbacher via cutting)
+
+141. HADOOP-1551.  libhdfs supports setting replication factor and
+     retrieving modification time of files.  (Sameer Paranjpye via dhruba)
+
+141. HADOOP-1647.  FileSystem.getFileStatus returns valid values for "/".
+     (Dhruba Borthakur via dhruba)
+
+142. HADOOP-1657.  Fix NNBench to ensure that the block size is a
+     multiple of bytes.per.checksum. (Raghu Angadi via dhruba)
+
+143. HADOOP-1553.  Replace user task output and log capture code to use shell
+     redirection instead of copier threads in the TaskTracker.  The size of
+     the retained output is now capped via an in-memory tail, so it should
+     not be large.  The output of the tasklog servlet is not forced into
+     UTF8 and is
+     not buffered entirely in memory. (omalley)
+     Configuration changes to hadoop-default.xml:
+       remove mapred.userlog.num.splits
+       remove mapred.userlog.purge.splits
+       change default mapred.userlog.limit.kb to 0 (no limit)
+       change default mapred.userlog.retain.hours to 24
+     Configuration changes to log4j.properties:
+       remove log4j.appender.TLA.noKeepSplits
+       remove log4j.appender.TLA.purgeLogSplits
+       remove log4j.appender.TLA.logsRetainHours
+     URL changes:
+       http://<tasktracker>/tasklog.jsp -> http://<tasktracker>/tasklog with
+         parameters limited to start and end, which may be positive (from
+         start) or negative (from end).
+     Environment:
+       require bash (v2 or later) and tail
+
+144. HADOOP-1659.  Fix a job id/job name mixup. (Arun C. Murthy via omalley)
+
+145. HADOOP-1665.  With HDFS Trash enabled, if the same file is created
+     and deleted more than once, the succeeding deletions create Trash item
+     names suffixed with an integer.  (Dhruba Borthakur via dhruba)
+
+146. HADOOP-1666.  FsShell object can be used for multiple fs commands.
+     (Dhruba Borthakur via dhruba)
+
+147. HADOOP-1654.  Remove performance regression introduced by Block CRC.
+     (Raghu Angadi via dhruba)
+
+148. HADOOP-1680.  Improvements to Block CRC upgrade messages.
+     (Raghu Angadi via dhruba)
+
+149. HADOOP-71.  Allow Text and SequenceFile Map/Reduce inputs from non-default 
+     filesystems. (omalley)
+
+150. HADOOP-1568.  Expose HDFS as xml/http filesystem to provide cross-version
+     compatibility.  (Chris Douglas via omalley)
+
+151. HADOOP-1668.  Added an INCOMPATIBILITY section to CHANGES.txt. (nigel)
+
+152. HADOOP-1629.  Added an upgrade test for HADOOP-1134.
+     (Raghu Angadi via nigel)
+
+153. HADOOP-1698.  Fix performance problems on map output sorting for jobs
+     with large numbers of reduces. (Devaraj Das via omalley)
+
+154. HADOOP-1716.  Fix a Pipes wordcount example to remove the 'file:'
+     scheme from its output path.  (omalley via cutting)
+
+155. HADOOP-1714.  Fix TestDFSUpgradeFromImage to work on Windows.
+     (Raghu Angadi via nigel)
+
+156. HADOOP-1663.  Return a non-zero exit code if streaming fails. (Lohit Renu
+     via omalley)
+
+157. HADOOP-1712.  Fix an unhandled exception on datanode during block
+     CRC upgrade. (Raghu Angadi via cutting)
+
+158. HADOOP-1717.  Fix TestDFSUpgradeFromImage to work on Solaris.
+     (nigel via cutting)
+
+159. HADOOP-1437.  Add Eclipse plugin in contrib.
+     (Eugene Hung and Christophe Taton via cutting)
+
+
+Release 0.13.0 - 2007-06-08
+
+ 1. HADOOP-1047.  Fix TestReplication to succeed more reliably.
+    (Hairong Kuang via cutting)
+
+ 2. HADOOP-1063.  Fix a race condition in MiniDFSCluster test code.
+    (Hairong Kuang via cutting)
+
+ 3. HADOOP-1101.  In web ui, split shuffle statistics from reduce
+    statistics, and add some task averages.  (Devaraj Das via cutting)
+
+ 4. HADOOP-1071.  Improve handling of protocol version mismatch in
+    JobTracker.  (Tahir Hashmi via cutting)
+
+ 5. HADOOP-1116.  Increase heap size used for contrib unit tests.
+    (Philippe Gassmann via cutting)
+
+ 6. HADOOP-1120.  Add contrib/data_join, tools to simplify joining
+    data from multiple sources using MapReduce.  (Runping Qi via cutting)
+
+ 7. HADOOP-1064.  Reduce log level of some DFSClient messages.
+    (Dhruba Borthakur via cutting)
+
+ 8. HADOOP-1137.  Fix StatusHttpServer to work correctly when
+    resources are in a jar file.  (Benjamin Reed via cutting)
+
+ 9. HADOOP-1094.  Optimize generated Writable implementations for
+    records to not allocate a new BinaryOutputArchive or
+    BinaryInputArchive per call.  (Milind Bhandarkar via cutting)
+
+10. HADOOP-1068.  Improve error message for clusters with 0 datanodes.
+    (Dhruba Borthakur via tomwhite)
+
+11. HADOOP-1122.  Fix divide-by-zero exception in FSNamesystem
+    chooseTarget method.  (Dhruba Borthakur via tomwhite)
+
+12. HADOOP-1131.  Add a closeAll() static method to FileSystem.
+    (Philippe Gassmann via tomwhite)
+
+13. HADOOP-1085.  Improve port selection in HDFS and MapReduce test
+    code.  Ports are now selected by the OS during testing rather than
+    by probing for free ports, improving test reliability.
+    (Arun C Murthy via cutting)
+
+14. HADOOP-1153.  Fix HDFS daemons to correctly stop their threads.
+    (Konstantin Shvachko via cutting)
+
+15. HADOOP-1146.  Add a counter for reduce input keys and rename the
+    "reduce input records" counter to be "reduce input groups".
+    (David Bowen via cutting)
+
+16. HADOOP-1165.  In records, replace identical generated toString
+    methods with a method on the base class.  (Milind Bhandarkar via cutting)
+
+17. HADOOP-1164.  Fix TestReplicationPolicy to specify port zero, so
+    that a free port is automatically selected.  (omalley via cutting)
+
+18. HADOOP-1166.  Add a NullOutputFormat and use it in the
+    RandomWriter example.  (omalley via cutting)
+
+19. HADOOP-1169.  Fix a cut/paste error in CopyFiles utility so that
+    S3-based source files are correctly copied.  (Michael Stack via cutting)
+
+20. HADOOP-1167.  Remove extra synchronization in InMemoryFileSystem.
+    (omalley via cutting)
+
+21. HADOOP-1110.  Fix an off-by-one error counting map inputs.
+    (David Bowen via cutting)
+
+22. HADOOP-1178.  Fix a NullPointerException during namenode startup.
+    (Dhruba Borthakur via cutting)
+
+23. HADOOP-1011.  Fix a ConcurrentModificationException when viewing
+    job history.  (Tahir Hashmi via cutting)
+
+24. HADOOP-672.  Improve help for fs shell commands.
+    (Dhruba Borthakur via cutting)
+
+25. HADOOP-1170.  Improve datanode performance by removing device
+    checks from common operations.  (Igor Bolotin via cutting)
+
+26. HADOOP-1090.  Fix SortValidator's detection of whether the input 
+    file belongs to the sort-input or sort-output directory.
+    (Arun C Murthy via tomwhite)
+
+27. HADOOP-1081.  Fix bin/hadoop on Darwin.  (Michael Bieniosek via cutting)
+
+28. HADOOP-1045.  Add contrib/hbase, a BigTable-like online database.
+    (Jim Kellerman via cutting)
+
+29. HADOOP-1156.  Fix a NullPointerException in MiniDFSCluster.
+    (Hairong Kuang via cutting)
+
+30. HADOOP-702.  Add tools to help automate HDFS upgrades.
+    (Konstantin Shvachko via cutting)
+
+31. HADOOP-1163.  Fix ganglia metrics to aggregate metrics from different
+    hosts properly.  (Michael Bieniosek via tomwhite)
+
+32. HADOOP-1194.  Make the compression style record-level for map output
+    compression.  (Arun C Murthy via tomwhite)
+
+33. HADOOP-1187.  Improve DFS Scalability: avoid scanning entire list of
+    datanodes in getAdditionalBlocks.  (Dhruba Borthakur via tomwhite)
+
+34. HADOOP-1133.  Add tool to analyze and debug namenode on a production
+    cluster.  (Dhruba Borthakur via tomwhite)
+
+35. HADOOP-1151.  Remove spurious printing to stderr in streaming 
+    PipeMapRed.  (Koji Noguchi via tomwhite)
+
+36. HADOOP-988.  Change namenode to use a single map of blocks to metadata.
+    (Raghu Angadi via tomwhite)
+
+37. HADOOP-1203.  Change UpgradeUtilities used by DFS tests to use
+    MiniDFSCluster to start and stop NameNode/DataNodes.
+    (Nigel Daley via tomwhite)
+
+38. HADOOP-1217.  Add test.timeout property to build.xml, so that
+    long-running unit tests may be automatically terminated.
+    (Nigel Daley via cutting)
+
+39. HADOOP-1149.  Improve DFS Scalability: make 
+    processOverReplicatedBlock() a no-op if blocks are not 
+    over-replicated.  (Raghu Angadi via tomwhite)
+
+40. HADOOP-1149.  Improve DFS Scalability: optimize getDistance(), 
+    contains(), and isOnSameRack() in NetworkTopology.  
+    (Hairong Kuang via tomwhite)
+
+41. HADOOP-1218.  Make synchronization on TaskTracker's RunningJob 
+    object consistent.  (Devaraj Das via tomwhite)
+
+42. HADOOP-1219.  Ignore progress report once a task has reported as 
+    'done'.  (Devaraj Das via tomwhite)
+
+43. HADOOP-1114.  Permit user to specify additional CLASSPATH elements
+    with a HADOOP_CLASSPATH environment variable. (cutting)
+
+44. HADOOP-1198.  Remove ipc.client.timeout parameter override from 
+    unit test configuration.  Using the default is more robust and
+    has almost the same run time.  (Arun C Murthy via tomwhite)
+
+45. HADOOP-1211.  Remove deprecated constructor and unused static 
+    members in DataNode class.  (Konstantin Shvachko via tomwhite)
+
+46. HADOOP-1136.  Fix ArrayIndexOutOfBoundsException in 
+    FSNamesystem$UnderReplicatedBlocks add() method.  
+    (Hairong Kuang via tomwhite)
+
+47. HADOOP-978.  Add the client name and the address of the node that
+    previously started to create the file to the description of 
+    AlreadyBeingCreatedException.  (Konstantin Shvachko via tomwhite)
+
+48. HADOOP-1001.  Check the type of keys and values generated by the 
+    mapper against the types specified in JobConf.  
+    (Tahir Hashmi via tomwhite)
+
+49. HADOOP-971.  Improve DFS Scalability: Improve name node performance
+    by adding a hostname to datanodes map.  (Hairong Kuang via tomwhite)
+
+50. HADOOP-1189.  Fix 'No space left on device' exceptions on datanodes.
+    (Raghu Angadi via tomwhite)
+
+51. HADOOP-819.  Change LineRecordWriter to not insert a tab between
+    key and value when either is null, and to print nothing when both
+    are null.  (Runping Qi via cutting)
+
+52. HADOOP-1204.  Rename InputFormatBase to be FileInputFormat, and
+    deprecate InputFormatBase.  Also make LineRecordReader easier to
+    extend.  (Runping Qi via cutting)
+
+53. HADOOP-1213.  Improve logging of errors by IPC server, to
+    consistently include the service name and the call.  (cutting)
+
+54. HADOOP-1238.  Fix metrics reporting by TaskTracker to correctly
+    track maps_running and reduces_running.
+    (Michael Bieniosek via cutting)
+
+55. HADOOP-1093.  Fix a race condition in HDFS where blocks were
+    sometimes erased before they were reported written.
+    (Dhruba Borthakur via cutting)
+
+56. HADOOP-1239.  Add a package name to some testjar test classes.
+    (Jim Kellerman via cutting)
+
+57. HADOOP-1241.  Fix NullPointerException in processReport when 
+    namenode is restarted.  (Dhruba Borthakur via tomwhite)
+
+58. HADOOP-1244.  Fix stop-dfs.sh to no longer incorrectly specify 
+    slaves file for stopping datanode.  
+    (Michael Bieniosek via tomwhite)
+
+59. HADOOP-1253.  Fix ConcurrentModificationException and 
+    NullPointerException in JobControl.  
+    (Johan Oskarsson via tomwhite)
+
+60. HADOOP-1256.  Fix NameNode so that multiple DataNodeDescriptors
+    can no longer be created on startup.  (Hairong Kuang via cutting)
+
+61. HADOOP-1214.  Replace streaming classes with new counterparts 
+    from Hadoop core.  (Runping Qi via tomwhite)
+
+62. HADOOP-1250.  Move a chmod utility from streaming to FileUtil.
+    (omalley via cutting)
+
+63. HADOOP-1258.  Fix TestCheckpoint test case to wait for 
+    MiniDFSCluster to be active.  (Nigel Daley via tomwhite)
+
+64. HADOOP-1148.  Re-indent all Java source code to consistently use
+    two spaces per indent level.  (cutting)
+
+65. HADOOP-1251.  Add a method to Reporter to get the map InputSplit.
+    (omalley via cutting)
+
+66. HADOOP-1224.  Fix "Browse the filesystem" link to no longer point 
+    to dead datanodes.  (Enis Soztutar via tomwhite)
+
+67. HADOOP-1154.  Fail a streaming task if the threads reading from or 
+    writing to the streaming process fail.  (Koji Noguchi via tomwhite)
+
+68. HADOOP-968.  Move shuffle and sort to run in reduce's child JVM,
+    rather than in TaskTracker.  (Devaraj Das via cutting)
+
+69. HADOOP-1111.  Add support for client notification of job
+    completion. If the job configuration has a job.end.notification.url
+    property, it will make an HTTP GET request to the specified URL.
+    The number of retries and the interval between retries is also
+    configurable. (Alejandro Abdelnur via tomwhite)
+
+70. HADOOP-1275.  Fix misspelled job notification property in
+    hadoop-default.xml.  (Alejandro Abdelnur via tomwhite)
+
+71. HADOOP-1152.  Fix race condition in MapOutputCopier.copyOutput file
+    rename causing possible reduce task hang.
+    (Tahir Hashmi via tomwhite)
+
+72. HADOOP-1050.  Distinguish between failed and killed tasks so as to 
+    not count a lost tasktracker against the job.  
+    (Arun C Murthy via tomwhite)
+
+73. HADOOP-1271.  Fix StreamBaseRecordReader to be able to log record 
+    data that's not UTF-8.  (Arun C Murthy via tomwhite)
+
+74. HADOOP-1190.  Fix unchecked warnings in main Hadoop code.  
+    (tomwhite)
+
+75. HADOOP-1127.  Fix AlreadyBeingCreatedException in namenode for 
+    jobs run with speculative execution.
+    (Arun C Murthy via tomwhite)
+
+76. HADOOP-1282.  Omnibus HBase patch.  Improved tests & configuration.
+    (Jim Kellerman via cutting)
+
+77. HADOOP-1262.  Make dfs client try to read from a different replica 
+    of the checksum file when a checksum error is detected.  
+    (Hairong Kuang via tomwhite)
+
+78. HADOOP-1279.  Fix JobTracker to maintain list of recently
+    completed jobs by order of completion, not submission.
+    (Arun C Murthy via cutting)
+
+79. HADOOP-1284.  In contrib/streaming, permit flexible specification
+    of field delimiter and fields for partitioning and sorting.
+    (Runping Qi via cutting)
+
+80. HADOOP-1176.  Fix a bug where reduce would hang when a map had
+    more than 2GB of output for it.  (Arun C Murthy via cutting)
+
+81. HADOOP-1293.  Fix contrib/streaming to print more than the first
+    twenty lines of standard error.  (Koji Noguchi via cutting)
+
+82. HADOOP-1297.  Fix datanode so that requests to remove blocks that
+    do not exist no longer cause block reports to be re-sent every
+    second.  (Dhruba Borthakur via cutting)
+
+83. HADOOP-1216.  Change MapReduce so that, when numReduceTasks is
+    zero, map outputs are written directly as final output, skipping
+    shuffle, sort and reduce.  Use this to implement reduce=NONE
+    option in contrib/streaming.  (Runping Qi via cutting)
+
+84. HADOOP-1294.  Fix unchecked warnings in main Hadoop code under 
+    Java 6.  (tomwhite)
+
+85. HADOOP-1299.  Fix so that RPC will restart after RPC.stopClient()
+    has been called.  (Michael Stack via cutting)
+
+86. HADOOP-1278.  Improve blacklisting of TaskTrackers by JobTracker,
+    to reduce false positives.  (Arun C Murthy via cutting)
+
+87. HADOOP-1290.  Move contrib/abacus into mapred/lib/aggregate.
+    (Runping Qi via cutting)
+
+88. HADOOP-1272.  Extract inner classes from FSNamesystem into separate 
+    classes.  (Dhruba Borthakur via tomwhite)
+
+89. HADOOP-1247.  Add support to contrib/streaming for aggregate
+    package, formerly called Abacus.  (Runping Qi via cutting)
+
+90. HADOOP-1061.  Fix bug in listing files in the S3 filesystem.
+    NOTE: this change is not backwards compatible!  You should use the 
+    MigrationTool supplied to migrate existing S3 filesystem data to 
+    the new format.  Please back up your data before upgrading
+    (using 'hadoop distcp' for example).  (tomwhite)
+
+91. HADOOP-1304.  Make configurable the maximum number of task
+    attempts before a job fails.  (Devaraj Das via cutting)
+
+92. HADOOP-1308.  Use generics to restrict types when classes are
+    passed as parameters to JobConf methods. (Michael Bieniosek via cutting)
+
+93. HADOOP-1312.  Fix a ConcurrentModificationException in NameNode
+    that killed the heartbeat monitoring thread.
+    (Dhruba Borthakur via cutting)
+
+94. HADOOP-1315.  Clean up contrib/streaming, switching it to use core
+    classes more and removing unused code.  (Runping Qi via cutting)
+
+95. HADOOP-485.  Allow a different comparator for grouping keys in
+    calls to reduce.  (Tahir Hashmi via cutting)
+
+96. HADOOP-1322.  Fix TaskTracker blacklisting to work correctly in
+    one- and two-node clusters.  (Arun C Murthy via cutting)
+
+97. HADOOP-1144.  Permit one to specify a maximum percentage of tasks
+    that can fail before a job is aborted.  The default is zero.
+    (Arun C Murthy via cutting)
+
+98. HADOOP-1184.  Fix HDFS decommissioning to complete when the only
+    copy of a block is on a decommissioned node. (Dhruba Borthakur via cutting)
+
+99. HADOOP-1263.  Change DFSClient to retry certain namenode calls
+    with a random, exponentially increasing backoff time, to avoid
+    overloading the namenode on, e.g., job start.  (Hairong Kuang via cutting)
+
+100. HADOOP-1325.  First complete, functioning version of HBase.
+    (Jim Kellerman via cutting)
+
+101. HADOOP-1276.  Make tasktracker expiry interval configurable.
+    (Arun C Murthy via cutting)
+
+102. HADOOP-1326.  Change JobClient#runJob() to return the job.
+    (omalley via cutting)
+
+103. HADOOP-1270.  Randomize the fetch of map outputs, speeding the
+     shuffle.  (Arun C Murthy via cutting)
+
+104. HADOOP-1200.  Restore disk checking lost in HADOOP-1170.
+     (Hairong Kuang via cutting)
+
+105. HADOOP-1252.  Changed MapReduce's allocation of local files to
+     use round-robin among available devices, rather than a hashcode.
+     More care is also taken to not allocate files on full or offline
+     drives.  (Devaraj Das via cutting)
+
+106. HADOOP-1324.  Change so that an FSError kills only the task that
+     generates it rather than the entire task tracker.
+     (Arun C Murthy via cutting)
+
+107. HADOOP-1310.  Fix unchecked warnings in aggregate code.  (tomwhite)
+
+108. HADOOP-1255.  Fix a bug where the namenode falls into an infinite
+     loop trying to remove a dead node.  (Hairong Kuang via cutting)
+
+109. HADOOP-1160.  Fix DistributedFileSystem.close() to close the
+     underlying FileSystem, correctly aborting files being written.
+     (Hairong Kuang via cutting)
+
+110. HADOOP-1341.  Fix intermittent failures in HBase unit tests
+     caused by deadlock.  (Jim Kellerman via cutting)
+
+111. HADOOP-1350.  Fix shuffle performance problem caused by forcing
+     chunked encoding of map outputs.  (Devaraj Das via cutting)
+
+112. HADOOP-1345.  Fix HDFS to correctly retry another replica when a
+     checksum error is encountered.  (Hairong Kuang via cutting)
+
+113. HADOOP-1205.  Improve synchronization around HDFS block map.
+     (Hairong Kuang via cutting)
+
+114. HADOOP-1353.  Fix a potential NullPointerException in namenode.
+     (Dhruba Borthakur via cutting)
+
+115. HADOOP-1354.  Fix a potential NullPointerException in FsShell.
+     (Hairong Kuang via cutting)
+
+116. HADOOP-1358.  Fix a potential bug when DFSClient calls skipBytes.
+     (Hairong Kuang via cutting)
+
+117. HADOOP-1356.  Fix a bug in ValueHistogram.  (Runping Qi via cutting)
+
+118. HADOOP-1363.  Fix locking bug in JobClient#waitForCompletion().
+     (omalley via cutting)
+
+119. HADOOP-1368.  Fix inconsistent synchronization in JobInProgress.
+     (omalley via cutting)
+
+120. HADOOP-1369.  Fix inconsistent synchronization in TaskTracker.
+     (omalley via cutting)
+
+121. HADOOP-1361.  Fix various calls to skipBytes() to check return
+     value. (Hairong Kuang via cutting)
+
+122. HADOOP-1388.  Fix a potential NullPointerException in web ui.
+     (Devaraj Das via cutting)
+
+123. HADOOP-1385.  Fix MD5Hash#hashCode() to generally hash to more
+     than 256 values.  (omalley via cutting)
+
+124. HADOOP-1386.  Fix Path to not permit the empty string as a
+     path, as this has led to accidental file deletion.  Instead
+     force applications to use "." to name the default directory.
+     (Hairong Kuang via cutting)
+
+125. HADOOP-1407.  Fix integer division bug in JobInProgress which
+     meant failed tasks didn't cause the job to fail.
+     (Arun C Murthy via tomwhite)
+
+126. HADOOP-1427.  Fix a typo that caused GzipCodec to incorrectly use
+     a very small input buffer.  (Espen Amble Kolstad via cutting)
+
+127. HADOOP-1435.  Fix globbing code to no longer use the empty string
+     to indicate the default directory, per HADOOP-1386.
+     (Hairong Kuang via cutting)
+
+128. HADOOP-1411.  Make task retry framework handle 
+     AlreadyBeingCreatedException when wrapped as a RemoteException.
+     (Hairong Kuang via tomwhite)
+
+129. HADOOP-1242.  Improve handling of DFS upgrades.
+     (Konstantin Shvachko via cutting)
+
+130. HADOOP-1332.  Fix so that TaskTracker exits reliably during unit
+     tests on Windows.  (omalley via cutting)
+
+131. HADOOP-1431.  Fix sort progress reporting during map to run only
+     while sorting, so that stuck maps are correctly terminated.
+     (Devaraj Das and Arun C Murthy via cutting)
+
+132. HADOOP-1452.  Change TaskTracker.MapOutputServlet.doGet.totalRead
+     to a long, permitting map outputs to exceed 2^31 bytes.
+     (omalley via cutting)
+
+133. HADOOP-1443.  Fix a bug opening zero-length files in HDFS.
+     (Konstantin Shvachko via cutting)
+
+
+Release 0.12.3 - 2007-04-06
+
+ 1. HADOOP-1162.  Fix bug in record CSV and XML serialization of
+    binary values.  (Milind Bhandarkar via cutting)
+
+ 2. HADOOP-1123.  Fix NullPointerException in LocalFileSystem when
+    trying to recover from a checksum error.
+    (Hairong Kuang & Nigel Daley via tomwhite)
+
+ 3. HADOOP-1177.  Fix bug where IOException in MapOutputLocation.getFile
+    was not being logged.  (Devaraj Das via tomwhite)
+
+ 4. HADOOP-1175.  Fix bugs in JSP for displaying a task's log messages.
+    (Arun C Murthy via cutting)
+
+ 5. HADOOP-1191.  Fix map tasks to wait until sort progress thread has
+    stopped before reporting the task done.  (Devaraj Das via cutting)
+
+ 6. HADOOP-1192.  Fix an integer overflow bug in FSShell's 'dus'
+    command and a performance problem in HDFS's implementation of it.
+    (Hairong Kuang via cutting)
+
+ 7. HADOOP-1105. Fix reducers to make "progress" while iterating 
+    through values.  (Devaraj Das & Owen O'Malley via tomwhite)
+
+ 8. HADOOP-1179. Make Task Tracker close index file as soon as the read 
+    is done when serving get-map-output requests.  
+    (Devaraj Das via tomwhite)
+
+
+Release 0.12.2 - 2007-23-17
+
+ 1. HADOOP-1135.  Fix bug in block report processing which may cause
+    the namenode to delete blocks.  (Dhruba Borthakur via tomwhite)
+
+ 2. HADOOP-1145.  Make XML serializer and deserializer classes public
+    in record package.  (Milind Bhandarkar via cutting)
+
+ 3. HADOOP-1140.  Fix a deadlock in metrics. (David Bowen via cutting)
+
+ 4. HADOOP-1150.  Fix streaming -reducer and -mapper to give them
+    defaults. (Owen O'Malley via tomwhite)
+
+
+Release 0.12.1 - 2007-03-17
+
+ 1. HADOOP-1035.  Fix a StackOverflowError in FSDataSet.
+    (Raghu Angadi via cutting)
+
+ 2. HADOOP-1053.  Fix VInt representation of negative values.  Also
+    remove references in generated record code to methods outside of
+    the record package and improve some record documentation.
+    (Milind Bhandarkar via cutting)
+
+ 3. HADOOP-1067.  Fix compilation failure when a Checkstyle jar is present
+    in the lib directory. Also remove the dependency on a particular
+    Checkstyle version number. (tomwhite)
+
+ 4. HADOOP-1060.  Fix an IndexOutOfBoundsException in the JobTracker
+    that could cause jobs to hang.  (Arun C Murthy via cutting)
+
+ 5. HADOOP-1077.  Fix a race condition fetching map outputs that could
+    hang reduces.  (Devaraj Das via cutting)
+
+ 6. HADOOP-1083.  Fix so that when a cluster restarts with a missing
+    datanode, its blocks are replicated.  (Hairong Kuang via cutting)
+
+ 7. HADOOP-1082.  Fix a NullPointerException in ChecksumFileSystem.
+    (Hairong Kuang via cutting)
+
+ 8. HADOOP-1088.  Fix record serialization of negative values.
+    (Milind Bhandarkar via cutting)
+
+ 9. HADOOP-1080.  Fix bug in bin/hadoop on Windows when native
+    libraries are present.  (ab via cutting)
+
+10. HADOOP-1091.  Fix a NullPointerException in MetricsRecord.
+    (David Bowen via tomwhite)
+
+11. HADOOP-1092.  Fix a NullPointerException in HeartbeatMonitor
+    thread. (Hairong Kuang via tomwhite)
+
+12. HADOOP-1112.  Fix a race condition in Hadoop metrics.
+    (David Bowen via tomwhite)
+
+13. HADOOP-1108.  Checksummed file system should retry reading if a
+    different replica is found when handling ChecksumException.
+    (Hairong Kuang via tomwhite)
+
+14. HADOOP-1070.  Fix a problem with number of racks and datanodes
+    temporarily doubling.  (Konstantin Shvachko via tomwhite)
+
+15. HADOOP-1099.  Fix NullPointerException in JobInProgress.
+    (Gautam Kowshik via tomwhite)
+
+16. HADOOP-1115.  Fix bug where FsShell copyToLocal doesn't
+    copy directories.  (Hairong Kuang via tomwhite)
+
+17. HADOOP-1109.  Fix NullPointerException in StreamInputFormat.
+    (Koji Noguchi via tomwhite)
+
+18. HADOOP-1117.  Fix DFS scalability: when the namenode is
+    restarted it consumes 80% CPU. (Dhruba Borthakur via
+    tomwhite)
+
+19. HADOOP-1089.  Make the C++ version of write and read v-int
+    agree with the Java versions.  (Milind Bhandarkar via
+    tomwhite)
+
+20. HADOOP-1096.  Rename InputArchive and OutputArchive and
+    make them public. (Milind Bhandarkar via tomwhite)
+
+21. HADOOP-1128.  Fix missing progress information in map tasks.
+    (Espen Amble Kolstad, Andrzej Bialecki, and Owen O'Malley
+    via tomwhite)
+
+22. HADOOP-1129.  Fix DFSClient to not hide IOExceptions in
+    flush method.  (Hairong Kuang via tomwhite)
+
+23. HADOOP-1126.  Optimize CPU usage for under replicated blocks
+    when cluster restarts.  (Hairong Kuang via tomwhite)
+
+
+Release 0.12.0 - 2007-03-02
+
+ 1. HADOOP-975.  Separate stdout and stderr from tasks.
+    (Arun C Murthy via cutting)
+
+ 2. HADOOP-982.  Add some setters and a toString() method to
+    BytesWritable.  (omalley via cutting)
+
+ 3. HADOOP-858.  Move contrib/smallJobsBenchmark to src/test, removing
+    obsolete bits. (Nigel Daley via cutting)
+
+ 4. HADOOP-992.  Fix MiniMR unit tests to use MiniDFS when specified,
+    rather than the local FS.  (omalley via cutting)
+
+ 5. HADOOP-954.  Change use of metrics to use callback mechanism.
+    Also rename utility class Metrics to MetricsUtil.
+    (David Bowen & Nigel Daley via cutting)
+
+ 6. HADOOP-893.  Improve HDFS client's handling of dead datanodes.
+    The set is no longer reset with each block, but rather is now
+    maintained for the life of an open file.  (Raghu Angadi via cutting)
+
+ 7. HADOOP-882.  Upgrade to jets3t version 0.5, used by the S3
+    FileSystem.  This version supports retries.  (Michael Stack via cutting)
+
+ 8. HADOOP-977.  Send task's stdout and stderr to JobClient's stdout
+    and stderr respectively, with each line tagged by the task's name.
+    (Arun C Murthy via cutting)
+
+ 9. HADOOP-761.  Change unit tests to not use /tmp.  (Nigel Daley via cutting)
+
+10. HADOOP-1007. Make names of metrics used in Hadoop unique.
+    (Nigel Daley via cutting)
+
+11. HADOOP-491.  Change mapred.task.timeout to be per-job, and make a
+    value of zero mean no timeout.  Also change contrib/streaming to
+    disable task timeouts.  (Arun C Murthy via cutting)
+
+12. HADOOP-1010.  Add Reporter.NULL, a Reporter implementation that
+    does nothing.  (Runping Qi via cutting)
+
+13. HADOOP-923.  In HDFS NameNode, move replication computation to a
+    separate thread, to improve heartbeat processing time.
+    (Dhruba Borthakur via cutting) 
+
+14. HADOOP-476.  Rewrite contrib/streaming command-line processing,
+    improving parameter validation.  (Sanjay Dahiya via cutting)
+
+15. HADOOP-973.  Improve error messages in Namenode.  This should help
+    to track down a problem that was appearing as a
+    NullPointerException.  (Dhruba Borthakur via cutting) 
+
+16. HADOOP-649.  Fix so that jobs with no tasks are not lost.
+    (Thomas Friol via cutting)
+
+17. HADOOP-803.  Reduce memory use by HDFS namenode, phase I.
+    (Raghu Angadi via cutting)
+
+18. HADOOP-1021.  Fix MRCaching-based unit tests on Windows.
+    (Nigel Daley via cutting)
+
+19. HADOOP-889.  Remove duplicate code from HDFS unit tests.
+    (Milind Bhandarkar via cutting)
+
+20. HADOOP-943.  Improve HDFS's fsck command to display the filename
+    for under-replicated blocks.  (Dhruba Borthakur via cutting) 
+
+21. HADOOP-333.  Add validator for sort benchmark output.
+    (Arun C Murthy via cutting)
+
+22. HADOOP-947.  Improve performance of datanode decommissioning.
+    (Dhruba Borthakur via cutting)
+
+23. HADOOP-442.  Permit one to specify hosts allowed to connect to
+    namenode and jobtracker with include and exclude files.  (Wendy
+    Chien via cutting)
+
+24. HADOOP-1017.  Cache constructors, for improved performance.
+    (Ron Bodkin via cutting)
+
+25. HADOOP-867.  Move split creation out of JobTracker to client.
+    Splits are now saved in a separate file, read by task processes
+    directly, so that user code is no longer required in the
+    JobTracker.  (omalley via cutting)
+
+26. HADOOP-1006.  Remove obsolete '-local' option from test code.
+    (Gautam Kowshik via cutting)
+
+27. HADOOP-952. Create a public (shared) Hadoop EC2 AMI.
+    The EC2 scripts now support launch of public AMIs.
+    (tomwhite)
+    
+28. HADOOP-1025. Remove some obsolete code in ipc.Server.  (cutting)
+
+29. HADOOP-997. Implement S3 retry mechanism for failed block
+    transfers. This includes a generic retry mechanism for use
+    elsewhere in Hadoop. (tomwhite)
+
+30. HADOOP-990.  Improve HDFS support for full datanode volumes.
+    (Raghu Angadi via cutting)
+
+31. HADOOP-564.  Replace uses of "dfs://" URIs with the more standard
+    "hdfs://".  (Wendy Chien via cutting)
+
+32. HADOOP-1030.  In unit tests, unify setting of ipc.client.timeout.
+    Also increase the value used from one to two seconds, in hopes of
+    making tests complete more reliably.  (cutting)
+
+33. HADOOP-654.  Stop assigning tasks to a tasktracker if it has
+    failed more than a specified number in the job.
+    (Arun C Murthy via cutting)
+
+34. HADOOP-985.  Change HDFS to identify nodes by IP address rather
+    than by DNS hostname.  (Raghu Angadi via cutting)
+
+35. HADOOP-248.  Optimize location of map outputs to not use random
+    probes.  (Devaraj Das via cutting)
+
+36. HADOOP-1029.  Fix streaming's input format to correctly seek to
+    the start of splits.  (Arun C Murthy via cutting)
+
+37. HADOOP-492.  Add per-job and per-task counters.  These are
+    incremented via the Reporter interface and available through the
+    web ui and the JobClient API.  The mapreduce framework maintains a
+    few basic counters, and applications may add their own.  Counters
+    are also passed to the metrics system.
+    (David Bowen via cutting)
+
+38. HADOOP-1034.  Fix datanode to better log exceptions.
+    (Philippe Gassmann via cutting)
+
+39. HADOOP-878.  In contrib/streaming, fix reducer=NONE to work with
+    multiple maps.  (Arun C Murthy via cutting)
+
+40. HADOOP-1039.  In HDFS's TestCheckpoint, avoid restarting
+    MiniDFSCluster so often, speeding this test.  (Dhruba Borthakur via cutting)
+
+41. HADOOP-1040.  Update RandomWriter example to use counters and
+    user-defined input and output formats.  (omalley via cutting)
+
+42. HADOOP-1027.  Fix problems with in-memory merging during shuffle
+    and re-enable this optimization.  (Devaraj Das via cutting)
+
+43. HADOOP-1036.  Fix exception handling in TaskTracker to keep tasks
+    from being lost.  (Arun C Murthy via cutting)
+
+44. HADOOP-1042.  Improve the handling of failed map output fetches.
+    (Devaraj Das via cutting)
+
+45. HADOOP-928.  Make checksums optional per FileSystem.
+    (Hairong Kuang via cutting)
+
+46. HADOOP-1044.  Fix HDFS's TestDecommission to not spuriously fail.
+    (Wendy Chien via cutting)
+
+47. HADOOP-972.  Optimize HDFS's rack-aware block placement algorithm.
+    (Hairong Kuang via cutting)
+
+48. HADOOP-1043.  Optimize shuffle, increasing parallelism.
+    (Devaraj Das via cutting)
+
+49. HADOOP-940.  Improve HDFS's replication scheduling.
+    (Dhruba Borthakur via cutting) 
+
+50. HADOOP-1020.  Fix a bug in Path resolution, and a problem with unit tests
+    on Windows.  (cutting)
+
+51. HADOOP-941.  Enhance record facility.
+    (Milind Bhandarkar via cutting)
+
+52. HADOOP-1000.  Fix so that log messages in task subprocesses are
+    not written to a task's standard error.  (Arun C Murthy via cutting)
+
+53. HADOOP-1037.  Fix bin/slaves.sh, which currently only works with
+    /bin/bash, to specify /bin/bash rather than /bin/sh.  (cutting)
+
+54. HADOOP-1046. Clean up partially received stale block files from tmp. (ab)
+
+55. HADOOP-1041.  Optimize mapred counter implementation.  Also group
+    counters by their declaring Enum.  (David Bowen via cutting)
+
+56. HADOOP-1032.  Permit one to specify jars that will be cached
+    across multiple jobs.  (Gautam Kowshik via cutting)
+
+57. HADOOP-1051.  Add optional checkstyle task to build.xml.  To use
+    this developers must download the (LGPL'd) checkstyle jar
+    themselves.  (tomwhite via cutting)
+
+58. HADOOP-1049.  Fix a race condition in IPC client.
+    (Devaraj Das via cutting)
+
+60. HADOOP-1056.  Check HDFS include/exclude node lists with both IP
+    address and hostname.  (Wendy Chien via cutting)
+
+61. HADOOP-994.  In HDFS, limit the number of blocks invalidated at
+    once.  Large lists were causing datanodes to time out.
+    (Dhruba Borthakur via cutting) 
+
+62. HADOOP-432.  Add a trash feature, disabled by default.  When
+    enabled, the FSShell 'rm' command will move things to a trash
+    directory in the filesystem.  In HDFS, a thread periodically
+    checkpoints the trash and removes old checkpoints.  (cutting)
+
+
+Release 0.11.2 - 2007-02-16
+
+ 1. HADOOP-1009.  Fix an infinite loop in the HDFS namenode.
+    (Dhruba Borthakur via cutting) 
+
+ 2. HADOOP-1014.  Disable in-memory merging during shuffle, as this is
+    causing data corruption.  (Devaraj Das via cutting)
+
+
+Release 0.11.1 - 2007-02-09
+
+ 1. HADOOP-976.  Make SequenceFile.Metadata public.  (Runping Qi via cutting)
+
+ 2. HADOOP-917.  Fix a NullPointerException in SequenceFile's merger
+    with large map outputs.  (omalley via cutting)
+
+ 3. HADOOP-984.  Fix a bug in shuffle error handling introduced by
+    HADOOP-331.  If a map output is unavailable, the job tracker is
+    once more informed.  (Arun C Murthy via cutting)
+
+ 4. HADOOP-987.  Fix a problem in HDFS where blocks were not removed
+    from neededReplications after a replication target was selected.
+    (Hairong Kuang via cutting)
+
+
+Release 0.11.0 - 2007-02-02
+
+ 1. HADOOP-781.  Remove methods deprecated in 0.10 that are no longer
+    widely used.  (cutting)
+
+ 2. HADOOP-842.  Change HDFS protocol so that the open() method is
+    passed the client hostname, to permit the namenode to order block
+    locations on the basis of network topology.
+    (Hairong Kuang via cutting)
+
+ 3. HADOOP-852.  Add an ant task to compile record definitions, and
+    use it to compile record unit tests.  (Milind Bhandarkar via cutting)
+
+ 4. HADOOP-757.  Fix "Bad File Descriptor" exception in HDFS client
+    when an output file is closed twice.  (Raghu Angadi via cutting)
+
+ 5. [ intentionally blank ]
+
+ 6. HADOOP-890.  Replace dashes in metric names with underscores,
+    for better compatibility with some monitoring systems.
+    (Nigel Daley via cutting)
+
+ 7. HADOOP-801.  Add to jobtracker a log of task completion events.
+    (Sanjay Dahiya via cutting)
+
+ 8. HADOOP-855.  In HDFS, try to repair files with checksum errors.
+    An exception is still thrown, but corrupt blocks are now removed
+    when they have replicas.  (Wendy Chien via cutting)
+
+ 9. HADOOP-886.  Reduce number of timer threads created by metrics API
+    by pooling contexts.  (Nigel Daley via cutting)
+
+10. HADOOP-897.  Add a "javac.args" property to build.xml that permits
+    one to pass arbitrary options to javac. (Milind Bhandarkar via cutting)
+
+11. HADOOP-899.  Update libhdfs for changes in HADOOP-871.
+    (Sameer Paranjpye via cutting)
+
+12. HADOOP-905.  Remove some dead code from JobClient.  (cutting)
+
+13. HADOOP-902.  Fix a NullPointerException in HDFS client when
+    closing output streams.  (Raghu Angadi via cutting)
+
+14. HADOOP-735.  Switch generated record code to use BytesWritable to
+    represent fields of type 'buffer'. (Milind Bhandarkar via cutting)
+
+15. HADOOP-830.  Improve mapreduce merge performance by buffering and
+    merging multiple map outputs as they arrive at reduce nodes before
+    they're written to disk.  (Devaraj Das via cutting)
+
+16. HADOOP-908.  Add a new contrib package, Abacus, that simplifies
+    counting and aggregation, built on MapReduce.  (Runping Qi via cutting)
+
+17. HADOOP-901.  Add support for recursive renaming to the S3 filesystem.
+    (Tom White via cutting)
+
+18. HADOOP-912.  Fix a bug in TaskTracker.isIdle() that was
+    sporadically causing unit test failures.  (Arun C Murthy via cutting)
+
+19. HADOOP-909.  Fix the 'du' command to correctly compute the size of
+    FileSystem directory trees.  (Hairong Kuang via cutting)
+
+20. HADOOP-731.  When a checksum error is encountered on a file stored
+    in HDFS, try another replica of the data, if any.
+    (Wendy Chien via cutting)
+
+21. HADOOP-732.  Add support to SequenceFile for arbitrary metadata,
+    as a set of attribute value pairs.  (Runping Qi via cutting)
+
+22. HADOOP-929.  Fix PhasedFileSystem to pass configuration to
+    underlying FileSystem.  (Sanjay Dahiya via cutting)
+
+23. HADOOP-935.  Fix contrib/abacus to not delete pre-existing output
+    files, but rather to fail in this case.  (Runping Qi via cutting)
+
+24. HADOOP-936.  More metric renamings, as in HADOOP-890.
+    (Nigel Daley via cutting)
+
+25. HADOOP-856.  Fix HDFS's fsck command to not report that
+    non-existent filesystems are healthy.  (Milind Bhandarkar via cutting)
+
+26. HADOOP-602.  Remove the dependency on Lucene's PriorityQueue
+    utility, by copying it into Hadoop.  This facilitates using Hadoop
+    with different versions of Lucene without worrying about CLASSPATH
+    order.  (Milind Bhandarkar via cutting)
+
+27. [ intentionally blank ]
+
+28. HADOOP-227.  Add support for backup namenodes, which periodically
+    get snapshots of the namenode state.  (Dhruba Borthakur via cutting) 
+
+29. HADOOP-884.  Add scripts in contrib/ec2 to facilitate running
+    Hadoop on an Amazon EC2 cluster.  (Tom White via cutting)
+
+30. HADOOP-937.  Change the namenode to request re-registration of
+    datanodes in more circumstances.  (Hairong Kuang via cutting)
+
+31. HADOOP-922.  Optimize small forward seeks in HDFS.  If data is
+    likely already in flight, skip ahead rather than re-opening the
+    block.  (Dhruba Borthakur via cutting)
+
+32. HADOOP-961.  Add a 'job -events' sub-command that prints job
+    events, including task completions and failures.  (omalley via cutting)
+
+33. HADOOP-959.  Fix namenode snapshot code added in HADOOP-227 to
+    work on Windows.  (Dhruba Borthakur via cutting)
+
+34. HADOOP-934.  Fix TaskTracker to catch metrics exceptions that were
+    causing heartbeats to fail.  (Arun Murthy via cutting)
+
+35. HADOOP-881.  Fix JobTracker web interface to display the correct
+    number of task failures.  (Sanjay Dahiya via cutting)
+
+36. HADOOP-788.  Change contrib/streaming to subclass TextInputFormat,
+    permitting it to take advantage of native compression facilities.
+    (Sanjay Dahiya via cutting)
+
+37. HADOOP-962.  In contrib/ec2: make scripts executable in tar file;
+    add a README; make the environment file use a template.
+    (Tom White via cutting)
+
+38. HADOOP-549.  Fix a NullPointerException in TaskReport's
+    serialization.  (omalley via cutting)
+
+39. HADOOP-963.  Fix remote exceptions to have the stack trace of the
+    caller thread, not the IPC listener thread.  (omalley via cutting)
+
+40. HADOOP-967.  Change RPC clients to start sending a version header.
+    (omalley via cutting)
+
+41. HADOOP-964.  Fix a bug introduced by HADOOP-830 where jobs failed
+    whose comparators and/or i/o types were in the job's jar.
+    (Dennis Kubes via cutting)
+
+42. HADOOP-969.  Fix a deadlock in JobTracker.  (omalley via cutting)
+
+43. HADOOP-862.  Add support for the S3 FileSystem to the CopyFiles
+    tool.  (Michael Stack via cutting)
+
+44. HADOOP-965.  Fix IsolationRunner so that job's jar can be found.
+    (Dennis Kubes via cutting)
+
+45. HADOOP-309.  Fix two NullPointerExceptions in StatusHttpServer.
+    (navychen via cutting)
+
+46. HADOOP-692.  Add rack awareness to HDFS's placement of blocks.
+    (Hairong Kuang via cutting)
+
+
+Release 0.10.1 - 2007-01-10
+
+ 1. HADOOP-857.  Fix S3 FileSystem implementation to permit its use
+    for MapReduce input and output.  (Tom White via cutting)
+
+ 2. HADOOP-863.  Reduce logging verbosity introduced by HADOOP-813.
+    (Devaraj Das via cutting)
+
+ 3. HADOOP-815.  Fix memory leaks in JobTracker. (Arun C Murthy via cutting)
+
+ 4. HADOOP-600.  Fix a race condition in JobTracker.
+    (Arun C Murthy via cutting)
+
+ 5. HADOOP-864.  Fix 'bin/hadoop -jar' to operate correctly when
+    hadoop.tmp.dir does not yet exist.  (omalley via cutting)
+
+ 6. HADOOP-866.  Fix 'dfs -get' command to remove existing crc files,
+    if any.  (Milind Bhandarkar via cutting)
+
+ 7. HADOOP-871.  Fix a bug in bin/hadoop setting JAVA_LIBRARY_PATH.
+    (Arun C Murthy via cutting)
+
+ 8. HADOOP-868.  Decrease the number of open files during map,
+    respecting io.sort.factor.  (Devaraj Das via cutting)
+
+ 9. HADOOP-865.  Fix S3 FileSystem so that partially created files can
+    be deleted.  (Tom White via cutting)
+
+10. HADOOP-873.  Pass java.library.path correctly to child processes.
+    (omalley via cutting)
+
+11. HADOOP-851.  Add support for the LZO codec.  This is much faster
+    than the default, zlib-based compression, but it is only available
+    when the native library is built.  (Arun C Murthy via cutting)
+
+12. HADOOP-880.  Fix S3 FileSystem to remove directories.
+    (Tom White via cutting)
+
+13. HADOOP-879.  Fix InputFormatBase to handle output generated by
+    MapFileOutputFormat.  (cutting)
+
+14. HADOOP-659.  In HDFS, prioritize replication of blocks based on
+    current replication level.  Blocks which are severely
+    under-replicated should be further replicated before blocks which
+    are less under-replicated.  (Hairong Kuang via cutting)
+
+15. HADOOP-726.  Deprecate FileSystem locking methods.  They are not
+    currently usable.  Locking should eventually be provided as an
+    independent service.  (Raghu Angadi via cutting)
+
+16. HADOOP-758.  Fix exception handling during reduce so that root
+    exceptions are not masked by exceptions in cleanups.
+    (Raghu Angadi via cutting)
+
+
+Release 0.10.0 - 2007-01-05
+
+ 1. HADOOP-763. Change DFS namenode benchmark to not use MapReduce.
+    (Nigel Daley via cutting)
+
+ 2. HADOOP-777. Use fully-qualified hostnames for tasktrackers and
+    datanodes.  (Mahadev Konar via cutting)
+
+ 3. HADOOP-621. Change 'dfs -cat' to exit sooner when output has been
+    closed.  (Dhruba Borthakur via cutting) 
+
+ 4. HADOOP-752. Rationalize some synchronization in DFS namenode.
+    (Dhruba Borthakur via cutting) 
+
+ 5. HADOOP-629. Fix RPC services to better check the protocol name and
+    version.  (omalley via cutting)
+
+ 6. HADOOP-774. Limit the number of invalid blocks returned with
+    heartbeats by the namenode to datanodes.  Transmitting and
+    processing very large invalid block lists can tie up both the
+    namenode and datanode for too long.  (Dhruba Borthakur via cutting) 
+
+ 7. HADOOP-738. Change 'dfs -get' command to not create CRC files by
+    default, adding a -crc option to force their creation.
+    (Milind Bhandarkar via cutting)
+
+ 8. HADOOP-676. Improved exceptions and error messages for common job
+    input specification errors.  (Sanjay Dahiya via cutting)
+
+ 9. [Included in 0.9.2 release]
+
+10. HADOOP-756. Add new dfsadmin option to wait for filesystem to be
+    operational.  (Dhruba Borthakur via cutting)
+
+11. HADOOP-770. Fix jobtracker web interface to display, on restart,
+    jobs that were running when it was last stopped.
+    (Sanjay Dahiya via cutting)
+
+12. HADOOP-331. Write all map outputs to a single file with an index,
+    rather than to a separate file per reduce task.  This should both
+    speed the shuffle and make things more scalable.
+    (Devaraj Das via cutting)
+
+13. HADOOP-818. Fix contrib unit tests to not depend on core unit
+    tests.  (omalley via cutting)
+
+14. HADOOP-786. Log common exception at debug level.
+    (Sanjay Dahiya via cutting)
+
+15. HADOOP-796. Provide more convenient access to failed task
+    information in the web interface.  (Sanjay Dahiya via cutting)
+
+16. HADOOP-764. Somewhat reduce memory allocations in the namenode.
+    (Dhruba Borthakur via cutting) 
+
+17. HADOOP-802. Update description of mapred.speculative.execution to
+    mention reduces.  (Nigel Daley via cutting)
+
+18. HADOOP-806. Include link to datanodes on front page of namenode
+    web interface.  (Raghu Angadi via cutting)
+
+19. HADOOP-618.  Make JobSubmissionProtocol public.
+    (Arun C Murthy via cutting)
+
+20. HADOOP-782.  Fully remove killed tasks.  (Arun C Murthy via cutting)
+
+21. HADOOP-792.  Fix 'dfs -mv' to return correct status.
+    (Dhruba Borthakur via cutting) 
+
+22. HADOOP-673.  Give each task its own working directory again.
+    (Mahadev Konar via cutting)
+
+23. HADOOP-571.  Extend the syntax of Path to be a URI; to be
+    optionally qualified with a scheme and authority.  The scheme
+    determines the FileSystem implementation, while the authority
+    determines the FileSystem instance.  New FileSystem
+    implementations may be provided by defining an fs.<scheme>.impl
+    property, naming the FileSystem implementation class.  This
+    permits easy integration of new FileSystem implementations.
+    (cutting)
+
+24. HADOOP-720.  Add an HDFS white paper to website.
+    (Dhruba Borthakur via cutting) 
+
+25. HADOOP-794.  Fix a divide-by-zero exception when a job specifies
+    zero map tasks.  (omalley via cutting)
+
+26. HADOOP-454.  Add a 'dfs -dus' command that provides summary disk
+    usage.  (Hairong Kuang via cutting)
+
+27. HADOOP-574.  Add an Amazon S3 implementation of FileSystem.  To
+    use this, one need only specify paths of the form
+    s3://id:secret@bucket/.  Alternately, the AWS access key id and
+    secret can be specified in your config, with the properties
+    fs.s3.awsAccessKeyId and fs.s3.awsSecretAccessKey.
+    (Tom White via cutting)
+
+28. HADOOP-824.  Rename DFSShell to be FsShell, since it applies
+    generically to all FileSystem implementations.  (cutting)
+
+29. HADOOP-813.  Fix map output sorting to report progress, so that
+    sorts which take longer than the task timeout do not fail.
+    (Devaraj Das via cutting)
+
+30. HADOOP-825.  Fix HDFS daemons when configured with new URI syntax.
+    (omalley via cutting)
+
+31. HADOOP-596.  Fix a bug in phase reporting during reduce.
+    (Sanjay Dahiya via cutting)
+
+32. HADOOP-811.  Add a utility, MultithreadedMapRunner.
+    (Alejandro Abdelnur via cutting)
+
+33. HADOOP-829.  Within HDFS, clearly separate three different
+    representations for datanodes: one for RPCs, one for
+    namenode-internal use, and one for namespace persistence.
+    (Dhruba Borthakur via cutting) 
+
+34. HADOOP-823.  Fix problem starting datanode when not all configured
+    data directories exist.  (Bryan Pendleton via cutting)
+
+35. HADOOP-451.  Add a Split interface.  CAUTION: This incompatibly
+    changes the InputFormat and RecordReader interfaces.  Not only is
+    FileSplit replaced with Split, but a FileSystem parameter is no
+    longer passed in several methods, input validation has changed,
+    etc.  (omalley via cutting)
+
+36. HADOOP-814.  Optimize locking in namenode. (Dhruba Borthakur via cutting) 
+
+37. HADOOP-738.  Change 'fs -put' and 'fs -get' commands to accept
+    standard input and output, respectively.  Standard i/o is
+    specified by a file named '-'.  (Wendy Chien via cutting)
+
+38. HADOOP-835.  Fix a NullPointerException reading record-compressed
+    SequenceFiles.  (Hairong Kuang via cutting)
+
+39. HADOOP-836.  Fix a MapReduce bug on Windows, where the wrong
+    FileSystem was used.  Also add a static FileSystem.getLocal()
+    method and better Path checking in HDFS, to help avoid such issues
+    in the future.  (omalley via cutting)
+
+40. HADOOP-837.  Improve RunJar utility to unpack the jar file into
+    hadoop.tmp.dir, rather than the system temporary directory.
+    (Hairong Kuang via cutting)
+
+41. HADOOP-841.  Fix native library to build 32-bit version even when
+    on a 64-bit host, if a 32-bit JVM is used.  (Arun C Murthy via cutting)
+
+42. HADOOP-838.  Fix tasktracker to pass java.library.path to
+    sub-processes, so that libhadoop.a is found.
+    (Arun C Murthy via cutting)
+
+43. HADOOP-844.  Send metrics messages on a fixed-delay schedule
+    instead of a fixed-rate schedule.  (David Bowen via cutting)
+
+44. HADOOP-849.  Fix OutOfMemory exceptions in TaskTracker due to a
+    file handle leak in SequenceFile.  (Devaraj Das via cutting)
+
+45. HADOOP-745.  Fix a synchronization bug in the HDFS namenode.
+    (Dhruba Borthakur via cutting)
+
+46. HADOOP-850.  Add Writable implementations for variable-length
+    integers.  (ab via cutting)
+
+47. HADOOP-525.  Add raw comparators to record types.  This greatly
+    improves record sort performance.  (Milind Bhandarkar via cutting)
+
+48. HADOOP-628.  Fix a problem with 'fs -cat' command, where some
+    characters were replaced with question marks.  (Wendy Chien via cutting)
+
+49. HADOOP-804.  Reduce verbosity of MapReduce logging.
+    (Sanjay Dahiya via cutting)
+
+50. HADOOP-853.  Rename 'site' to 'docs', in preparation for inclusion
+    in releases.  (cutting)
+
+51. HADOOP-371.  Include contrib jars and site documentation in
+    distributions.  Also add contrib and example documentation to
+    distributed javadoc, in separate sections.  (Nigel Daley via cutting)
+
+52. HADOOP-846.  Report progress during entire map, as sorting of
+    intermediate outputs may happen at any time, potentially causing
+    task timeouts.  (Devaraj Das via cutting)
+
+53. HADOOP-840.  In task tracker, queue task cleanups and perform them
+    in a separate thread.  (omalley & Mahadev Konar via cutting)
+
+54. HADOOP-681.  Add to HDFS the ability to decommission nodes.  This
+    causes their blocks to be re-replicated on other nodes, so that
+    they may be removed from a cluster.  (Dhruba Borthakur via cutting)
+
+55. HADOOP-470.  In HDFS web ui, list the datanodes containing each
+    copy of a block.  (Hairong Kuang via cutting)
+
+56. HADOOP-700.  Change bin/hadoop to only include core jar file on
+    classpath, not example, test, etc.  Also rename core jar to
+    hadoop-${version}-core.jar so that it can be more easily
+    identified.  (Nigel Daley via cutting)
+
+57. HADOOP-619.  Extend InputFormatBase to accept individual files and
+    glob patterns as MapReduce inputs, not just directories.  Also
+    change contrib/streaming to use this.  (Sanjay Dahiya via cutting)
+
+
+Release 0.9.2 - 2006-12-15
+
+ 1. HADOOP-639. Restructure InterTrackerProtocol to make task
+    accounting more reliable.  (Arun C Murthy via cutting)
+
+ 2. HADOOP-827. Turn off speculative execution by default, since it's
+    currently broken.  (omalley via cutting)
+
+ 3. HADOOP-791. Fix a deadlock in the task tracker.
+    (Mahadev Konar via cutting)
+
+
+Release 0.9.1 - 2006-12-06
+
+ 1. HADOOP-780. Use ReflectionUtils to instantiate key and value
+    objects. (ab)
+
+ 2. HADOOP-779. Fix contrib/streaming to work correctly with gzipped
+    input files.  (Hairong Kuang via cutting)
+
+
+Release 0.9.0 - 2006-12-01
+
+ 1. HADOOP-655.  Remove most deprecated code.  A few deprecated things
+    remain, notably UTF8 and some methods that are still required.
+    Also cleaned up constructors for SequenceFile, MapFile, SetFile,
+    and ArrayFile a bit.  (cutting)
+
+ 2. HADOOP-565.  Upgrade to Jetty version 6. (Sanjay Dahiya via cutting)
+
+ 3. HADOOP-682.  Fix DFS format command to work correctly when
+    configured with a non-existent directory. (Sanjay Dahiya via cutting)
+
+ 4. HADOOP-645.  Fix a bug in contrib/streaming when -reducer is NONE.
+    (Dhruba Borthakur via cutting) 
+
+ 5. HADOOP-687.  Fix a classpath bug in bin/hadoop that blocked the
+    servers from starting. (Sameer Paranjpye via omalley)
+
+ 6. HADOOP-683.  Remove a script dependency on bash, so it works with
+    dash, the new default for /bin/sh on Ubuntu.  (James Todd via cutting)
+
+ 7. HADOOP-382.  Extend unit tests to run multiple datanodes.
+    (Milind Bhandarkar via cutting)
+
+ 8. HADOOP-604.  Fix some synchronization issues and a
+    NullPointerException in DFS datanode.  (Raghu Angadi via cutting)
+
+ 9. HADOOP-459.  Fix memory leaks and a host of other issues with
+    libhdfs.  (Sameer Paranjpye via cutting)
+
+10. HADOOP-694.  Fix a NullPointerException in jobtracker.
+    (Mahadev Konar via cutting)
+
+11. HADOOP-637.  Fix a memory leak in the IPC server.  Direct buffers
+    are not collected like normal buffers, and provided little
+    advantage.  (Raghu Angadi via cutting)
+
+12. HADOOP-696.  Fix TestTextInputFormat unit test to not rely on the
+    order of directory listings.  (Sameer Paranjpye via cutting)
+
+13. HADOOP-611.  Add support for iterator-based merging to
+    SequenceFile.  (Devaraj Das via cutting)
+
+14. HADOOP-688.  Move DFS administrative commands to a separate
+    command named 'dfsadmin'.  (Dhruba Borthakur via cutting) 
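+
+    For example, an administrative report might now be requested with
+    (the subcommand shown is assumed for illustration):
+
+      bin/hadoop dfsadmin -report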
+
+15. HADOOP-708.  Fix test-libhdfs to return the correct status, so
+    that failures will break the build.  (Nigel Daley via cutting)
+
+16. HADOOP-646.  Fix namenode to handle edits files larger than 2GB.
+    (Milind Bhandarkar via cutting)
+
+17. HADOOP-705.  Fix a bug in the JobTracker when failed jobs were
+    not completely cleaned up.  (Mahadev Konar via cutting)
+
+18. HADOOP-613.  Perform final merge while reducing.  This removes one
+    sort pass over the data and should consequently significantly
+    decrease overall processing time.  (Devaraj Das via cutting)
+
+19. HADOOP-661.  Make each job's configuration visible through the web
+    ui.  (Arun C Murthy via cutting)
+
+20. HADOOP-489.  In MapReduce, separate user logs from system logs.
+    Each task's log output is now available through the web ui.  (Arun
+    C Murthy via cutting)
+
+21. HADOOP-712.  Fix record io's xml serialization to correctly handle
+    control-characters.  (Milind Bhandarkar via cutting)
+
+22. HADOOP-668.  Improvements to the web-based DFS browser.
+    (Hairong Kuang via cutting)
+
+23. HADOOP-715.  Fix build.xml so that test logs are written in build
+    directory, rather than in CWD.  (Arun C Murthy via cutting)
+
+24. HADOOP-538.  Add support for building an optional native library,
+    libhadoop.so, that improves the performance of zlib-based
+    compression.  To build this, specify -Dcompile.native to Ant.
+    (Arun C Murthy via cutting)
+
+25. HADOOP-610.  Fix a problem when the DFS block size is configured
+    to be smaller than the buffer size, typically only when debugging.
+    (Milind Bhandarkar via cutting)
+
+26. HADOOP-695.  Fix a NullPointerException in contrib/streaming.
+    (Hairong Kuang via cutting)
+
+27. HADOOP-652.  In DFS, when a file is deleted, the block count is
+    now decremented.  (Vladimir Krokhmalyov via cutting)
+
+28. HADOOP-725.  In DFS, optimize block placement algorithm,
+    previously a performance bottleneck.  (Milind Bhandarkar via cutting)
+
+29. HADOOP-723.  In MapReduce, fix a race condition during the
+    shuffle, which resulted in FileNotFoundExceptions.  (omalley via cutting)
+
+30. HADOOP-447.  In DFS, fix getBlockSize(Path) to work with relative
+    paths.  (Raghu Angadi via cutting)
+
+31. HADOOP-733.  Make exit codes in DFShell consistent and add a unit
+    test.  (Dhruba Borthakur via cutting)
+
+32. HADOOP-709.  Fix contrib/streaming to work with commands that
+    contain control characters.  (Dhruba Borthakur via cutting)
+
+33. HADOOP-677.  In IPC, permit a version header to be transmitted
+    when connections are established.  This will permit us to change
+    the format of IPC requests back-compatibly in subsequent releases.
+    (omalley via cutting)
+
+34. HADOOP-699.  Fix DFS web interface so that filesystem browsing
+    works correctly, using the right port number.  Also add support
+    for sorting datanode list by various columns.
+    (Raghu Angadi via cutting)
+
+35. HADOOP-76.  Implement speculative reduce.  Now when a job is
+    configured for speculative execution, both maps and reduces will
+    execute speculatively.  Reduce outputs are written to a temporary
+    location and moved to the final location when reduce is complete.
+    (Sanjay Dahiya via cutting)
+
+36. HADOOP-736.  Roll back to Jetty 5.1.4, due to performance problems
+    with Jetty 6.0.1.
+
+37. HADOOP-739.  Fix TestIPC to use different port number, making it
+    more reliable.  (Nigel Daley via cutting)
+
+38. HADOOP-749.  Fix a NullPointerException in jobfailures.jsp.
+    (omalley via cutting)
+
+39. HADOOP-747.  Fix record serialization to work correctly when
+    records are embedded in Maps.  (Milind Bhandarkar via cutting)
+
+40. HADOOP-698.  Fix HDFS client not to retry the same datanode on
+    read failures.  (Milind Bhandarkar via cutting)
+
+41. HADOOP-689. Add GenericWritable, to facilitate polymorphism in
+    MapReduce, SequenceFile, etc. (Feng Jiang via cutting)
+
+42. HADOOP-430.  Stop datanode's HTTP server when registration with
+    namenode fails.  (Wendy Chien via cutting)
+
+43. HADOOP-750.  Fix a potential race condition during mapreduce
+    shuffle.  (omalley via cutting)
+
+44. HADOOP-728.  Fix contrib/streaming-related issues, including
+    '-reducer NONE'.  (Sanjay Dahiya via cutting)
+
+
+Release 0.8.0 - 2006-11-03
+
+ 1. HADOOP-477.  Extend contrib/streaming to scan the PATH environment
+    variable when resolving executable program names.
+    (Dhruba Borthakur via cutting) 
+
+ 2. HADOOP-583.  In DFSClient, reduce the log level of re-connect
+    attempts from 'info' to 'debug', so they are not normally shown.
+    (Konstantin Shvachko via cutting)
+
+ 3. HADOOP-498.  Re-implement DFS integrity checker to run server-side,
+    for much improved performance.  (Milind Bhandarkar via cutting)
+
+ 4. HADOOP-586.  Use the jar name for otherwise un-named jobs.
+    (Sanjay Dahiya via cutting)
+
+ 5. HADOOP-514.  Make DFS heartbeat interval configurable.
+    (Milind Bhandarkar via cutting)
+
+ 6. HADOOP-588.  Fix logging and accounting of failed tasks.
+    (Sanjay Dahiya via cutting)
+
+ 7. HADOOP-462.  Improve command line parsing in DFSShell, so that
+    incorrect numbers of arguments result in informative errors rather
+    than ArrayOutOfBoundsException.  (Dhruba Borthakur via cutting) 
+
+ 8. HADOOP-561.  Fix DFS so that one replica of each block is written
+    locally, if possible.  This was the intent, but there was a bug.
+    (Dhruba Borthakur via cutting) 
+
+ 9. HADOOP-610.  Fix TaskTracker to survive more exceptions, keeping
+    tasks from becoming lost.  (omalley via cutting)
+
+10. HADOOP-625.  Add a servlet to all http daemons that displays a
+    stack dump, useful for debugging.  (omalley via cutting)
+
+11. HADOOP-554.  Fix DFSShell to return -1 for errors.
+    (Dhruba Borthakur via cutting) 
+
+12. HADOOP-626.  Correct the documentation in the NNBench example
+    code, and also remove a mistaken call there.
+    (Nigel Daley via cutting)
+
+13. HADOOP-634.  Add missing license to many files.
+    (Nigel Daley via cutting)
+
+14. HADOOP-627.  Fix some synchronization problems in MiniMRCluster
+    that sometimes caused unit tests to fail.  (Nigel Daley via cutting)
+
+15. HADOOP-563.  Improve the NameNode's lease policy so that leases
+    are held for one hour without renewal (instead of one minute).
+    However another attempt to create the same file will still succeed
+    if the lease has not been renewed within a minute.  This prevents
+    communication or scheduling problems from causing a write to fail
+    for up to an hour, barring some other process trying to create the
+    same file.  (Dhruba Borthakur via cutting)
+
+16. HADOOP-635.  In DFSShell, permit specification of multiple files
+    as the source for file copy and move commands.
+    (Dhruba Borthakur via cutting)
+
+17. HADOOP-641.  Change NameNode to request a fresh block report from
+    a re-discovered DataNode, so that no-longer-needed replications
+    are stopped promptly.  (Konstantin Shvachko via cutting)
+
+18. HADOOP-642.  Change IPC client to specify an explicit connect
+    timeout.  (Konstantin Shvachko via cutting)
+
+19. HADOOP-638.  Fix an unsynchronized access to TaskTracker's
+    internal state.  (Nigel Daley via cutting)
+
+20. HADOOP-624.  Fix servlet path to stop a Jetty warning on startup.
+    (omalley via cutting)
+
+21. HADOOP-578.  Failed tasks are no longer placed at the end of the
+    task queue.  This was originally done to work around other
+    problems that have now been fixed.  Re-executing failed tasks
+    sooner causes buggy jobs to fail faster.  (Sanjay Dahiya via cutting)
+
+22. HADOOP-658.  Update source file headers per Apache policy.  (cutting)
+
+23. HADOOP-636.  Add MapFile & ArrayFile constructors which accept a
+    Progressable, and pass it down to SequenceFile.  This permits
+    reduce tasks which use MapFile to still report progress while
+    writing blocks to the filesystem.  (cutting)
+
+24. HADOOP-576.  Enable contrib/streaming to use the file cache.  Also
+    extend the cache to permit symbolic links to cached items, rather
+    than local file copies.  (Mahadev Konar via cutting)
+
+25. HADOOP-482.  Fix unit tests to work when a cluster is running on
+    the same machine, removing port conflicts.  (Wendy Chien via cutting)
+
+26. HADOOP-90.  Permit dfs.name.dir to list multiple directories,
+    where namenode data is to be replicated. (Milind Bhandarkar via cutting)
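+
+    For example, a comma-separated value (following the convention
+    used for dfs.data.dir) such as '/disk1/dfs/name,/disk2/dfs/name'
+    (paths are placeholders) causes the namenode data to be written
+    to both directories.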
+
+27. HADOOP-651.  Fix DFSCk to correctly pass parameters to the servlet
+    on the namenode.  (Milind Bhandarkar via cutting)
+
+28. HADOOP-553.  Change main() routines of DataNode and NameNode to
+    log exceptions rather than letting the JVM print them to standard
+    error.  Also, change the hadoop-daemon.sh script to rotate
+    standard i/o log files.  (Raghu Angadi via cutting)
+
+29. HADOOP-399.  Fix javadoc warnings.  (Nigel Daley via cutting)
+
+30. HADOOP-599.  Fix web ui and command line to correctly report DFS
+    filesystem size statistics.  Also improve web layout.
+    (Raghu Angadi via cutting)
+
+31. HADOOP-660.  Permit specification of junit test output format.
+    (Nigel Daley via cutting)
+
+32. HADOOP-663.  Fix a few unit test issues.  (Mahadev Konar via cutting)
+
+33. HADOOP-664.  Cause entire build to fail if libhdfs tests fail.
+    (Nigel Daley via cutting)
+
+34. HADOOP-633.  Keep jobtracker from dying when job initialization
+    throws exceptions.  Also improve exception handling in a few other
+    places and add more informative thread names.
+    (omalley via cutting)
+
+35. HADOOP-669.  Fix a problem introduced by HADOOP-90 that can cause
+    DFS to lose files.  (Milind Bhandarkar via cutting)
+
+36. HADOOP-373.  Consistently check the value returned by
+    FileSystem.mkdirs().  (Wendy Chien via cutting)
+
+37. HADOOP-670.  Code cleanups in some DFS internals: use generic
+    types, replace Vector with ArrayList, etc.
+    (Konstantin Shvachko via cutting)
+
+38. HADOOP-647.  Permit map outputs to use a different compression
+    type than the job output.  (omalley via cutting)
+
+39. HADOOP-671.  Fix file cache to check for pre-existence before
+    creating it.  (Mahadev Konar via cutting)
+
+40. HADOOP-665.  Extend many DFSShell commands to accept multiple
+    arguments.  Now commands like "ls", "rm", etc. will operate on
+    multiple files.  (Dhruba Borthakur via cutting)
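+
+    For example (paths are placeholders):
+
+      bin/hadoop dfs -rm /user/example/a.txt /user/example/b.txt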
+
+
+Release 0.7.2 - 2006-10-18
+
+ 1. HADOOP-607.  Fix a bug where classes included in job jars were not
+    found by tasks.  (Mahadev Konar via cutting)
+
+ 2. HADOOP-609.  Add a unit test that checks that classes in job jars
+    can be found by tasks.  Also modify unit tests to specify multiple
+    local directories.  (Mahadev Konar via cutting)
+
+
+Release 0.7.1 - 2006-10-11
+
+ 1. HADOOP-593.  Fix a NullPointerException in the JobTracker.
+    (omalley via cutting)
+
+ 2. HADOOP-592.  Fix a NullPointerException in the IPC Server.  Also
+    consistently log when stale calls are discarded.  (omalley via cutting)
+
+ 3. HADOOP-594.  Increase the DFS safe-mode threshold from .95 to
+    .999, so that nearly all blocks must be reported before filesystem
+    modifications are permitted.  (Konstantin Shvachko via cutting)
+
+ 4. HADOOP-598.  Fix tasks to retry when reporting completion, so that
+    a single RPC timeout won't fail a task.  (omalley via cutting)
+
+ 5. HADOOP-597.  Fix TaskTracker to not discard map outputs for errors
+    in transmitting them to reduce nodes.  (omalley via cutting)
+
+
+Release 0.7.0 - 2006-10-06
+
+ 1. HADOOP-243.  Fix rounding in the display of task and job progress
+    so that things are not shown to be 100% complete until they are in
+    fact finished.  (omalley via cutting) 
+
+ 2. HADOOP-438.  Limit the length of absolute paths in DFS, since the
+    file format used to store pathnames has some limitations.
+    (Wendy Chien via cutting)
+
+ 3. HADOOP-530.  Improve error messages in SequenceFile when keys or
+    values are of the wrong type.  (Hairong Kuang via cutting)
+
+ 4. HADOOP-288.  Add a file caching system and use it in MapReduce to
+    cache job jar files on slave nodes.  (Mahadev Konar via cutting)
+
+ 5. HADOOP-533.  Fix unit test to not modify conf directory.
+   (Hairong Kuang via cutting)
+
+ 6. HADOOP-527.  Permit specification of the local address that various
+    Hadoop daemons should bind to.  (Philippe Gassmann via cutting)
+
+ 7. HADOOP-542.  Updates to contrib/streaming: reformatted source code,
+    on-the-fly merge sort, a fix for HADOOP-540, etc.
+    (Michel Tourn via cutting)
+
+ 8. HADOOP-545.  Remove an unused config file parameter.
+    (Philippe Gassmann via cutting)
+
+ 9. HADOOP-548.  Add an Ant property "test.output" to build.xml that
+    causes test output to be logged to the console.  (omalley via cutting)
+
+10. HADOOP-261.  Record an error message when map output is lost.
+    (omalley via cutting)
+
+11. HADOOP-293.  Report the full list of task error messages in the
+    web ui, not just the most recent.  (omalley via cutting)
+
+12. HADOOP-551.  Restore JobClient's console printouts to only include
+    a maximum of one update per one percent of progress.
+    (omalley via cutting)
+
+13. HADOOP-306.  Add a "safe" mode to DFS.  The name node enters this
+    when less than a specified percentage of file data is complete.
+    Currently safe mode is only used on startup, but eventually it
+    will also be entered when datanodes disconnect and file data
+    becomes incomplete.  While in safe mode no filesystem
+    modifications are permitted and block replication is inhibited.
+    (Konstantin Shvachko via cutting)
+
+14. HADOOP-431.  Change 'dfs -rm' to not operate recursively and add a
+    new command, 'dfs -rmr' which operates recursively.
+    (Sameer Paranjpye via cutting)
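+
+    For example (paths are placeholders):
+
+      bin/hadoop dfs -rm /user/example/old-file.txt
+      bin/hadoop dfs -rmr /user/example/old-dir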
+
+15. HADOOP-263.  Include timestamps for job transitions.  The web
+    interface now displays the start and end times of tasks and the
+    start times of sorting and reducing for reduce tasks.  Also,
+    extend ObjectWritable to handle enums, so that they can be passed
+    as RPC parameters.  (Sanjay Dahiya via cutting)
+
+16. HADOOP-556.  Contrib/streaming: send keep-alive reports to task
+    tracker every 10 seconds rather than every 100 records, to avoid
+    task timeouts.  (Michel Tourn via cutting)
+
+17. HADOOP-547.  Fix reduce tasks to ping tasktracker while copying
+    data, rather than only between copies, avoiding task timeouts.
+    (Sanjay Dahiya via cutting)
+
+18. HADOOP-537.  Fix src/c++/libhdfs build process to create files in
+    build/, no longer modifying the source tree.
+    (Arun C Murthy via cutting)
+
+19. HADOOP-487.  Throw a more informative exception for unknown RPC
+    hosts.  (Sameer Paranjpye via cutting)
+
+20. HADOOP-559.  Add file name globbing (pattern matching) support to
+    the FileSystem API, and use it in DFSShell ('bin/hadoop dfs')
+    commands.  (Hairong Kuang via cutting)
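+
+    For example, a glob pattern can be passed to the shell commands
+    (path and pattern are placeholders):
+
+      bin/hadoop dfs -ls '/logs/2006-09-*'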
+
+21. HADOOP-508.  Fix a bug in FSDataInputStream.  Incorrect data was
+    returned after seeking to a random location.
+    (Milind Bhandarkar via cutting)
+
+22. HADOOP-560.  Add a "killed" task state.  This can be used to
+    distinguish kills from other failures.  Task state has also been
+    converted to use an enum type instead of an int, uncovering a bug
+    elsewhere.  The web interface is also updated to display killed
+    tasks.  (omalley via cutting)
+
+23. HADOOP-423.  Normalize Paths containing directories named "." and
+    "..", using the standard, unix interpretation.  Also add checks in
+    DFS, prohibiting the use of "." or ".." as directory or file
+    names.  (Wendy Chien via cutting)
+
+24. HADOOP-513.  Replace map output handling with a servlet, rather
+    than a JSP page.  This fixes an issue where
+    IllegalStateExceptions were logged, sets content-length
+    correctly, and better handles some errors.  (omalley via cutting)
+
+25. HADOOP-552.  Improved error checking when copying map output files
+    to reduce nodes.  (omalley via cutting)
+
+26. HADOOP-566.  Fix scripts to work correctly when accessed through
+    relative symbolic links.  (Lee Faris via cutting)
+
+27. HADOOP-519.  Add positioned read methods to FSInputStream.  These
+    permit one to read from a stream without moving its position, and
+    can hence be performed by multiple threads at once on a single
+    stream. Implement an optimized version for DFS and local FS.
+    (Milind Bhandarkar via cutting)
+
+28. HADOOP-522. Permit block compression with MapFile and SetFile.
+    Since these formats are always sorted, block compression can
+    provide a big advantage.  (cutting)
+
+29. HADOOP-567. Record version and revision information in builds.  A
+    package manifest is added to the generated jar file containing
+    version information, and a VersionInfo utility is added that
+    includes further information, including the build date and user,
+    and the subversion revision and repository.  A 'bin/hadoop
+    version' command is added to show this information, and it is also
+    added to various web interfaces.  (omalley via cutting)
+
+30. HADOOP-568.  Fix so that errors while initializing tasks on a
+    tasktracker correctly report the task as failed to the jobtracker,
+    so that it will be rescheduled.  (omalley via cutting)
+
+31. HADOOP-550.  Disable automatic UTF-8 validation in Text.  This
+    permits, e.g., TextInputFormat to again operate on non-UTF-8 data.
+    (Hairong and Mahadev via cutting)
+
+32. HADOOP-343.  Fix mapred copying so that a failed tasktracker
+    doesn't cause other copies to slow.  (Sameer Paranjpye via cutting)
+
+33. HADOOP-239.  Add a persistent job history mechanism, so that basic
+    job statistics are not lost after 24 hours and/or when the
+    jobtracker is restarted.  (Sanjay Dahiya via cutting)
+
+34. HADOOP-506.  Ignore heartbeats from stale task trackers.
+   (Sanjay Dahiya via cutting)
+
+35. HADOOP-255.  Discard stale, queued IPC calls.  Do not process
+    calls whose clients will likely time out before they receive a
+    response.  When the queue is full, new calls are now received and
+    queued, and the oldest calls are discarded, so that, when servers
+    get bogged down, they no longer develop a backlog on the socket.
+    This should improve some DFS namenode failure modes.
+    (omalley via cutting)
+
+36. HADOOP-581.  Fix datanode to not reset itself on communications
+    errors with the namenode.  If a request to the namenode fails, the
+    datanode should retry, not restart.  This reduces the load on the
+    namenode, since restarts cause a resend of the block report.
+    (omalley via cutting)
+
+
+Release 0.6.2 - 2006-09-18
+
+1. HADOOP-532.  Fix a bug reading value-compressed sequence files,
+   where an exception was thrown reporting that the full value had not
+   been read.  (omalley via cutting)
+
+2. HADOOP-534.  Change the default value class in JobConf to be Text
+   instead of the now-deprecated UTF8.  This fixes the Grep example
+   program, which was updated to use Text, but relies on this
+   default.  (Hairong Kuang via cutting)
+
+
+Release 0.6.1 - 2006-09-13
+
+ 1. HADOOP-520.  Fix a bug in libhdfs, where write failures were not
+    correctly returning error codes.  (Arun C Murthy via cutting)
+
+ 2. HADOOP-523.  Fix a NullPointerException when TextInputFormat is
+    explicitly specified.  Also add a test case for this.
+    (omalley via cutting)
+
+ 3. HADOOP-521.  Fix another NullPointerException finding the
+    ClassLoader when using libhdfs.  (omalley via cutting)
+
+ 4. HADOOP-526.  Fix a NullPointerException when attempting to start
+    two datanodes in the same directory.  (Milind Bhandarkar via cutting)
+
+ 5. HADOOP-529.  Fix a NullPointerException when opening
+    value-compressed sequence files generated by pre-0.6.0 Hadoop.
+    (omalley via cutting)
+
+
+Release 0.6.0 - 2006-09-08
+
+ 1. HADOOP-427.  Replace some uses of DatanodeDescriptor in the DFS
+    web UI code with DatanodeInfo, the preferred public class.
+    (Devaraj Das via cutting)
+
+ 2. HADOOP-426.  Fix streaming contrib module to work correctly on
+    Solaris.  This was causing nightly builds to fail.
+    (Michel Tourn via cutting)
+
+ 3. HADOOP-400.  Improvements to task assignment.  Tasks are no longer
+    re-run on nodes where they have failed (unless no other node is
+    available).  Also, tasks are better load-balanced among nodes.
+    (omalley via cutting)
+
+ 4. HADOOP-324.  Fix datanode to not exit when a disk is full, but
+    rather simply to fail writes.  (Wendy Chien via cutting)
+
+ 5. HADOOP-434.  Change smallJobsBenchmark to use standard Hadoop
+    scripts.  (Sanjay Dahiya via cutting)
+
+ 6. HADOOP-453.  Fix a bug in Text.setCapacity().  (siren via cutting)
+
+ 7. HADOOP-450.  Change so that input types are determined by the
+    RecordReader rather than specified directly in the JobConf.  This
+    facilitates jobs with a variety of input types.
+
+    WARNING: This contains incompatible API changes!  The RecordReader
+    interface has two new methods that all user-defined InputFormats
+    must now define.  Also, the values returned by TextInputFormat are
+    no longer of class UTF8, but now of class Text.
+
+ 8. HADOOP-436.  Fix an error-handling bug in the web ui.
+    (Devaraj Das via cutting)
+
+ 9. HADOOP-455.  Fix a bug in Text, where DEL was not permitted.
+    (Hairong Kuang via cutting)
+
+10. HADOOP-456.  Change the DFS namenode to keep a persistent record
+    of the set of known datanodes.  This will be used to implement a
+    "safe mode" where filesystem changes are prohibited when a
+    critical percentage of the datanodes are unavailable.
+    (Konstantin Shvachko via cutting)
+
+11. HADOOP-322.  Add a job control utility.  This permits one to
+    specify job interdependencies.  Each job is submitted only after
+    the jobs it depends on have successfully completed.
+    (Runping Qi via cutting)
+
+12. HADOOP-176.  Fix a bug in IntWritable.Comparator.
+    (Dick King via cutting)
+
+13. HADOOP-421.  Replace uses of String in recordio package with Text
+    class, for improved handling of UTF-8 data.
+    (Milind Bhandarkar via cutting)
+
+14. HADOOP-464.  Improved error message when job jar not found.
+    (Michel Tourn via cutting)
+
+15. HADOOP-469.  Fix /bin/bash specifics that have crept into our
+    /bin/sh scripts since HADOOP-352.
+    (Jean-Baptiste Quenot via cutting)
+
+16. HADOOP-468.  Add HADOOP_NICENESS environment variable to set
+    scheduling priority for daemons.  (Vetle Roeim via cutting)
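+
+    For example, one might export a niceness value (the value shown is
+    a placeholder) before starting the daemons:
+
+      export HADOOP_NICENESS=10
+      bin/start-all.sh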
+
+17. HADOOP-473.  Fix TextInputFormat to correctly handle more EOL
+    formats.  Things now work correctly with CR, LF or CRLF.
+    (Dennis Kubes & James White via cutting)
+
+18. HADOOP-461.  Make Java 1.5 an explicit requirement.  (cutting)
+
+19. HADOOP-54.  Add block compression to SequenceFile.  One may now
+    specify that blocks of keys and values are compressed together,
+    improving compression for small keys and values.
+    SequenceFile.Writer's constructor is now deprecated and replaced
+    with a factory method.  (Arun C Murthy via cutting)
+
+20. HADOOP-281.  Prohibit DFS files that are also directories.
+    (Wendy Chien via cutting)
+
+21. HADOOP-486.  Add the job username to JobStatus instances returned
+    by JobClient.  (Mahadev Konar via cutting)
+
+22. HADOOP-437.  contrib/streaming: Add support for gzipped inputs.
+    (Michel Tourn via cutting)
+
+23. HADOOP-463.  Add variable expansion to config files.
+    Configuration property values may now contain variable
+    expressions.  A variable is referenced with the syntax
+    '${variable}'.  Variable values are found first in the
+    configuration, and then in Java system properties.  The default
+    configuration is modified so that temporary directories are now
+    under ${hadoop.tmp.dir}, which is, by default,
+    /tmp/hadoop-${user.name}.  (Michel Tourn via cutting)
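+
+    For example, a property value written as '${hadoop.tmp.dir}/example'
+    (the trailing path segment is a placeholder) expands by default to
+    /tmp/hadoop-<username>/example, with the user name taken from the
+    Java system property user.name.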
+
+24. HADOOP-419. Fix a NullPointerException finding the ClassLoader
+    when using libhdfs.  (omalley via cutting)
+
+25. HADOOP-460. Fix contrib/smallJobsBenchmark to use Text instead of
+    UTF8.  (Sanjay Dahiya via cutting)
+
+26. HADOOP-196.  Fix Configuration(Configuration) constructor to work
+    correctly.  (Sami Siren via cutting)
+
+27. HADOOP-501.  Fix Configuration.toString() to handle URL resources.
+    (Thomas Friol via cutting)
+
+28. HADOOP-499.  Reduce the use of Strings in contrib/streaming,
+    replacing them with Text for better performance.
+    (Hairong Kuang via cutting)
+
+29. HADOOP-64.  Manage multiple volumes with a single DataNode.
+    Previously DataNode would create a separate daemon per configured
+    volume, each with its own connection to the NameNode.  Now all
+    volumes are handled by a single DataNode daemon, reducing the load
+    on the NameNode.  (Milind Bhandarkar via cutting)
+
+30. HADOOP-424.  Fix MapReduce so that jobs which generate zero splits
+    do not fail.  (Frédéric Bertin via cutting)
+
+31. HADOOP-408.  Adjust some timeouts and remove some others so that
+    unit tests run faster.  (cutting)
+
+32. HADOOP-507.  Fix an IllegalAccessException in DFS.
+    (omalley via cutting)
+
+33. HADOOP-320.  Fix so that checksum files are correctly copied when
+    the destination of a file copy is a directory.
+    (Hairong Kuang via cutting)
+
+34. HADOOP-286.  In DFSClient, avoid pinging the NameNode with
+    renewLease() calls when no files are being written.
+    (Konstantin Shvachko via cutting)
+
+35. HADOOP-312.  Close idle IPC connections.  All IPC connections were
+    cached forever.  Now, after a connection has been idle for more
+    than a configurable amount of time (one second by default), the
+    connection is closed, conserving resources on both client and
+    server. (Devaraj Das via cutting)
+
+36. HADOOP-497.  Permit the specification of the network interface and
+    nameserver to be used when determining the local hostname
+    advertised by datanodes and tasktrackers.
+    (Lorenzo Thione via cutting)
+
+37. HADOOP-441.  Add a compression codec API and extend SequenceFile
+    to use it.  This will permit the use of alternate compression
+    codecs in SequenceFile.  (Arun C Murthy via cutting)
+
+38. HADOOP-483. Improvements to libhdfs build and documentation.
+    (Arun C Murthy via cutting)
+
+39. HADOOP-458.  Fix a memory corruption bug in libhdfs.
+    (Arun C Murthy via cutting)
+
+40. HADOOP-517.  Fix a contrib/streaming bug in end-of-line detection.
+    (Hairong Kuang via cutting)
+
+41. HADOOP-474.  Add CompressionCodecFactory, and use it in
+    TextInputFormat and TextOutputFormat.  Compressed input files are
+    automatically decompressed when they have the correct extension.
+    Output files will, when output compression is specified, be
+    generated with an appropriate extension.  Also add a gzip codec and
+    fix problems with UTF8 text inputs.  (omalley via cutting)
+
+
+Release 0.5.0 - 2006-08-04
+
+ 1. HADOOP-352.  Fix shell scripts to use /bin/sh instead of
+    /bin/bash, for better portability.
+    (Jean-Baptiste Quenot via cutting)
+
+ 2. HADOOP-313.  Permit task state to be saved so that single tasks
+    may be manually re-executed when debugging.  (omalley via cutting)
+
+ 3. HADOOP-339.  Add method to JobClient API listing jobs that are
+    not yet complete, i.e., that are queued or running.
+    (Mahadev Konar via cutting)
+
+ 4. HADOOP-355.  Updates to the streaming contrib module, including
+    API fixes, making reduce optional, and adding an input type for
+    StreamSequenceRecordReader.  (Michel Tourn via cutting)
+
+ 5. HADOOP-358.  Fix a NPE bug in Path.equals().
+    (Frédéric Bertin via cutting)
+
+ 6. HADOOP-327.  Fix ToolBase to not call System.exit() when
+    exceptions are thrown.  (Hairong Kuang via cutting)
+
+ 7. HADOOP-359.  Permit map output to be compressed.
+    (omalley via cutting)
+
+ 8. HADOOP-341.  Permit input URI to CopyFiles to use the HTTP
+    protocol.  This lets one, e.g., more easily copy log files into
+    DFS.  (Arun C Murthy via cutting)
+
+ 9. HADOOP-361.  Remove unix dependencies from streaming contrib
+    module tests, making them pure java. (Michel Tourn via cutting)
+
+10. HADOOP-354.  Make public methods to stop DFS daemons.
+    (Barry Kaplan via cutting)
+
+11. HADOOP-252.  Add versioning to RPC protocols.
+    (Milind Bhandarkar via cutting)
+
+12. HADOOP-356.  Add contrib to "compile" and "test" build targets, so
+    that this code is better maintained. (Michel Tourn via cutting)
+
+13. HADOOP-307.  Add smallJobsBenchmark contrib module.  This runs
+    lots of small jobs, in order to determine per-task overheads.
+    (Sanjay Dahiya via cutting)
+
+14. HADOOP-342.  Add a tool for log analysis: Logalyzer.
+    (Arun C Murthy via cutting)
+
+15. HADOOP-347.  Add web-based browsing of DFS content.  The namenode
+    redirects browsing requests to datanodes.  Content requests are
+    redirected to datanodes where the data is local when possible.
+    (Devaraj Das via cutting)
+
+16. HADOOP-351.  Make Hadoop IPC kernel independent of Jetty.
+    (Devaraj Das via cutting)
+
+17. HADOOP-237.  Add metric reporting to DFS and MapReduce.  With only
+    minor configuration changes, one can now monitor many Hadoop
+    system statistics using Ganglia or other monitoring systems.
+    (Milind Bhandarkar via cutting)
+
+18. HADOOP-376.  Fix datanode's HTTP server to scan for a free port.
+    (omalley via cutting)
+
+19. HADOOP-260.  Add --config option to shell scripts, specifying an
+    alternate configuration directory. (Milind Bhandarkar via cutting)
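+
+    For example, an alternate configuration directory might be passed
+    as (directory is a placeholder):
+
+      bin/hadoop --config /path/to/alternate/conf dfs -ls /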
+
+20. HADOOP-381.  Permit developers to save the temporary files for
+    tasks whose names match a regular expression, to facilitate
+    debugging.  (omalley via cutting)
+
+21. HADOOP-344.  Fix some Windows-related problems with DF.
+    (Konstantin Shvachko via cutting)
+
+22. HADOOP-380.  Fix reduce tasks to poll less frequently for map
+    outputs. (Mahadev Konar via cutting)
+
+23. HADOOP-321.  Refactor DatanodeInfo, in preparation for
+    HADOOP-306.  (Konstantin Shvachko & omalley via cutting)
+
+24. HADOOP-385.  Fix some bugs in record io code generation.
+    (Milind Bhandarkar via cutting)
+
+25. HADOOP-302.  Add new Text class to replace UTF8, removing
+    limitations of that class.  Also refactor utility methods for
+    writing zero-compressed integers (VInts and VLongs).
+    (Hairong Kuang via cutting)
+
+26. HADOOP-335.  Refactor DFS namespace/transaction logging in
+    namenode.   (Konstantin Shvachko via cutting)
+
+27. HADOOP-375.  Fix handling of the datanode HTTP daemon's port so
+    that multiple datanodes can be run on a single host.
+    (Devaraj Das via cutting)
+
+28. HADOOP-386.  When removing excess DFS block replicas, remove those
+    on nodes with the least free space first.
+    (Johan Oskarson via cutting)
+
+29. HADOOP-389.  Fix intermittent failures of mapreduce unit tests.
+    Also fix some build dependencies.
+    (Mahadev & Konstantin via cutting)
+
+30. HADOOP-362.  Fix a problem where jobs hang when status messages
+    are received out-of-order.  (omalley via cutting)
+
+31. HADOOP-394.  Change order of DFS shutdown in unit tests to
+    minimize errors logged.  (Konstantin Shvachko via cutting)
+
+32. HADOOP-396.  Make DatanodeID implement Writable.
+    (Konstantin Shvachko via cutting)
+
+33. HADOOP-377.  Permit one to add URL resources to a Configuration.
+    (Jean-Baptiste Quenot via cutting)
+
+34. HADOOP-345.  Permit iteration over Configuration key/value pairs.
+    (Michel Tourn via cutting)
+
+35. HADOOP-409.  Streaming contrib module: make configuration
+    properties available to commands as environment variables.
+    (Michel Tourn via cutting)
+
+36. HADOOP-369.  Add -getmerge option to dfs command that appends all
+    files in a directory into a single local file.
+    (Johan Oskarson via cutting)
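+
+    For example (paths are placeholders):
+
+      bin/hadoop dfs -getmerge /user/example/output merged-output.txt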
+
+37. HADOOP-410.  Replace some TreeMaps with HashMaps in DFS, for
+    a 17% performance improvement. (Milind Bhandarkar via cutting)
+
+38. HADOOP-411.  Add unit tests for command line parser.
+    (Hairong Kuang via cutting)
+
+39. HADOOP-412.  Add MapReduce input formats that support filtering
+    of SequenceFile data, including sampling and regex matching.
+    Also, move JobConf.newInstance() to a new utility class.
+    (Hairong Kuang via cutting)
+
+40. HADOOP-226.  Fix fsck command to properly consider replication
+    counts, now that these can vary per file.  (Bryan Pendleton via cutting)
+
+41. HADOOP-425.  Add a Python MapReduce example, using Jython.
+    (omalley via cutting)
+
+
+Release 0.4.0 - 2006-06-28
+
+ 1. HADOOP-298.  Improved progress reports for CopyFiles utility, the
+    distributed file copier.  (omalley via cutting)
+
+ 2. HADOOP-299.  Fix the task tracker, permitting multiple jobs to
+    more easily execute at the same time.  (omalley via cutting)
+
+ 3. HADOOP-250.  Add an HTTP user interface to the namenode, running
+    on port 50070. (Devaraj Das via cutting)
+
+ 4. HADOOP-123.  Add MapReduce unit tests that run a jobtracker and
+    tasktracker, greatly increasing code coverage.
+    (Milind Bhandarkar via cutting)
+
+ 5. HADOOP-271.  Add links from jobtracker's web ui to tasktracker's
+    web ui.  Also attempt to log a thread dump of child processes
+    before they're killed.  (omalley via cutting)
+
+ 6. HADOOP-210.  Change RPC server to use a selector instead of a
+    thread per connection.  This should make it easier to scale to
+    larger clusters.  Note that this incompatibly changes the RPC
+    protocol: clients and servers must both be upgraded to the new
+    version to ensure correct operation.  (Devaraj Das via cutting)
+
+ 7. HADOOP-311.  Change DFS client to retry failed reads, so that a
+    single read failure will not alone cause failure of a task.
+    (omalley via cutting)
+
+ 8. HADOOP-314.  Remove the "append" phase when reducing.  Map output
+    files are now directly passed to the sorter, without first
+    appending them into a single file.  Now, the first third of reduce
+    progress is "copy" (transferring map output to reduce nodes), the
+    middle third is "sort" (sorting map output) and the last third is
+    "reduce" (generating output).  Long-term, the "sort" phase will
+    also be removed.  (omalley via cutting)
+
+ 9. HADOOP-316.  Fix a potential deadlock in the jobtracker.
+    (omalley via cutting)
+
+10. HADOOP-319.  Fix FileSystem.close() to remove the FileSystem
+    instance from the cache.  (Hairong Kuang via cutting)
+
+11. HADOOP-135.  Fix potential deadlock in JobTracker by acquiring
+    locks in a consistent order.  (omalley via cutting)
+
+12. HADOOP-278.  Check for existence of input directories before
+    starting MapReduce jobs, making it easier to debug this common
+    error.  (omalley via cutting)
+
+13. HADOOP-304.  Improve error message for
+    UnregisterdDatanodeException to include expected node name.
+   (Konstantin Shvachko via cutting)
+
+14. HADOOP-305.  Fix TaskTracker to ask for new tasks as soon as a
+    task is finished, rather than waiting for the next heartbeat.
+    This improves performance when tasks are short.
+    (Mahadev Konar via cutting)
+
+15. HADOOP-59.  Add support for generic command line options.  One may
+    now specify the filesystem (-fs), the MapReduce jobtracker (-jt),
+    a config file (-conf) or any configuration property (-D).  The
+    "dfs", "fsck", "job", and "distcp" commands currently support
+    this, with more to be added.  (Hairong Kuang via cutting)
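+
+    A sketch of the new options (host, port, and paths are
+    placeholders):
+
+      bin/hadoop dfs -fs namenode.example.com:9000 -ls /
+      bin/hadoop dfs -conf /path/to/alternate-hadoop-site.xml -ls /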
+
+16. HADOOP-296.  Permit specification of the amount of reserved space
+    on a DFS datanode.  One may specify both the percentage free and
+    the number of bytes.  (Johan Oskarson via cutting)
+
+17. HADOOP-325.  Fix a problem initializing RPC parameter classes, and
+    remove the workaround used to initialize classes.
+    (omalley via cutting)
+
+18. HADOOP-328.  Add an option to the "distcp" command to ignore read
+    errors while copying.  (omalley via cutting)
+
+19. HADOOP-27.  Don't allocate tasks to trackers whose local free
+    space is too low.  (Johan Oskarson via cutting)
+
+20. HADOOP-318.  Keep slow DFS output from causing task timeouts.
+    This incompatibly changes some public interfaces, adding a
+    parameter to OutputFormat.getRecordWriter() and the new method
+    Reporter.progress(), but it makes lots of tasks succeed that were
+    previously failing.  (Milind Bhandarkar via cutting)
+
+
+Release 0.3.2 - 2006-06-09
+
+ 1. HADOOP-275.  Update the streaming contrib module to use log4j for
+    its logging.  (Michel Tourn via cutting)
+
+ 2. HADOOP-279.  Provide defaults for log4j logging parameters, so
+    that things still work reasonably when Hadoop-specific system
+    properties are not provided.  (omalley via cutting)
+
+ 3. HADOOP-280.  Fix a typo in AllTestDriver which caused the wrong
+    test to be run when "DistributedFSCheck" was specified.
+   (Konstantin Shvachko via cutting)
+
+ 4. HADOOP-240.  DFS's mkdirs() implementation no longer logs a warning
+    when the directory already exists. (Hairong Kuang via cutting)
+
+ 5. HADOOP-285.  Fix DFS datanodes to be able to re-join the cluster
+    after the connection to the namenode is lost.  (omalley via cutting)
+
+ 6. HADOOP-277.  Fix a race condition when creating directories.
+   (Sameer Paranjpye via cutting)
+
+ 7. HADOOP-289.  Improved exception handling in DFS datanode.
+    (Konstantin Shvachko via cutting)
+
+ 8. HADOOP-292.  Fix client-side logging to go to standard error
+    rather than standard output, so that it can be distinguished from
+    application output.  (omalley via cutting)
+
+ 9. HADOOP-294.  Fixed bug where conditions for retrying after errors
+    in the DFS client were reversed.  (omalley via cutting)
+
+
+Release 0.3.1 - 2006-06-05
+
+ 1. HADOOP-272.  Fix a bug in bin/hadoop setting log
+    parameters. (omalley & cutting)
+
+ 2. HADOOP-274.  Change applications to log to standard output rather
+    than to a rolling log file like daemons.  (omalley via cutting)
+
+ 3. HADOOP-262.  Fix reduce tasks to report progress while they're
+    waiting for map outputs, so that they do not time out.
+    (Mahadev Konar via cutting)
+
+ 4. HADOOP-245 and HADOOP-246.  Improvements to record io package.  
+    (Mahadev Konar via cutting)
+
+ 5. HADOOP-276.  Add logging config files to jar file so that they're
+    always found.  (omalley via cutting)
+
+
+Release 0.3.0 - 2006-06-02
+
+ 1. HADOOP-208.  Enhance MapReduce web interface, adding new pages
+    for failed tasks, and tasktrackers.  (omalley via cutting)
+
+ 2. HADOOP-204.  Tweaks to metrics package.  (David Bowen via cutting)
+
+ 3. HADOOP-209.  Add a MapReduce-based file copier.  This will
+    copy files within or between file systems in parallel.
+    (Milind Bhandarkar via cutting)
+
+ 4. HADOOP-146.  Fix DFS to check when randomly generating a new block
+    id that no existing blocks already have that id.
+    (Milind Bhandarkar via cutting)
+
+ 5. HADOOP-180. Make a daemon thread that does the actual task clean ups, so
+    that the main offerService thread in the taskTracker doesn't get stuck
+    and miss its heartbeat window. This was killing many task trackers as
+    big jobs finished (300+ tasks / node). (omalley via cutting)
+
+ 6. HADOOP-200. Avoid transmitting entire list of map task names to
+    reduce tasks.  Instead just transmit the number of map tasks and
+    henceforth refer to them by number when collecting map output.
+    (omalley via cutting)
+
+ 7. HADOOP-219. Fix a NullPointerException when handling a checksum
+    exception under SequenceFile.Sorter.sort().  (cutting & stack)
+
+ 8. HADOOP-212. Permit alteration of the file block size in DFS.  The
+    default block size for new files may now be specified in the
+    configuration with the dfs.block.size property.  The block size
+    may also be specified when files are opened.
+    (omalley via cutting)
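+
+    For example, setting the dfs.block.size property to 67108864 (a
+    value chosen purely for illustration) makes new files use 64MB
+    blocks unless a different size is requested when a file is opened.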
+
+ 9. HADOOP-218. Avoid accessing configuration while looping through
+    tasks in JobTracker.  (Mahadev Konar via cutting)
+
+10. HADOOP-161. Add hashCode() method to DFS's Block.
+    (Milind Bhandarkar via cutting)
+
+11. HADOOP-115. Map output types may now be specified.  These are also
+    used as reduce input types, thus permitting reduce input types to
+    differ from reduce output types.  (Runping Qi via cutting)
+
+12. HADOOP-216. Add task progress to task status page.
+    (Bryan Pendleton via cutting)
+
+13. HADOOP-233.  Add web server to task tracker that shows running
+    tasks and logs.  Also add log access to job tracker web interface.
+    (omalley via cutting)
+
+14. HADOOP-205.  Incorporate pending tasks into tasktracker load
+    calculations.  (Mahadev Konar via cutting)
+
+15. HADOOP-247.  Fix sort progress to better handle exceptions.
+    (Mahadev Konar via cutting)
+
+16. HADOOP-195.  Improve performance of the transfer of map outputs to
+    reduce nodes by performing multiple transfers in parallel, each on
+    a separate socket.  (Sameer Paranjpye via cutting)
+
+17. HADOOP-251.  Fix task processes to be tolerant of failed progress
+    reports to their parent process.  (omalley via cutting)
+
+18. HADOOP-325.  Improve the FileNotFound exceptions thrown by
+    LocalFileSystem to include the name of the file.
+    (Benjamin Reed via cutting)
+
+19. HADOOP-254.  Use HTTP to transfer map output data to reduce
+    nodes.  This, together with HADOOP-195, greatly improves the
+    performance of these transfers.  (omalley via cutting)
+
+20. HADOOP-163.  Cause datanodes that are unable to either read or
+    write data to exit, so that the namenode will no longer target
+    them for new blocks and will replicate their data on other nodes.
+    (Hairong Kuang via cutting)
+
+21. HADOOP-222.  Add a -setrep option to the dfs commands that alters
+    file replication levels.  (Johan Oskarson via cutting)
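+
+    For example (path and replication count are placeholders):
+
+      bin/hadoop dfs -setrep 3 /user/example/important.dat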
+
+22. HADOOP-75.  In DFS, only check for a complete file when the file
+    is closed, rather than as each block is written.
+    (Milind Bhandarkar via cutting)
+
+23. HADOOP-124. Change DFS so that datanodes are identified by a
+    persistent ID rather than by host and port.  This solves a number
+    of filesystem integrity problems, when, e.g., datanodes are
+    restarted.  (Konstantin Shvachko via cutting)
+
+24. HADOOP-256.  Add a C API for DFS.  (Arun C Murthy via cutting)
+
+25. HADOOP-211.  Switch to use the Jakarta Commons logging internally,
+    configured to use log4j by default.  (Arun C Murthy and cutting)
+
+26. HADOOP-265.  Tasktracker now fails to start if it does not have a
+    writable local directory for temporary files.  In this case, it
+    logs a message to the JobTracker and exits. (Hairong Kuang via cutting)
+
+27. HADOOP-270.  Fix potential deadlock in datanode shutdown.
+    (Hairong Kuang via cutting)
+
+Release 0.2.1 - 2006-05-12
+
+ 1. HADOOP-199.  Fix reduce progress (broken by HADOOP-182).
+    (omalley via cutting)
+
+ 2. HADOOP-201.  Fix 'bin/hadoop dfs -report'.  (cutting)
+
+ 3. HADOOP-207.  Fix JDK 1.4 incompatibility introduced by HADOOP-96.
+    System.getenv() does not work in JDK 1.4.  (Hairong Kuang via cutting)
+
+
+Release 0.2.0 - 2006-05-05
+
+ 1. Fix HADOOP-126. 'bin/hadoop dfs -cp' now correctly copies .crc
+    files.  (Konstantin Shvachko via cutting)
+
+ 2. Fix HADOOP-51. Change DFS to support per-file replication counts.
+    (Konstantin Shvachko via cutting)
+
+ 3. Fix HADOOP-131.  Add scripts to start/stop dfs and mapred daemons.
+    Use these in start/stop-all scripts.  (Chris Mattmann via cutting)
+
+ 4. Stop using ssh options by default that are not yet in widely used
+    versions of ssh.  Folks can still enable their use by uncommenting
+    a line in conf/hadoop-env.sh. (cutting)
+
+ 5. Fix HADOOP-92.  Show information about all attempts to run each
+    task in the web ui.  (Mahadev Konar via cutting)
+
+ 6. Fix HADOOP-128.  Improved DFS error handling. (Owen O'Malley via cutting)
+
+ 7. Fix HADOOP-129.  Replace uses of java.io.File with new class named
+    Path.  This fixes bugs where java.io.File methods were called
+    directly when FileSystem methods were desired, and reduces the
+    likelihood of such bugs in the future.  It also makes the handling
+    of pathnames more consistent between local and dfs FileSystems and
+    between Windows and Unix. java.io.File-based methods are still
+    available for back-compatibility, but are deprecated and will be
+    removed once 0.2 is released. (cutting)
+
+ 8. Change dfs.data.dir and mapred.local.dir to be comma-separated
+    lists of directories, no longer space-separated. This fixes
+    several bugs on Windows. (cutting)
+
+ 9. Fix HADOOP-144.  Use mapred task id for dfs client id, to
+    facilitate debugging.  (omalley via cutting)
+
+10. Fix HADOOP-143.  Do not line-wrap stack-traces in web ui.
+    (omalley via cutting)
+
+11. Fix HADOOP-118.  In DFS, improve clean up of abandoned file
+    creations.  (omalley via cutting)
+
+12. Fix HADOOP-138.  Stop multiple tasks in a single heartbeat, rather
+    than one per heartbeat.  (Stefan via cutting)
+
+13. Fix HADOOP-139.  Remove a potential deadlock in
+    LocalFileSystem.lock().  (Igor Bolotin via cutting)
+
+14. Fix HADOOP-134.  Don't hang jobs when the tasktracker is
+    misconfigured to use an un-writable local directory.  (omalley via cutting)
+
+15. Fix HADOOP-115.  Correct an error message.  (Stack via cutting)
+
+16. Fix HADOOP-133.  Retry pings from child to parent, in case of
+    (local) communication problems.  Also log exit status, so that one
+    can distinguish patricide from other deaths.  (omalley via cutting)
+
+17. Fix HADOOP-142.  Avoid re-running a task on a host where it has
+    previously failed.  (omalley via cutting)
+
+18. Fix HADOOP-148.  Maintain a task failure count for each
+    tasktracker and display it in the web ui.  (omalley via cutting)
+
+19. Fix HADOOP-151.  Close a potential socket leak, where new IPC
+    connection pools were created per configuration instance that RPCs
+    use.  Now a global RPC connection pool is used again, as
+    originally intended.  (cutting)
+
+20. Fix HADOOP-69.  Don't throw a NullPointerException when getting
+    hints for non-existing file split.  (Bryan Pendleton via cutting)
+
+21. Fix HADOOP-157.  When a task that writes dfs files (e.g., a reduce
+    task) failed and was retried, it would fail again and again,
+    eventually failing the job.  The problem was that dfs did not yet
+    know that the failed task had abandoned the files, and would not
+    yet let another task create files with the same names.  Dfs now
+    retries when creating a file long enough for locks on abandoned
+    files to expire.  (omalley via cutting)
+
+22. Fix HADOOP-150.  Improved task names that include job
+    names. (omalley via cutting)
+
+23. Fix HADOOP-162.  Fix ConcurrentModificationException when
+    releasing file locks. (omalley via cutting)
+
+24. Fix HADOOP-132.  Initial check-in of new Metrics API, including 
+    implementations for writing metric data to a file and for sending
+    it to Ganglia.  (David Bowen via cutting)
+
+25. Fix HADOOP-160.  Remove some unneeded synchronization around
+    time-consuming operations in the TaskTracker.  (omalley via cutting)
+
+26. Fix HADOOP-166.  RPCs failed when passed subclasses of a declared
+    parameter type.  This is fixed by changing ObjectWritable to store
+    both the declared type and the instance type for Writables.  Note
+    that this incompatibly changes the format of ObjectWritable and
+    will render unreadable any ObjectWritables stored in files.
+    Nutch only uses ObjectWritable in intermediate files, so this
+    should not be a problem for Nutch.  (Stefan & cutting)
+
+27. Fix HADOOP-168.  MapReduce RPC protocol methods should all declare
+    IOException, so that timeouts are handled appropriately.
+    (omalley via cutting)
+
+28. Fix HADOOP-169.  Don't fail a reduce task if a call to the
+    jobtracker to locate map outputs fails.  (omalley via cutting)
+
+29. Fix HADOOP-170.  Permit FileSystem clients to examine and modify
+    the replication count of individual files.  Also fix a few
+    replication-related bugs. (Konstantin Shvachko via cutting)
+
+30. Permit specification of a higher replication level for job
+    submission files (job.xml and job.jar).  This helps with large
+    clusters, since these files are read by every node.  (cutting)
+
+31. HADOOP-173.  Optimize allocation of tasks with local data.  (cutting)
+
+32. HADOOP-167.  Reduce number of Configurations and JobConf's
+    created.  (omalley via cutting)
+
+33. NUTCH-256.  Change FileSystem#createNewFile() to create a .crc
+    file.  The lack of a .crc file was causing warnings.  (cutting)
+
+34. HADOOP-174.  Change JobClient to not abort job until it has failed
+    to contact the job tracker for five attempts, not just one as
+    before.  (omalley via cutting)
+
+35. HADOOP-177.  Change MapReduce web interface to page through tasks.
+    Previously, when jobs had more than a few thousand tasks they
+    could crash web browsers.  (Mahadev Konar via cutting)
+
+36. HADOOP-178.  In DFS, piggyback blockwork requests from datanodes
+    on heartbeat responses from namenode.  This reduces the volume of
+    RPC traffic.  Also move startup delay in blockwork from datanode
+    to namenode.  This fixes a problem where restarting the namenode
+    triggered a lot of unneeded replication. (Hairong Kuang via cutting)
+
+37. HADOOP-183.  If the DFS namenode is restarted with different
+    minimum and/or maximum replication counts, existing files'
+    replication counts are now automatically adjusted to be within the
+    newly configured bounds. (Hairong Kuang via cutting)
+
+38. HADOOP-186.  Better error handling in TaskTracker's top-level
+    loop.  Also improve calculation of time to send next heartbeat.
+    (omalley via cutting)
+
+39. HADOOP-187.  Add two MapReduce examples/benchmarks.  One creates
+    files containing random data.  The second sorts the output of the
+    first.  (omalley via cutting)
+
+40. HADOOP-185.  Fix so that, when a task tracker times out making the
+    RPC asking for a new task to run, the job tracker does not think
+    that it is actually running the task returned.  (omalley via cutting)
+
+41. HADOOP-190.  If a child process hangs after it has reported
+    completion, its output should not be lost.  (Stack via cutting)
+
+42. HADOOP-184. Re-structure some test code to better support testing
+    on a cluster.  (Mahadev Konar via cutting)
+
+43. HADOOP-191.  Add streaming package, Hadoop's first contrib module.
+    This permits folks to easily submit MapReduce jobs whose map and
+    reduce functions are implemented by shell commands.  Use
+    'bin/hadoop jar build/hadoop-streaming.jar' to get details.
+    (Michel Tourn via cutting)
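+
+    A sketch of such a job (paths are placeholders; the exact option
+    names are assumed for illustration):
+
+      bin/hadoop jar build/hadoop-streaming.jar \
+        -input /user/example/input -output /user/example/output \
+        -mapper /bin/cat -reducer /usr/bin/wc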
+
+44. HADOOP-189.  Fix MapReduce in standalone configuration to
+    correctly handle job jar files that contain a lib directory with
+    nested jar files.  (cutting)
+
+45. HADOOP-65.  Initial version of record I/O framework that enables
+    the specification of record types and generates marshalling code
+    in both Java and C++.  Generated Java code implements
+    WritableComparable, but is not yet otherwise used by
+    Hadoop. (Milind Bhandarkar via cutting)
+
+46. HADOOP-193.  Add a MapReduce-based FileSystem benchmark.
+    (Konstantin Shvachko via cutting)
+
+47. HADOOP-194.  Add a MapReduce-based FileSystem checker.  This reads
+    every block in every file in the filesystem.  (Konstantin Shvachko
+    via cutting)
+
+48. HADOOP-182.  Fix so that lost task trackers do not change the
+    status of reduce tasks or completed jobs.  Also fixes the progress
+    meter so that failed tasks are subtracted. (omalley via cutting)
+
+49. HADOOP-96.  Logging improvements.  Log files are now separate from
+    standard output and standard error files.  Logs are now rolled.
+    Logging of all DFS state changes can be enabled, to facilitate
+    debugging.  (Hairong Kuang via cutting)
+
+
+Release 0.1.1 - 2006-04-08
+
+ 1. Added CHANGES.txt, logging all significant changes to Hadoop.  (cutting)
+
+ 2. Fix MapReduceBase.close() to throw IOException, as declared in the
+    Closeable interface.  This permits subclasses which override this
+    method to throw that exception. (cutting)
+
+ 3. Fix HADOOP-117.  Pathnames were mistakenly transposed in
+    JobConf.getLocalFile() causing many mapred temporary files to not
+    be removed.  (Raghavendra Prabhu via cutting)
+ 
+ 4. Fix HADOOP-116. Clean up job submission files when jobs complete.
+    (cutting)
+
+ 5. Fix HADOOP-125. Fix handling of absolute paths on Windows (cutting)
+
+Release 0.1.0 - 2006-04-01
+
+ 1. The first release of Hadoop.
+

+ 244 - 0
common/LICENSE.txt

@@ -0,0 +1,244 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+APACHE HADOOP SUBCOMPONENTS:
+
+The Apache Hadoop project contains subcomponents with separate copyright
+notices and license terms. Your use of the source code for these
+subcomponents is subject to the terms and conditions of the following
+licenses. 
+
+For the org.apache.hadoop.util.bloom.* classes:
+
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract
+ * 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */

+ 2 - 0
common/NOTICE.txt

@@ -0,0 +1,2 @@
+This product includes software developed by The Apache Software
+Foundation (http://www.apache.org/).

+ 31 - 0
common/README.txt

@@ -0,0 +1,31 @@
+For the latest information about Hadoop, please visit our website at:
+
+   http://hadoop.apache.org/core/
+
+and our wiki, at:
+
+   http://wiki.apache.org/hadoop/
+
+This distribution includes cryptographic software.  The country in 
+which you currently reside may have restrictions on the import, 
+possession, use, and/or re-export to another country, of 
+encryption software.  BEFORE using any encryption software, please 
+check your country's laws, regulations and policies concerning the
+import, possession, or use, and re-export of encryption software, to 
+see if this is permitted.  See <http://www.wassenaar.org/> for more
+information.
+
+The U.S. Government Department of Commerce, Bureau of Industry and
+Security (BIS), has classified this software as Export Commodity 
+Control Number (ECCN) 5D002.C.1, which includes information security
+software using or performing cryptographic functions with asymmetric
+algorithms.  The form and manner of this Apache Software Foundation
+distribution makes it eligible for export under the License Exception
+ENC Technology Software Unrestricted (TSU) exception (see the BIS 
+Export Administration Regulations, Section 740.13) for both object 
+code and source code.
+
+The following provides more details on the included cryptographic
+software:
+  Hadoop Core uses the SSL libraries from the Jetty project written 
+by mortbay.org.

+ 132 - 0
common/bin/hadoop

@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script runs the hadoop core commands. 
+
+bin=`which $0`
+bin=`dirname ${bin}`
+bin=`cd "$bin"; pwd`
+ 
+if [ -e "$bin/../libexec/hadoop-config.sh" ]; then
+  . "$bin"/../libexec/hadoop-config.sh
+else
+  . "$bin"/hadoop-config.sh
+fi
+
+function print_usage(){
+  echo "Usage: hadoop [--config confdir] COMMAND"
+  echo "       where COMMAND is one of:"
+  echo "  fs                   run a generic filesystem user client"
+  echo "  version              print the version"
+  echo "  jar <jar>            run a jar file"
+  echo "  distcp <srcurl> <desturl> copy file or directories recursively"
+  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
+  echo "  classpath            prints the class path needed to get the"
+  echo "                       Hadoop jar and the required libraries"
+  echo "  daemonlog            get/set the log level for each daemon"
+  echo " or"
+  echo "  CLASSNAME            run the class named CLASSNAME"
+  echo ""
+  echo "Most commands print help when invoked w/o parameters."
+}
+
+if [ $# = 0 ]; then
+  print_usage
+  exit
+fi
+
+COMMAND=$1
+case $COMMAND in
+  #hdfs commands
+  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer)
+    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated."
+    echo "Instead use the hdfs command for it."
+    echo ""
+    #try to locate hdfs and if present, delegate to it.  
+    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
+      exec "${HADOOP_HDFS_HOME}"/bin/hdfs $*
+    elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
+      exec "${HADOOP_PREFIX}"/bin/hdfs $*
+    else
+      echo "HDFS not found."
+      exit
+    fi
+    ;;
+
+  #mapred commands  
+  mradmin|jobtracker|tasktracker|pipes|job|queue)
+    echo "DEPRECATED: Use of this script to execute mapred command is deprecated."
+    echo "Instead use the mapred command for it."
+    echo ""
+    #try to locate mapred and if present, delegate to it.
+    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
+      exec "${HADOOP_MAPRED_HOME}"/bin/mapred $*
+    elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
+      exec "${HADOOP_PREFIX}"/bin/mapred $* 
+    else
+      echo "MAPRED not found."
+      exit
+    fi
+    ;;
+
+  classpath)
+    if $cygwin; then
+      CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+    fi
+    echo $CLASSPATH
+    exit
+    ;;
+
+  #core commands  
+  *)
+    # the core commands
+    if [ "$COMMAND" = "fs" ] ; then
+      CLASS=org.apache.hadoop.fs.FsShell
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "version" ] ; then
+      CLASS=org.apache.hadoop.util.VersionInfo
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "jar" ] ; then
+      CLASS=org.apache.hadoop.util.RunJar
+    elif [ "$COMMAND" = "distcp" ] ; then
+      CLASS=org.apache.hadoop.tools.DistCp
+      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "daemonlog" ] ; then
+      CLASS=org.apache.hadoop.log.LogLevel
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [ "$COMMAND" = "archive" ] ; then
+      CLASS=org.apache.hadoop.tools.HadoopArchives
+      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+      HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+    elif [[ "$COMMAND" = -*  ]] ; then
+        # class and package names cannot begin with a -
+        echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
+        exit 1
+    else
+      CLASS=$COMMAND
+    fi
+    shift
+    
+    if $cygwin; then
+      CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+    fi
+    export CLASSPATH=$CLASSPATH
+    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
+    ;;
+
+esac

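The case statement above maps each sub-command onto a Java class and then execs the JVM with
the CLASSPATH assembled by hadoop-config.sh. A few representative invocations, as a rough
sketch (the job jar, driver class, paths, and host:port are hypothetical; the sub-commands
themselves are the ones listed in print_usage):

    bin/hadoop version                        # org.apache.hadoop.util.VersionInfo
    bin/hadoop fs -ls /                       # generic filesystem client (FsShell)
    bin/hadoop classpath                      # print the assembled classpath
    bin/hadoop jar my-job.jar com.example.MyDriver in out            # hypothetical job jar and driver
    bin/hadoop daemonlog -getlevel nn-host:50070 org.apache.hadoop   # LogLevel client; host:port is a placeholder
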
+ 354 - 0
common/bin/hadoop-config.sh

@@ -0,0 +1,354 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# included in all the hadoop scripts with source command
+# should not be executable directly
+# also should not be passed any arguments, since we need original $*
+
+# Resolve links ($0 may be a softlink) and convert a relative path
+# to an absolute path.  NB: The -P option requires bash built-ins
+# or POSIX:2001 compliant cd and pwd.
+this="${BASH_SOURCE-$0}"
+common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
+script="$(basename -- "$this")"
+this="$common_bin/$script"
+
+# the root of the Hadoop installation
+# See HADOOP-6255 for directory structure layout
+export HADOOP_PREFIX=`dirname "$this"`/..
+
+#check to see if the conf dir is given as an optional argument
+if [ $# -gt 1 ]
+then
+    if [ "--config" = "$1" ]
+	  then
+	      shift
+	      confdir=$1
+	      shift
+	      HADOOP_CONF_DIR=$confdir
+    fi
+fi
+ 
+# Allow alternate conf dir location.
+if [ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]; then
+  DEFAULT_CONF_DIR="conf"
+else
+  DEFAULT_CONF_DIR="etc/hadoop"
+fi
+
+export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_PREFIX/$DEFAULT_CONF_DIR}"
+
+# User can specify hostnames or a file where the hostnames are (not both)
+if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
+  echo \
+    "Error: Please specify one variable HADOOP_SLAVES or " \
+    "HADOOP_SLAVE_NAME and not both."
+  exit 1
+fi
+
+# Process command line options that specify hosts or file with host
+# list
+if [ $# -gt 1 ]
+then
+    if [ "--hosts" = "$1" ]
+    then
+        shift
+        export HADOOP_SLAVES="${HADOOP_CONF_DIR}/$1"
+        shift
+    elif [ "--hostnames" = "$1" ]
+    then
+        shift
+        export HADOOP_SLAVE_NAMES=$1
+        shift
+    fi
+fi
+
+# User can specify hostnames or a file where the hostnames are (not both)
+# (same check as above but now we know it's command line options that cause
+# the problem)
+if [[ ( "$HADOOP_SLAVES" != '' ) && ( "$HADOOP_SLAVE_NAMES" != '' ) ]] ; then
+  echo \
+    "Error: Please specify one of --hosts or --hostnames options and not both."
+  exit 1
+fi
+
+cygwin=false
+case "`uname`" in
+CYGWIN*) cygwin=true;;
+esac
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# check if net.ipv6.bindv6only is set to 1
+bindv6only=$(/sbin/sysctl -n net.ipv6.bindv6only 2> /dev/null)
+if [ -n "$bindv6only" ] && [ "$bindv6only" -eq "1" ] && [ "$HADOOP_ALLOW_IPV6" != "yes" ]
+then
+  echo "Error: \"net.ipv6.bindv6only\" is set to 1 - Java networking could be broken"
+  echo "For more info: http://wiki.apache.org/hadoop/HadoopIPv6"
+  exit 1
+fi
+
+# Newer versions of glibc use an arena memory allocator that causes virtual
+# memory usage to explode. This interacts badly with the many threads that
+# we use in Hadoop. Tune the variable down to prevent vmem explosion.
+export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
+
+# some Java parameters
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# check envvars which might override default args
+if [ "$HADOOP_HEAPSIZE" != "" ]; then
+  #echo "run with heapsize $HADOOP_HEAPSIZE"
+  JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
+  #echo $JAVA_HEAP_MAX
+fi
+
+# CLASSPATH initially contains $HADOOP_CONF_DIR
+CLASSPATH="${HADOOP_CONF_DIR}"
+
+# for developers, add Hadoop classes to CLASSPATH
+if [ -d "$HADOOP_PREFIX/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/classes
+fi
+if [ -d "$HADOOP_PREFIX/build/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build
+fi
+if [ -d "$HADOOP_PREFIX/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/test/classes
+fi
+if [ -d "$HADOOP_PREFIX/build/test/core/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/test/core/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# for releases, add core hadoop jar & webapps to CLASSPATH
+if [ -d "$HADOOP_PREFIX/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX
+fi
+
+if [ -d "$HADOOP_PREFIX/share/hadoop/common/lib" ]; then
+  for f in $HADOOP_PREFIX/share/hadoop/common/lib/*.jar; do
+    CLASSPATH=${CLASSPATH}:$f;
+  done
+fi
+
+for f in $HADOOP_PREFIX/share/hadoop/common/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# for developers, add libs to CLASSPATH
+for f in $HADOOP_PREFIX/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+if [ -d "$HADOOP_PREFIX/build/ivy/lib/Hadoop-Common/common" ]; then
+for f in $HADOOP_PREFIX/build/ivy/lib/Hadoop-Common/common/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+fi
+
+if [ -d "$HADOOP_PREFIX/build/ivy/lib/hadoop-hdfs/hdfs" ]; then
+for f in $HADOOP_PREFIX/build/ivy/lib/hadoop-hdfs/hdfs/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+fi
+
+if [ -d "$HADOOP_PREFIX/build/ivy/lib/Hadoop/mapred" ]; then
+for f in $HADOOP_PREFIX/build/ivy/lib/Hadoop/mapred/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+fi
+
+for f in $HADOOP_PREFIX/lib/jsp-2.1/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add user-specified CLASSPATH last
+if [ "$HADOOP_CLASSPATH" != "" ]; then
+  CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
+fi
+
+# default log directory & file
+if [ "$HADOOP_LOG_DIR" = "" ]; then
+  HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
+fi
+if [ "$HADOOP_LOGFILE" = "" ]; then
+  HADOOP_LOGFILE='hadoop.log'
+fi
+
+# default policy file for service-level authorization
+if [ "$HADOOP_POLICYFILE" = "" ]; then
+  HADOOP_POLICYFILE="hadoop-policy.xml"
+fi
+
+# restore ordinary behaviour
+unset IFS
+
+# cygwin path translation
+if $cygwin; then
+  HADOOP_PREFIX=`cygpath -w "$HADOOP_PREFIX"`
+  HADOOP_LOG_DIR=`cygpath -w "$HADOOP_LOG_DIR"`
+  JAVA_LIBRARY_PATH=`cygpath -w "$JAVA_LIBRARY_PATH"`
+fi
+
+# setup 'java.library.path' for native-hadoop code if necessary
+
+if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/lib/native" ]; then
+  JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} -Xmx32m ${HADOOP_JAVA_PLATFORM_OPTS} org.apache.hadoop.util.PlatformName | sed -e "s/ /_/g"`
+  
+  if [ -d "$HADOOP_PREFIX/build/native" ]; then
+    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+        JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/build/native/${JAVA_PLATFORM}/lib
+    else
+        JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/build/native/${JAVA_PLATFORM}/lib
+    fi
+  fi
+  
+  if [ -d "${HADOOP_PREFIX}/lib/native" ]; then
+    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/lib/native/${JAVA_PLATFORM}
+    else
+      JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/lib/native/${JAVA_PLATFORM}
+    fi
+  fi
+fi
+
+if [ -e "${HADOOP_PREFIX}/lib/libhadoop.a" ]; then
+  JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/lib
+fi
+
+# cygwin path translation
+if $cygwin; then
+  JAVA_LIBRARY_PATH=`cygpath -p "$JAVA_LIBRARY_PATH"`
+fi
+
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,console}"
+if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+  HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+fi  
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"
+
+# Disable ipv6 as it can cause issues
+HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
+
+# put hdfs in classpath if present
+if [ "$HADOOP_HDFS_HOME" = "" ]; then
+  if [ -d "${HADOOP_PREFIX}/share/hadoop/hdfs" ]; then
+    HADOOP_HDFS_HOME=$HADOOP_PREFIX/share/hadoop/hdfs
+    #echo Found HDFS installed at $HADOOP_HDFS_HOME
+  fi
+fi
+
+if [ -d "${HADOOP_HDFS_HOME}" ]; then
+
+  if [ -d "$HADOOP_HDFS_HOME/webapps" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME
+  fi
+  
+  if [ -d "${HADOOP_HDFS_HOME}/conf" ]; then
+    CLASSPATH=${CLASSPATH}:${HADOOP_HDFS_HOME}/conf
+  fi
+  
+  for f in $HADOOP_HDFS_HOME/hadoop-hdfs-*.jar; do
+    CLASSPATH=${CLASSPATH}:$f;
+  done
+
+  # add libs to CLASSPATH
+  if [ -d "${HADOOP_HDFS_HOME}/lib" ]; then
+    for f in $HADOOP_HDFS_HOME/lib/*.jar; do
+      CLASSPATH=${CLASSPATH}:$f;
+    done
+  fi
+  
+  if [ -d "$HADOOP_HDFS_HOME/build/classes" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/build/classes
+  fi
+fi
+
+# cygwin path translation
+if $cygwin; then
+  HADOOP_HDFS_HOME=`cygpath -w "$HADOOP_HDFS_HOME"`
+fi
+
+# set mapred home if mapred is present
+if [ "$HADOOP_MAPRED_HOME" = "" ]; then
+  if [ -d "${HADOOP_PREFIX}/share/hadoop/mapreduce" ]; then
+    HADOOP_MAPRED_HOME=$HADOOP_PREFIX/share/hadoop/mapreduce
+  fi
+fi
+
+if [ -d "${HADOOP_MAPRED_HOME}" ]; then
+
+  if [ -d "$HADOOP_MAPRED_HOME/webapps" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME
+  fi
+
+  if [ -d "${HADOOP_MAPRED_HOME}/conf" ]; then
+    CLASSPATH=${CLASSPATH}:${HADOOP_MAPRED_HOME}/conf
+  fi
+  
+  for f in $HADOOP_MAPRED_HOME/hadoop-mapreduce-*.jar; do
+    CLASSPATH=${CLASSPATH}:$f
+  done
+
+  if [ -d "${HADOOP_MAPRED_HOME}/lib" ]; then
+    for f in $HADOOP_MAPRED_HOME/lib/*.jar; do
+      CLASSPATH=${CLASSPATH}:$f
+    done
+  fi
+
+  if [ -d "$HADOOP_MAPRED_HOME/build/classes" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/classes
+  fi
+
+  if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/build/tools
+  fi
+
+  for f in $HADOOP_MAPRED_HOME/hadoop-mapreduce-tools-*.jar; do
+    TOOL_PATH=${TOOL_PATH}:$f;
+  done
+  for f in $HADOOP_MAPRED_HOME/build/hadoop-mapreduce-tools-*.jar; do
+    TOOL_PATH=${TOOL_PATH}:$f;
+  done
+fi
+
+# cygwin path translation
+if $cygwin; then
+  HADOOP_MAPRED_HOME=`cygpath -w "$HADOOP_MAPRED_HOME"`
+  TOOL_PATH=`cygpath -p -w "$TOOL_PATH"`
+fi
+
+

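hadoop-config.sh is meant to be sourced, not executed: it resolves HADOOP_PREFIX, picks the
conf dir, consumes an optional leading --config/--hosts/--hostnames from the caller's
arguments, and assembles CLASSPATH and JAVA_LIBRARY_PATH. A minimal sketch of a wrapper that
relies on it (the wrapper name, conf dir, and hosts file are hypothetical):

    # my-tool.sh, invoked as: my-tool.sh --config /etc/hadoop-alt --hosts dn-hosts <args...>
    bin=`dirname "${BASH_SOURCE-$0}"`
    bin=`cd "$bin"; pwd`
    . "$bin"/hadoop-config.sh      # sourced with no args, so it shifts the wrapper's own $1 $2 ...
    echo "$HADOOP_CONF_DIR"        # /etc/hadoop-alt
    echo "$HADOOP_SLAVES"          # /etc/hadoop-alt/dn-hosts
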
+ 172 - 0
common/bin/hadoop-daemon.sh

@@ -0,0 +1,172 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Runs a Hadoop command as a daemon.
+#
+# Environment Variables
+#
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
+#   HADOOP_LOG_DIR   Where log files are stored.  PWD by default.
+#   HADOOP_MASTER    host:path where hadoop code should be rsync'd from
+#   HADOOP_PID_DIR   Where the pid files are stored. /tmp by default.
+#   HADOOP_IDENT_STRING   A string representing this instance of hadoop. $USER by default
+#   HADOOP_NICENESS The scheduling priority for daemons. Defaults to 0.
+##
+
+usage="Usage: hadoop-daemon.sh [--config <conf-dir>] [--hosts hostlistfile] [--script script] (start|stop) <hadoop-command> <args...>"
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+if [ -e "$bin/../libexec/hadoop-config.sh" ]; then
+  . "$bin"/../libexec/hadoop-config.sh
+else
+  . "$bin"/hadoop-config.sh
+fi
+
+# get arguments
+
+#default value
+hadoopScript="$HADOOP_PREFIX"/bin/hadoop
+if [ "--script" = "$1" ]
+  then
+    shift
+    hadoopScript=$1
+    shift
+fi
+startStop=$1
+shift
+command=$1
+shift
+
+hadoop_rotate_log ()
+{
+    log=$1;
+    num=5;
+    if [ -n "$2" ]; then
+	num=$2
+    fi
+    if [ -f "$log" ]; then # rotate logs
+	while [ $num -gt 1 ]; do
+	    prev=`expr $num - 1`
+	    [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
+	    num=$prev
+	done
+	mv "$log" "$log.$num";
+    fi
+}
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# Determine if we're starting a secure datanode, and if so, redefine appropriate variables
+if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then
+  export HADOOP_PID_DIR=$HADOOP_SECURE_DN_PID_DIR
+  export HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR
+  export HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER   
+fi
+
+if [ "$HADOOP_IDENT_STRING" = "" ]; then
+  export HADOOP_IDENT_STRING="$USER"
+fi
+
+
+# get log directory
+if [ "$HADOOP_LOG_DIR" = "" ]; then
+  export HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"
+fi
+mkdir -p "$HADOOP_LOG_DIR"
+chown $HADOOP_IDENT_STRING $HADOOP_LOG_DIR
+
+if [ "$HADOOP_PID_DIR" = "" ]; then
+  HADOOP_PID_DIR=/tmp
+fi
+
+# some variables
+export HADOOP_LOGFILE=hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.log
+export HADOOP_ROOT_LOGGER="INFO,DRFA"
+export HADOOP_SECURITY_LOGGER="INFO,DRFAS"
+log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-$HOSTNAME.out
+pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
+
+# Set default scheduling priority
+if [ "$HADOOP_NICENESS" = "" ]; then
+    export HADOOP_NICENESS=0
+fi
+
+case $startStop in
+
+  (start)
+
+    mkdir -p "$HADOOP_PID_DIR"
+
+    if [ -f $pid ]; then
+      if kill -0 `cat $pid` > /dev/null 2>&1; then
+        echo $command running as process `cat $pid`.  Stop it first.
+        exit 1
+      fi
+    fi
+
+    if [ "$HADOOP_MASTER" != "" ]; then
+      echo rsync from $HADOOP_MASTER
+      rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' $HADOOP_MASTER/ "$HADOOP_PREFIX"
+    fi
+
+    hadoop_rotate_log $log
+    echo starting $command, logging to $log
+    cd "$HADOOP_PREFIX"
+    nohup nice -n $HADOOP_NICENESS $hadoopScript --config $HADOOP_CONF_DIR $command "$@" > "$log" 2>&1 < /dev/null &
+    echo $! > $pid
+    sleep 1; head "$log"
+    sleep 3;
+    if ! ps -p $! > /dev/null ; then
+      exit 1
+    fi
+    ;;
+          
+  (stop)
+
+    if [ -f $pid ]; then
+      if kill -0 `cat $pid` > /dev/null 2>&1; then
+        echo stopping $command
+        kill `cat $pid`
+      else
+        echo no $command to stop
+        exit 1
+      fi
+    else
+      echo no $command to stop
+      exit 1
+    fi
+    ;;
+
+  (*)
+    echo $usage
+    exit 1
+    ;;
+
+esac
+
+

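hadoop-daemon.sh wraps a single command from bin/hadoop (or an alternate --script) in
nohup/nice, rotates the .out file, and records the pid under HADOOP_PID_DIR. Illustrative
start/stop calls (the conf dir is hypothetical, and the daemon names come from the HDFS and
MapReduce projects rather than from this file):

    bin/hadoop-daemon.sh --config /etc/hadoop start namenode
    bin/hadoop-daemon.sh --config /etc/hadoop stop namenode
    # delegate to another driver script instead of bin/hadoop
    bin/hadoop-daemon.sh --script "$HADOOP_PREFIX"/bin/hdfs start datanode
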
+ 38 - 0
common/bin/hadoop-daemons.sh

@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Run a Hadoop command on all slave hosts.
+
+usage="Usage: hadoop-daemons.sh [--config confdir] [--hosts hostlistfile] [start|stop] command args..."
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+if [ -e "$bin/../libexec/hadoop-config.sh" ]; then
+  . "$bin"/../libexec/hadoop-config.sh
+else
+  . "$bin"/hadoop-config.sh
+fi
+
+exec "$bin/slaves.sh" --config $HADOOP_CONF_DIR cd "$HADOOP_PREFIX" \; "$bin/hadoop-daemon.sh" --config $HADOOP_CONF_DIR "$@"

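hadoop-daemons.sh simply fans the same hadoop-daemon.sh invocation out to every slave via
slaves.sh, for example (conf dir and hosts file are hypothetical):

    bin/hadoop-daemons.sh --config /etc/hadoop --hosts dn-hosts start datanode
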
+ 98 - 0
common/bin/rcc

@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# The Hadoop record compiler
+#
+# Environment Variables
+#
+#   JAVA_HOME        The java implementation to use.
+#
+#   HADOOP_OPTS      Extra Java runtime options.
+#
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
+#
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/../libexec/hadoop-config.sh
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# some Java parameters
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# CLASSPATH initially contains $HADOOP_CONF_DIR
+CLASSPATH="${HADOOP_CONF_DIR}"
+
+# for developers, add Hadoop classes to CLASSPATH
+if [ -d "$HADOOP_PREFIX/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/classes
+fi
+if [ -d "$HADOOP_PREFIX/build/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build
+fi
+if [ -d "$HADOOP_PREFIX/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/build/test/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# for releases, add core hadoop jar & webapps to CLASSPATH
+if [ -d "$HADOOP_PREFIX/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX
+fi
+for f in $HADOOP_PREFIX/hadoop-*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add libs to CLASSPATH
+for f in $HADOOP_PREFIX/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+for f in $HADOOP_PREFIX/lib/jetty-ext/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# restore ordinary behaviour
+unset IFS
+
+CLASS='org.apache.hadoop.record.compiler.generated.Rcc'
+
+# cygwin path translation
+if expr `uname` : 'CYGWIN*' > /dev/null; then
+  CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+fi
+
+# run it
+exec "$JAVA" $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"

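rcc assembles a classpath much like bin/hadoop and then runs the record compiler from the
record I/O framework (HADOOP-65 in the change log above) on the DDL files it is given. A
sketch of an invocation; the flag and file name are assumptions, not defined in this script:

    bin/rcc --language java mytypes.jr    # assumed option; would emit Java record classes for mytypes.jr
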
+ 69 - 0
common/bin/slaves.sh

@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Run a shell command on all slave hosts.
+#
+# Environment Variables
+#
+#   HADOOP_SLAVES    File naming remote hosts.
+#     Default is ${HADOOP_CONF_DIR}/slaves.
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_PREFIX}/conf.
+#   HADOOP_SLAVE_SLEEP Seconds to sleep between spawning remote commands.
+#   HADOOP_SSH_OPTS Options passed to ssh when running remote commands.
+##
+
+usage="Usage: slaves.sh [--config confdir] command..."
+
+# if no args specified, show usage
+if [ $# -le 0 ]; then
+  echo $usage
+  exit 1
+fi
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+if [ -e "$bin/../libexec/hadoop-config.sh" ]; then
+  . "$bin"/../libexec/hadoop-config.sh
+else
+  . "$bin"/hadoop-config.sh
+fi
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# Where to start the script; see hadoop-config.sh
+# (it sets up the variables based on command line options)
+if [ "$HADOOP_SLAVE_NAMES" != '' ] ; then
+  SLAVE_NAMES=$HADOOP_SLAVE_NAMES
+else
+  SLAVE_FILE=${HADOOP_SLAVES:-${HADOOP_CONF_DIR}/slaves}
+  SLAVE_NAMES=$(cat "$SLAVE_FILE" | sed  's/#.*$//;/^$/d')
+fi
+
+# start the daemons
+for slave in $SLAVE_NAMES ; do
+ ssh $HADOOP_SSH_OPTS $slave $"${@// /\\ }" \
+   2>&1 | sed "s/^/$slave: /" &
+ if [ "$HADOOP_SLAVE_SLEEP" != "" ]; then
+   sleep $HADOOP_SLAVE_SLEEP
+ fi
+done
+
+wait

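slaves.sh runs one shell command over ssh on every host taken from HADOOP_SLAVE_NAMES or
from the slaves file, prefixing each output line with the host name. Two illustrative runs
(the host names are placeholders):

    bin/slaves.sh uptime                                          # uses ${HADOOP_CONF_DIR}/slaves by default
    HADOOP_SLAVE_NAMES="dn1 dn2" HADOOP_SLAVE_SLEEP=1 bin/slaves.sh hostname
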
+ 40 - 0
common/bin/start-all.sh

@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Start all hadoop daemons.  Run this on master node.
+
+echo "This script is Deprecated. Instead use start-dfs.sh and start-mapred.sh"
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+if [ -e "$bin/../libexec/hadoop-config.sh" ]; then
+  . "$bin"/../libexec/hadoop-config.sh
+else
+  . "$bin"/hadoop-config.sh
+fi
+
+# start hdfs daemons if hdfs is present
+if [ -f "${HADOOP_HDFS_HOME}"/bin/start-dfs.sh ]; then
+  "${HADOOP_HDFS_HOME}"/bin/start-dfs.sh --config $HADOOP_CONF_DIR
+fi
+
+# start mapred daemons if mapred is present
+if [ -f "${HADOOP_MAPRED_HOME}"/bin/start-mapred.sh ]; then
+  "${HADOOP_MAPRED_HOME}"/bin/start-mapred.sh --config $HADOOP_CONF_DIR
+fi

+ 41 - 0
common/bin/stop-all.sh

@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# Stop all hadoop daemons.  Run this on master node.
+
+echo "This script is Deprecated. Instead use stop-dfs.sh and stop-mapred.sh"
+
+bin=`dirname "${BASH_SOURCE-$0}"`
+bin=`cd "$bin"; pwd`
+
+if [ -e "$bin/../libexec/hadoop-config.sh" ]; then
+  . "$bin"/../libexec/hadoop-config.sh
+else
+  . "$bin"/hadoop-config.sh
+fi
+
+# stop hdfs daemons if hdfs is present
+if [ -f "${HADOOP_HDFS_HOME}"/bin/stop-dfs.sh ]; then
+  "${HADOOP_HDFS_HOME}"/bin/stop-dfs.sh --config $HADOOP_CONF_DIR
+fi
+
+# stop mapred daemons if mapred is present
+if [ -f "${HADOOP_MAPRED_HOME}"/bin/stop-mapred.sh ]; then
+  "${HADOOP_MAPRED_HOME}"/bin/stop-mapred.sh --config $HADOOP_CONF_DIR
+fi
+

+ 1898 - 0
common/build.xml

@@ -0,0 +1,1898 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project name="Hadoop-Common" default="compile" 
+   xmlns:ivy="antlib:org.apache.ivy.ant"
+   xmlns:artifact="urn:maven-artifact-ant"> 
+
+  <!-- Load all the default properties, and any the user wants    -->
+  <!-- to contribute (without having to type -D or edit this file -->
+  <property file="${user.home}/build.properties" />
+  <property file="${basedir}/build.properties" />
+ 
+  <property name="module" value="common"/>
+  <property name="Name" value="Hadoop-${module}"/>
+  <property name="name" value="hadoop-${module}"/>
+  <property name="_version" value="0.23.0"/>
+  <property name="version" value="${_version}-SNAPSHOT"/>
+  <property name="final.name" value="${name}-${version}"/>
+  <property name="test.final.name" value="${name}-test-${version}"/>
+  <property name="year" value="2009"/>
+  <property name="package.release" value="1"/>
+
+  <property name="src.dir" value="${basedir}/src"/>  	
+  <property name="java.src.dir" value="${src.dir}/java"/>
+  <property name="native.src.dir" value="${basedir}/src/native"/>
+
+  <property name="lib.dir" value="${basedir}/lib"/>
+  <property name="conf.dir" value="${basedir}/conf"/>
+  <property name="docs.src" value="${basedir}/src/docs"/>
+  <property name="src.docs.cn" value="${basedir}/src/docs/cn"/>
+  <property name="changes.src" value="${docs.src}/changes"/>
+  <property name="src.webapps" value="${basedir}/src/webapps"/>
+
+  <property name="build.dir" value="${basedir}/build"/>
+  <property name="build.classes" value="${build.dir}/classes"/>
+  <property name="build.src" value="${build.dir}/src"/>
+  <property name="build.webapps" value="${build.dir}/webapps"/>
+
+  <!-- convert spaces to _ so that mac os doesn't break things -->
+  <exec executable="tr" inputstring="${os.name}" 
+        outputproperty="nonspace.os">
+     <arg value="[:space:]"/>
+     <arg value="_"/>
+  </exec>
+  <property name="build.platform" 
+            value="${nonspace.os}-${os.arch}-${sun.arch.data.model}"/>
+  <property name="jvm.arch" 
+            value="${sun.arch.data.model}"/>
+  <property name="build.native" value="${build.dir}/native/${build.platform}"/>
+  <property name="build.docs" value="${build.dir}/docs"/>
+  <property name="build.docs.cn" value="${build.dir}/docs/cn"/>
+  <property name="build.javadoc" value="${build.docs}/api"/>
+  <property name="build.javadoc.timestamp" value="${build.javadoc}/index.html" />
+  <property name="build.javadoc.dev" value="${build.docs}/dev-api"/>
+  <property name="build.encoding" value="ISO-8859-1"/>
+  <property name="install.c++" value="${build.dir}/c++/${build.platform}"/>
+
+  <property name="test.src.dir" value="${basedir}/src/test"/>
+  <property name="test.build.dir" value="${build.dir}/test"/>
+  <property name="test.generated.dir" value="${test.build.dir}/src"/>
+  <property name="test.build.data" value="${test.build.dir}/data"/>
+  <property name="test.cache.data" value="${test.build.dir}/cache"/>
+  <property name="test.debug.data" value="${test.build.dir}/debug"/>
+  <property name="test.log.dir" value="${test.build.dir}/logs"/>
+  <property name="test.build.classes" value="${test.build.dir}/classes"/>
+  <property name="test.build.extraconf" value="${test.build.dir}/extraconf"/>
+  <property name="test.build.javadoc" value="${test.build.dir}/docs/api"/>
+  <property name="test.build.javadoc.dev" value="${test.build.dir}/docs/dev-api"/>
+  <property name="test.build.webapps" value="${build.dir}/test/webapps"/>
+  <property name="test.include" value="Test*"/>
+  <property name="test.classpath.id" value="test.classpath"/>
+  <property name="test.output" value="no"/>
+  <property name="test.timeout" value="900000"/>
+  <property name="test.junit.output.format" value="plain"/>
+  <property name="test.junit.fork.mode" value="perTest" />
+  <property name="test.junit.printsummary" value="yes" />
+  <property name="test.junit.haltonfailure" value="no" />
+  <property name="test.junit.maxmemory" value="512m" />
+  <property name="test.conf.dir" value="${build.dir}/test/conf" />
+
+  <property name="test.core.build.classes" value="${test.build.dir}/core/classes"/>
+
+  <property name="test.all.tests.file" value="${test.src.dir}/all-tests"/>
+  <property name="test.exclude.file" value="${test.src.dir}/empty-file" />
+
+  <property name="javadoc.link.java"
+	    value="http://java.sun.com/javase/6/docs/api/"/>
+  <property name="javadoc.packages" value="org.apache.hadoop.*"/>
+  <property name="javadoc.maxmemory" value="512m" />
+
+  <property name="dist.dir" value="${build.dir}/${final.name}"/>
+
+  <property name="javac.debug" value="on"/>
+  <property name="javac.optimize" value="on"/>
+  <property name="javac.deprecation" value="off"/>
+  <property name="javac.version" value="1.6"/>
+  <property name="javac.args" value=""/>
+  <property name="javac.args.warnings" value="-Xlint:unchecked"/>
+
+  <property name="clover.db.dir" location="${build.dir}/test/clover/db"/>
+  <property name="clover.report.dir" location="${build.dir}/test/clover/reports"/>
+
+  <property name="rat.reporting.classname" value="rat.Report"/>
+
+  <property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
+  <property name="jdiff.xml.dir" value="${lib.dir}/jdiff"/>
+  <property name="jdiff.stability" value="-unstable"/>
+  <property name="jdiff.compatibility" value=""/>
+  <property name="jdiff.stable" value="0.20.2"/>
+  <property name="jdiff.stable.javadoc" 
+            value="http://hadoop.apache.org/core/docs/r${jdiff.stable}/api/"/>
+
+  <property name="scratch.dir" value="${user.home}/tmp"/>
+  <property name="svn.cmd" value="svn"/>
+  <property name="grep.cmd" value="grep"/>
+  <property name="patch.cmd" value="patch"/>
+  <property name="make.cmd" value="make"/>
+
+	
+  <!-- IVY properteis set here -->
+  <property name="ivy.repo.dir" value="${user.home}/ivyrepo" />
+  <property name="ivy.dir" location="ivy" />
+  <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+  <property name="asfrepo" value="https://repository.apache.org"/> 
+  <property name="asfsnapshotrepo" value="${asfrepo}/content/repositories/snapshots"/>
+  <property name="asfstagingrepo"
+  value="${asfrepo}/service/local/staging/deploy/maven2"/>
+  <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
+  <property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/>
+  <property name="ant_task.jar" location="${ivy.dir}/maven-ant-tasks-${ant-task.version}.jar"/>
+  <property name="ant_task_repo_url" 
+     value="${mvnrepo}/org/apache/maven/maven-ant-tasks/${ant-task.version}/maven-ant-tasks-${ant-task.version}.jar"/>
+  <property name="ivy_repo_url" value="${mvnrepo}/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/>
+  <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml" />
+  <property name="ivy.org" value="org.apache.hadoop"/>
+  <property name="build.dir" location="build" />
+  <property name="dist.dir" value="${build.dir}/${final.name}"/>
+  <property name="build.ivy.dir" location="${build.dir}/ivy" />
+  <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
+  <property name="common.ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}/common"/>
+  <property name="build.ivy.report.dir" location="${build.ivy.dir}/report"/>
+  <property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven"/>
+  <property name="pom.xml" location="${build.ivy.maven.dir}/pom.xml"/>
+  <property name="hadoop-common.pom" location="${ivy.dir}/hadoop-common.xml"/>
+  <property name="build.ivy.maven.common.jar" location="${build.ivy.maven.dir}/hadoop-common-${version}.jar"/>
+  <property name="hadoop-common-test.pom" location="${ivy.dir}/hadoop-common-test.xml" />
+  <property name="build.ivy.maven.common-test.jar" location="${build.ivy.maven.dir}/hadoop-common-test-${version}.jar"/>
+
+  <!--this is the naming policy for artifacts we want pulled down-->
+  <property name="ivy.module" location="hadoop-common" />
+  <property name="ivy.artifact.retrieve.pattern" value="${ant.project.name}/[conf]/[artifact]-[revision].[ext]"/>
+
+  <!--this is how artifacts that get built are named-->
+  <property name="ivy.publish.pattern" value="[artifact]-[revision].[ext]"/>
+  <property name="hadoop-common.jar" location="${build.dir}/${final.name}.jar" />
+  <property name="hadoop-common-test.jar" location="${build.dir}/${test.final.name}.jar" />
+  <property name="hadoop-common-sources.jar" location="${build.dir}/${final.name}-sources.jar" />
+  <property name="hadoop-common-test-sources.jar" location="${build.dir}/${test.final.name}-sources.jar" />
+
+  <!-- jdiff.home property set -->
+  <property name="jdiff.home" value="${build.ivy.lib.dir}/${ant.project.name}/jdiff"/>
+  <property name="jdiff.jar" value="${jdiff.home}/jdiff-${jdiff.version}.jar"/>
+  <property name="xerces.jar" value="${jdiff.home}/xerces-${xerces.version}.jar"/>
+
+  <property name="clover.jar" location="${clover.home}/lib/clover.jar"/>
+  <available property="clover.present" file="${clover.jar}" />
+	
+  <!-- Eclipse properties -->
+  <property name="build.dir.eclipse" value="build/eclipse"/>
+  <property name="build.dir.eclipse-main-classes" value="${build.dir.eclipse}/classes-main"/>
+  <property name="build.dir.eclipse-test-classes" value="${build.dir.eclipse}/classes-test"/>
+  <property name="build.dir.eclipse-test-generated-classes" value="${build.dir.eclipse}/classes-test-generated"/>
+
+  <!-- check if clover reports should be generated -->
+  <condition property="clover.enabled">
+    <and>
+        <isset property="run.clover"/>
+        <isset property="clover.present"/>
+    </and>
+  </condition>
+
+  <condition property="staging">
+     <equals arg1="${repo}" arg2="staging"/>
+  </condition>
+
+  <!-- packaging properties -->
+  <property name="package.prefix" value="/usr"/>
+  <property name="package.conf.dir" value="/etc/hadoop"/>
+  <property name="package.log.dir" value="/var/log/hadoop"/>
+  <property name="package.pid.dir" value="/var/run/hadoop"/>
+  <property name="package.var.dir" value="/var/lib/hadoop"/>
+  <property name="package.share.dir" value="/share/hadoop/${module}"/>
+  <!-- Use fixed path to build rpm for avoiding rpmbuild conflict with dash path names -->
+  <property name="package.buildroot" value="/tmp/hadoop_package_build_${user.name}"/>
+  <property name="package.build.dir" value="/tmp/hadoop_package_build_${user.name}/BUILD"/>
+
+  <!-- the normal classpath -->
+  <path id="classpath">
+    <pathelement location="${build.classes}"/>
+    <pathelement location="${conf.dir}"/>
+    <path refid="ivy-common.classpath"/>
+  </path>
+
+  <path id="test.classpath">
+    <pathelement location="${test.build.extraconf}"/>
+    <pathelement location="${test.core.build.classes}" />
+    <pathelement location="${test.src.dir}"/>
+    <pathelement location="${test.build.dir}"/>
+    <pathelement location="${build.dir}"/>
+    <pathelement location="${build.examples}"/>
+    <pathelement location="${build.tools}"/>
+    <pathelement path="${clover.jar}"/>
+    <path refid="ivy-common.classpath"/>
+    <path refid="ivy-test.classpath"/>
+    <pathelement location="${build.classes}"/>
+    <pathelement location="${test.conf.dir}"/>
+  </path>
+<!--
+  <path id="test.hdfs.classpath">
+    <pathelement location="${test.hdfs.build.classes}" />
+    <path refid="test.classpath"/>
+  </path>
+
+  <path id="test.mapred.classpath">
+    <pathelement location="${test.mapred.build.classes}" />
+    <path refid="test.hdfs.classpath"/>
+  </path>
+
+  <path id="test.hdfs.with.mr.classpath">
+    <pathelement location="${test.hdfs.with.mr.build.classes}" />
+    <path refid="test.mapred.classpath"/>
+  </path>
+-->
+  <!-- the cluster test classpath: uses conf.dir for configuration -->
+  <path id="test.cluster.classpath">
+    <path refid="classpath"/>
+    <pathelement location="${test.build.classes}" />
+    <pathelement location="${test.src.dir}"/>
+    <pathelement location="${build.dir}"/>
+  </path>
+
+
+  <!-- ====================================================== -->
+  <!-- Macro definitions                                      -->
+  <!-- ====================================================== -->
+  <macrodef name="macro_tar" description="Worker Macro for tar">
+    <attribute name="param.destfile"/>
+    <element name="param.listofitems"/>
+    <sequential>
+      <tar compression="gzip" longfile="gnu"
+      destfile="@{param.destfile}">
+      <param.listofitems/>
+      </tar>
+    </sequential>
+  </macrodef>
+
+  <!-- ====================================================== -->
+  <!-- Stuff needed by all targets                            -->
+  <!-- ====================================================== -->
+  <target name="init" depends="ivy-retrieve-common">
+    <mkdir dir="${build.dir}"/>
+    <mkdir dir="${build.classes}"/>
+    <mkdir dir="${build.src}"/>
+    <mkdir dir="${build.webapps}"/>
+ 
+    <mkdir dir="${test.build.dir}"/>
+    <mkdir dir="${test.build.classes}"/>
+    <mkdir dir="${test.build.extraconf}"/>
+    <tempfile property="touch.temp.file" destDir="${java.io.tmpdir}"/>
+    <touch millis="0" file="${touch.temp.file}">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+    </touch>
+    <delete file="${touch.temp.file}"/>
+    <!-- copy all of the jsp and static files -->
+    <copy todir="${build.webapps}">
+      <fileset dir="${src.webapps}">
+        <exclude name="**/*.jsp" />
+        <exclude name="**/*.jspx" />
+      </fileset>
+    </copy>
+
+    <copy todir="${conf.dir}" verbose="true">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+      <mapper type="glob" from="*.template" to="*"/>
+    </copy>
+
+    <mkdir dir="${test.conf.dir}"/>
+    <copy todir="${test.conf.dir}" verbose="true">
+      <fileset dir="${conf.dir}" includes="**/*.template"/>
+      <mapper type="glob" from="*.template" to="*"/>
+    </copy>
+
+    <exec executable="sh">
+       <arg line="src/saveVersion.sh ${version} ${build.dir}"/>
+    </exec>
+	
+   <exec executable="sh">
+       <arg line="src/fixFontsPath.sh ${src.docs.cn}"/>
+   </exec>
+  </target>
+
+  <import file="${test.src.dir}/aop/build/aop.xml"/>
+
+  <!-- ====================================================== -->
+  <!-- Compile the Java files                                 -->
+  <!-- ====================================================== -->
+  <target name="record-parser" depends="init" if="javacc.home">
+      <javacc
+          target="${java.src.dir}/org/apache/hadoop/record/compiler/generated/rcc.jj"
+          outputdirectory="${java.src.dir}/org/apache/hadoop/record/compiler/generated"
+          javacchome="${javacc.home}" />
+  </target>
+  
+  <target name="compile-rcc-compiler" depends="init, record-parser">
+    <javac 
+        encoding="${build.encoding}" 
+        srcdir="${java.src.dir}"
+        includes="org/apache/hadoop/record/compiler/**/*.java"
+        destdir="${build.classes}"
+        debug="${javac.debug}"
+        optimize="${javac.optimize}"
+        target="${javac.version}"
+        source="${javac.version}"
+        deprecation="${javac.deprecation}">
+        <compilerarg line="${javac.args}"/>
+        <classpath refid="classpath"/>
+    </javac>
+    
+    <taskdef name="recordcc" classname="org.apache.hadoop.record.compiler.ant.RccTask">
+      <classpath refid="classpath" />
+    </taskdef>
+  </target>
+  
+  <target name="compile-core-classes" depends="init, compile-rcc-compiler">
+    <!-- Compile Java files (excluding JSPs) checking warnings -->
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${java.src.dir};${build.src}"	
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg line="${javac.args} ${javac.args.warnings}" />
+      <classpath refid="classpath"/>
+    </javac>
+
+    <copy todir="${build.classes}">
+      <fileset dir="${java.src.dir}" includes="**/*.properties"/>
+      <fileset dir="${java.src.dir}" includes="core-default.xml"/>
+    </copy>
+     
+  </target>
+
+  <target name="compile-native">
+    <antcall target="compile-core-native">
+      <param name="compile.native" value="true"/>
+    </antcall> 
+  </target>
+
+  <target name="check-native-configure" if="compile.native">
+    <condition property="need.native.configure">
+       <not> <available file="${native.src.dir}/configure"/> </not>
+    </condition>
+  </target>
+
+  <target name="create-native-configure" depends="check-native-configure" if="need.native.configure">
+    <mkdir dir="${native.src.dir}/config"/>
+    <mkdir dir="${native.src.dir}/m4"/>
+    <exec executable="autoreconf" dir="${native.src.dir}" 
+          searchpath="yes" failonerror="yes">
+       <arg value="-i"/>
+       <arg value="-f"/>
+    </exec>
+  </target>
+
+  <target name="check-native-makefile" if="compile.native">
+    <condition property="need.native.makefile">
+       <not> <available file="${native.src.dir}/Makefile"/> </not>
+    </condition>
+  </target>
+
+  <target name="create-native-makefile" depends="check-native-makefile" if="need.native.makefile"> 
+    <antcall target="create-native-configure"/>
+    <mkdir dir="${build.native}"/>
+	<exec dir="${build.native}" executable="sh" failonerror="true">
+	  <env key="OS_NAME" value="${os.name}"/>
+	  <env key="OS_ARCH" value="${os.arch}"/>
+	  <env key="JVM_DATA_MODEL" value="${sun.arch.data.model}"/>
+	  <env key="HADOOP_NATIVE_SRCDIR" value="${native.src.dir}"/>
+	  <arg line="${native.src.dir}/configure"/>
+    </exec>
+  </target>
+
+
+  <target name="compile-core-native" depends="compile-core-classes,create-native-makefile"
+          if="compile.native">
+  	
+    <mkdir dir="${build.native}/lib"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/io/compress/zlib"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/io/nativeio"/>
+    <mkdir dir="${build.native}/src/org/apache/hadoop/security"/>
+
+  	<javah 
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/io/compress/zlib"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.io.compress.zlib.ZlibCompressor" />
+      <class name="org.apache.hadoop.io.compress.zlib.ZlibDecompressor" />
+  	</javah>
+
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/security"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.security.JniBasedUnixGroupsMapping" />
+  	</javah>
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/io/nativeio"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.io.nativeio.NativeIO" />
+  	</javah>
+
+  	<javah
+  	  classpath="${build.classes}"
+  	  destdir="${build.native}/src/org/apache/hadoop/security"
+      force="yes"
+  	  verbose="yes"
+  	  >
+  	  <class name="org.apache.hadoop.security.JniBasedUnixGroupsNetgroupMapping" />
+  	</javah>
+
+    <exec dir="${build.native}" executable="${make.cmd}" failonerror="true">
+      <env key="OS_NAME" value="${os.name}"/>
+      <env key="OS_ARCH" value="${os.arch}"/>
+  	  <env key="JVM_DATA_MODEL" value="${sun.arch.data.model}"/>
+  	  <env key="HADOOP_NATIVE_SRCDIR" value="${native.src.dir}"/>
+    </exec>
+
+	<exec dir="${build.native}" executable="sh" failonerror="true">
+	  <arg line="${build.native}/libtool --mode=install cp ${build.native}/libhadoop.la ${build.native}/lib"/>
+    </exec>
+
+  </target>
+
+  <target name="compile-core"
+          depends="clover,compile-core-classes,
+  	compile-core-native" 
+  	description="Compile core only">
+  </target>
+
+  <target name="compile" depends="compile-core" description="Compile core">
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make hadoop-common.jar                                               -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="jar" depends="compile-core" description="Make hadoop-common.jar">
+    <tar compression="gzip" destfile="${build.classes}/bin.tgz">
+      <tarfileset dir="bin" mode="755"/>
+    </tar>
+    <property name="jar.properties.list" value="commons-logging.properties, log4j.properties, hadoop-metrics.properties" />
+    <jar jarfile="${build.dir}/${final.name}.jar"
+         basedir="${build.classes}">
+      <manifest>
+        <section name="org/apache/hadoop">
+          <attribute name="Implementation-Title" value="${ant.project.name}"/>
+          <attribute name="Implementation-Version" value="${version}"/>
+          <attribute name="Implementation-Vendor" value="Apache"/>
+        </section>
+      </manifest>
+      <fileset dir="${conf.dir}" includes="${jar.properties.list}" />
+      <fileset file="${jar.extra.properties.list}" />
+    </jar>
+
+    <jar jarfile="${hadoop-common-sources.jar}">
+      <fileset dir="${java.src.dir}" includes="org/apache/hadoop/**/*.java"/>
+      <fileset dir="${build.src}" includes="org/apache/hadoop/**/*.java"/>
+    </jar>
+  </target>
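+  <!-- Example invocation (run from the top of the common tree):
+
+         ant jar
+
+       builds ${final.name}.jar under ${build.dir}, with the bin/ scripts
+       bundled inside the jar as bin.tgz. -->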
+
+  <!-- ================================================================== -->
+  <!-- Make the Hadoop metrics jar. (for use outside Hadoop)              -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="metrics.jar" depends="compile-core" description="Make the Hadoop metrics jar. (for use outside Hadoop)">
+    <jar jarfile="${build.dir}/hadoop-metrics-${version}.jar"
+         basedir="${build.classes}">
+      <include name="**/metrics/**" />
+      <exclude name="**/package.html" />
+    </jar>
+  </target>
+
+  <target name="metrics2.jar" depends="compile-core" description="Make the Hadoop metrics2 framework jar (for use plugin development)">
+    <jar jarfile="${build.dir}/hadoop-metrics2-${version}.jar"
+         basedir="${build.classes}">
+      <include name="**/metrics2/**" />
+    </jar>
+  </target>
+
+  <target name="generate-test-records" depends="compile-rcc-compiler">
+    <recordcc destdir="${test.generated.dir}">
+      <fileset dir="${test.src.dir}"
+	         includes="**/*.jr" />
+    </recordcc>
+  </target>
+
+  <target name="generate-avro-records" depends="init, ivy-retrieve-test">
+    <taskdef name="schema" classname="org.apache.avro.specific.SchemaTask">
+      <classpath refid="test.classpath"/>
+    </taskdef>
+    <schema destdir="${test.generated.dir}">
+      <fileset dir="${test.src.dir}">
+        <include name="**/*.avsc" />
+      </fileset>
+    </schema>
+  </target>
+
+  <target name="generate-avro-protocols" depends="init, ivy-retrieve-test">
+    <taskdef name="schema" classname="org.apache.avro.specific.ProtocolTask">
+      <classpath refid="test.classpath"/>
+    </taskdef>
+    <schema destdir="${test.generated.dir}">
+      <fileset dir="${test.src.dir}">
+        <include name="**/*.avpr" />
+     </fileset>
+    </schema>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Compile test code                                                  --> 
+  <!-- ================================================================== -->
+  <!-- This is a wrapper for fault-injection needs -->
+  <target name="-classes-compilation"
+    depends="compile-core-classes, compile-core-test"/> 
+
+  <target name="compile-core-test" depends="compile-core-classes, ivy-retrieve-test, generate-test-records, generate-avro-records, generate-avro-protocols">
+    <mkdir dir="${test.core.build.classes}"/>
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${test.generated.dir}"
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${test.core.build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg line="${javac.args}"/>
+      <classpath refid="test.classpath"/>
+    </javac>
+    <javac 
+     encoding="${build.encoding}" 
+     srcdir="${test.src.dir}/core"
+     includes="org/apache/hadoop/**/*.java"
+     destdir="${test.core.build.classes}"
+     debug="${javac.debug}"
+     optimize="${javac.optimize}"
+     target="${javac.version}"
+     source="${javac.version}"
+     deprecation="${javac.deprecation}">
+      <compilerarg line="${javac.args} ${javac.args.warnings}" />
+      <classpath refid="test.classpath"/>
+     </javac>
+
+    <taskdef
+       name="paranamer" 
+       classname="com.thoughtworks.paranamer.ant.ParanamerGeneratorTask">
+      <classpath refid="classpath" />
+    </taskdef>
+    <paranamer sourceDirectory="${test.src.dir}/core"
+	       outputDirectory="${test.core.build.classes}"/>
+
+    <delete dir="${test.cache.data}"/>
+    <mkdir dir="${test.cache.data}"/>
+    <copy file="${test.src.dir}/core/org/apache/hadoop/cli/testConf.xml" todir="${test.cache.data}"/>
+
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make hadoop-test.jar                                               -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="jar-test" depends="compile-core-test" description="Make hadoop-test.jar">
+    <copy todir="${test.build.classes}">
+      <fileset dir="${test.core.build.classes}"/>
+    </copy>
+    <jar jarfile="${build.dir}/${test.final.name}.jar"
+         basedir="${test.build.classes}">
+         <manifest>
+           <attribute name="Main-Class"
+                      value="org/apache/hadoop/test/CoreTestDriver"/>
+          <section name="org/apache/hadoop">
+            <attribute name="Implementation-Title" value="${ant.project.name}"/>
+            <attribute name="Implementation-Version" value="${version}"/>
+            <attribute name="Implementation-Vendor" value="Apache"/>
+          </section>
+         </manifest>
+    </jar>
+
+    <jar jarfile="${hadoop-common-test-sources.jar}">
+      <fileset dir="${test.generated.dir}" includes="org/apache/hadoop/**/*.java"/>
+      <fileset dir="${test.src.dir}/core" includes="org/apache/hadoop/**/*.java"/>
+    </jar>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Fault injection customization section.
+       These targets ought to be copied over to other projects and modified
+       as needed -->
+  <!-- ================================================================== -->
+  <target name="run-test-core-fault-inject" depends="injectfaults" 
+	  description="Run full set of the unit tests with fault injection">
+    <macro-run-tests-fault-inject target.name="run-test-core"
+      testcasesonly="false"/>
+  </target>
+
+  <target name="jar-test-fault-inject" depends="injectfaults" 
+    description="Make hadoop-test-fi.jar">
+    <macro-jar-test-fault-inject
+      target.name="jar-test"
+      jar.final.name="test.final.name"
+      jar.final.value="${test.final.name}-fi" />
+  </target>
+
+  <target name="jar-fault-inject" depends="injectfaults" 
+    description="Make hadoop-fi.jar">
+    <macro-jar-fault-inject
+      target.name="jar"
+      build.dir="${build-fi.dir}"
+      jar.final.name="final.name"
+      jar.final.value="${final.name}-fi" />
+  </target>
+
+  <!--This target is not included in the top-level list of targets because
+  it serves the special "regression" purpose of running non-FI tests in an
+  FI environment -->
+  <target name="run-fault-inject-with-testcaseonly" depends="injectfaults">
+    <fail unless="testcase">Can't run this target without -Dtestcase setting!
+    </fail>
+    <macro-run-tests-fault-inject target.name="run-test-core" 
+      testcasesonly="true"/>
+  </target>
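+  <!-- Typical invocation of the target above; the test class name is a
+       placeholder:
+
+         ant run-fault-inject-with-testcaseonly -Dtestcase=TestFoo
+  -->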
+  <!-- ================================================================== -->
+  <!-- End of Fault injection customization section                       -->
+  <!-- ================================================================== -->
+
+  <condition property="tests.notestcase">
+    <and>
+      <isfalse value="${test.fault.inject}"/>
+      <not>
+        <isset property="testcase"/>
+      </not>
+    </and>
+  </condition>
+  <condition property="tests.notestcase.fi">
+    <and>
+      <not>
+        <isset property="testcase" />
+      </not>
+      <istrue value="${test.fault.inject}" />
+    </and>
+  </condition>
+  <condition property="tests.testcase">
+    <and>
+      <isfalse value="${test.fault.inject}" />
+      <isset property="testcase" />
+    </and>
+  </condition>
+  <condition property="tests.testcaseonly">
+    <istrue value="${special.fi.testcasesonly}" />
+  </condition>
+  <condition property="tests.testcase.fi">
+    <and>
+      <istrue value="${test.fault.inject}" />
+      <isset property="testcase" />
+      <isfalse value="${special.fi.testcasesonly}" />
+    </and>
+  </condition>
+	     
+  <!-- ================================================================== -->
+  <!-- Run unit tests                                                     --> 
+  <!-- ================================================================== -->
+  <macrodef name="macro-test-runner">
+    <attribute name="test.file" />
+    <attribute name="classpath" />
+    <attribute name="test.dir" />
+    <attribute name="fileset.dir" />
+    <attribute name="hadoop.conf.dir.deployed" default="" />
+    <attribute name="test.krb5.conf.filename" default="" />
+    <sequential>
+      <delete file="${test.build.dir}/testsfailed"/>
+      <delete dir="@{test.dir}/data" />
+      <mkdir dir="@{test.dir}/data" />
+      <delete dir="${test.build.webapps}"/>
+      <copy todir="${test.build.webapps}">
+        <fileset dir="${test.src.dir}/test-webapps" includes="**/*" />
+      </copy>
+      <delete dir="@{test.dir}/logs" />
+      <mkdir dir="@{test.dir}/logs" />
+      <copy file="${test.src.dir}/hadoop-policy.xml"
+            todir="@{test.dir}/extraconf" />
+      <copy file="${test.src.dir}/fi-site.xml"
+            todir="@{test.dir}/extraconf" />
+      <junit showoutput="${test.output}"
+             printsummary="${test.junit.printsummary}"
+             haltonfailure="${test.junit.haltonfailure}"
+             fork="yes"
+             forkmode="${test.junit.fork.mode}"
+             maxmemory="${test.junit.maxmemory}"
+             dir="${basedir}"
+             timeout="${test.timeout}"
+             errorProperty="tests.failed"
+             failureProperty="tests.failed">
+        <jvmarg value="-ea" />
+        <sysproperty key="java.net.preferIPv4Stack" value="true"/>
+        <sysproperty key="test.build.data" value="${test.build.data}" />
+        <sysproperty key="test.cache.data" value="${test.cache.data}" />
+        <sysproperty key="test.debug.data" value="${test.debug.data}" />
+        <sysproperty key="hadoop.log.dir" value="${test.log.dir}" />
+        <sysproperty key="test.src.dir" value="${test.src.dir}" />
+        <sysproperty key="test.build.extraconf" value="@{test.dir}/extraconf" />
+         <sysproperty key="java.security.krb5.conf" value="@{test.krb5.conf.filename}"/>
+        <sysproperty key="hadoop.policy.file" value="hadoop-policy.xml" />
+        <sysproperty key="java.library.path"
+          value="${build.native}/lib:${lib.dir}/native/${build.platform}"/>
+        <sysproperty key="java.security.egd" value="file:///dev/urandom" />
+        <sysproperty key="install.c++.examples" value="${install.c++.examples}"/>
+        <!-- set io.compression.codec.lzo.class in the child jvm only if it is set -->
+        <syspropertyset dynamic="no">
+          <propertyref name="io.compression.codec.lzo.class"/>
+        </syspropertyset>
+        <!-- set compile.c++ in the child jvm only if it is set -->
+        <syspropertyset dynamic="no">
+          <propertyref name="compile.c++"/>
+        </syspropertyset>
+        <classpath refid="@{classpath}" />
+        <!-- Pass probability specifications to the spawned JVM -->
+        <syspropertyset id="FaultProbabilityProperties">
+          <propertyref regex="fi.*"/>
+        </syspropertyset>
+        <sysproperty key="test.system.hdrc.deployed.hadoopconfdir"
+                     value="@{hadoop.conf.dir.deployed}" />
+        <!-- user-to-group mapping class for TestAccessControlList -->
+        <syspropertyset dynamic="no">
+          <propertyref name="TestAccessControlListGroupMapping"/>
+        </syspropertyset>
+        <formatter type="${test.junit.output.format}" />
+        <batchtest todir="@{test.dir}" if="tests.notestcase">
+          <fileset dir="@{fileset.dir}/core"
+                   excludes="**/${test.exclude}.java aop/** system/**">
+             <patternset>
+               <includesfile name="@{test.file}"/>
+               <excludesfile name="${test.exclude.file}"/>
+             </patternset>
+         </fileset>
+        </batchtest>
+        <batchtest todir="${test.build.dir}" if="tests.notestcase.fi">
+          <fileset dir="@{fileset.dir}/aop"
+                   includes="**/${test.include}.java"
+                   excludes="**/${test.exclude}.java"
+                   excludesfile="${test.exclude.file}" />
+         </batchtest>
+         <batchtest todir="@{test.dir}" if="tests.testcase">
+           <fileset dir="@{fileset.dir}/core"
+             includes="**/${testcase}.java" excludes="aop/** system/**"/>
+         </batchtest>
+         <batchtest todir="${test.build.dir}" if="tests.testcase.fi">
+           <fileset dir="@{fileset.dir}/aop" includes="**/${testcase}.java" />
+         </batchtest>
+         <!--The following batch is only for the special case where
+                non-FI tests need to be executed against an FI environment -->
+         <batchtest todir="${test.build.dir}" if="tests.testcaseonly">
+           <fileset dir="@{fileset.dir}/core" includes="**/${testcase}.java" />
+         </batchtest>
+      </junit>
+      <antcall target="checkfailure"/>
+    </sequential>
+  </macrodef>
+
+  <target name="run-test-core" depends="compile-core-test" description="Run core unit tests">
+    <macro-test-runner test.file="${test.all.tests.file}"
+                       classpath="${test.classpath.id}"
+                       test.dir="${test.build.dir}"
+                       fileset.dir="${test.src.dir}"
+                       test.krb5.conf.filename="${test.src.dir}/krb5.conf"
+                       >
+    </macro-test-runner>
+  </target>   
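+  <!-- Example invocations; the test class name is a placeholder:
+
+         ant run-test-core                      (all core unit tests)
+         ant run-test-core -Dtestcase=TestFoo   (a single test case)
+  -->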
+
+  <target name="checkfailure" if="tests.failed">
+    <touch file="${test.build.dir}/testsfailed"/>
+    <fail unless="continueOnFailure">Tests failed!</fail>
+  </target>
+
+  <target name="test-core" description="Run core unit tests">
+    <delete file="${test.build.dir}/testsfailed"/> 
+    <property name="continueOnFailure" value="true"/> 
+    <antcall target="run-test-core"/>
+    <antcall target="run-test-core-fault-inject"/>
+    <available file="${test.build.dir}/testsfailed" property="testsfailed"/>
+    <fail if="testsfailed">Tests failed!</fail> 
+  </target>
+
+  <target name="test" depends="jar-test,test-core" description="Run all unit tests"/>
+
+  <!-- Run all unit tests, not just Test*, and use non-test configuration. -->
+  <target name="test-cluster" description="Run all unit tests, not just Test*, and use non-test configuration.">
+    <antcall target="test">
+      <param name="test.include" value="*"/>
+      <param name="test.classpath.id" value="test.cluster.classpath"/>
+    </antcall>
+  </target>
+
+  <target name="nightly" depends="test, tar">
+  </target>
+	
+  <!-- ================================================================== -->
+  <!-- Run optional third-party tool targets                              --> 
+  <!-- ================================================================== -->
+  <target name="checkstyle" depends="ivy-retrieve-checkstyle,check-for-checkstyle" if="checkstyle.present" 
+       description="Run optional third-party tool targets">
+       <taskdef resource="checkstyletask.properties">
+         <classpath refid="checkstyle-classpath"/>
+       </taskdef>
+  
+	<mkdir dir="${test.build.dir}"/>
+  	
+  	<checkstyle config="${test.src.dir}/checkstyle.xml"
+  		failOnViolation="false">
+      <fileset dir="${java.src.dir}" includes="**/*.java" excludes="**/generated/**"/>
+      <formatter type="xml" toFile="${test.build.dir}/checkstyle-errors.xml"/>
+  	</checkstyle>
+  	
+  	<xslt style="${test.src.dir}/checkstyle-noframes-sorted.xsl"
+        in="${test.build.dir}/checkstyle-errors.xml"
+        out="${test.build.dir}/checkstyle-errors.html"/>
+  </target>
+	
+  <target name="check-for-checkstyle">
+    <available property="checkstyle.present" resource="checkstyletask.properties">
+       <classpath refid="checkstyle-classpath"/>
+    </available>  	
+  </target>
+
+
+ <property name="findbugs.home" value=""/>
+  <target name="findbugs" depends="check-for-findbugs, jar" if="findbugs.present" description="Run findbugs if present">
+    <property environment="env"/>
+    <property name="findbugs.out.dir" value="${test.build.dir}/findbugs"/>
+    <property name="findbugs.exclude.file" value="${test.src.dir}/findbugsExcludeFile.xml"/>
+    <property name="findbugs.report.htmlfile" value="${findbugs.out.dir}/hadoop-findbugs-report.html"/>
+    <property name="findbugs.report.xmlfile" value="${findbugs.out.dir}/hadoop-findbugs-report.xml"/>
+    <taskdef name="findbugs" classname="edu.umd.cs.findbugs.anttask.FindBugsTask"
+        classpath="${findbugs.home}/lib/findbugs-ant.jar" />
+
+        <mkdir dir="${findbugs.out.dir}"/>
+
+    <findbugs home="${findbugs.home}" output="xml:withMessages"
+        outputFile="${findbugs.report.xmlfile}" effort="max"
+        excludeFilter="${findbugs.exclude.file}" jvmargs="-Xmx512M">
+      <auxClasspath>
+        <fileset dir="${env.ANT_HOME}/lib">
+          <include name="ant.jar"/>
+          <include name="ant-launcher.jar"/>
+        </fileset>
+        <fileset dir="${build.ivy.lib.dir}/${ant.project.name}/common">
+          <include name="**/*.jar"/>
+        </fileset>
+      </auxClasspath>
+      <sourcePath path="${java.src.dir}"/>
+      <class location="${basedir}/build/${final.name}.jar" />
+    </findbugs>
+
+        <xslt style="${findbugs.home}/src/xsl/default.xsl"
+        in="${findbugs.report.xmlfile}"
+        out="${findbugs.report.htmlfile}"/>
+  </target>
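+  <!-- Example invocation; the findbugs.home path is a placeholder:
+
+         ant findbugs -Dfindbugs.home=/opt/findbugs
+  -->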
+	
+  <target name="check-for-findbugs">
+    <available property="findbugs.present"
+        file="${findbugs.home}/lib/findbugs.jar" />
+  </target>
+
+
+  <!-- ================================================================== -->
+  <!-- Documentation                                                      -->
+  <!-- ================================================================== -->
+  
+  <target name="docs" depends="forrest.check" description="Generate forrest-based documentation. 
+       To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
+    <exec dir="${docs.src}" executable="${forrest.home}/bin/forrest"
+	  failonerror="true">
+    </exec>
+    <copy todir="${build.docs}">
+      <fileset dir="${docs.src}/build/site/" />
+    </copy>
+    <copy file="${docs.src}/releasenotes.html" todir="${build.docs}"/>
+    <style basedir="${java.src.dir}" destdir="${build.docs}"
+           includes="core-default.xml" style="conf/configuration.xsl"/>
+    <antcall target="changes-to-html"/>
+    <antcall target="cn-docs"/>
+  </target>
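+  <!-- Example invocation; the forrest.home path is a placeholder:
+
+         ant docs -Dforrest.home=/opt/apache-forrest
+  -->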
+
+  <target name="cn-docs" depends="forrest.check, init" description="Generate forrest-based Chinese documentation. 
+        To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." 
+        if="forrest.home">
+    <exec dir="${src.docs.cn}" executable="${forrest.home}/bin/forrest" failonerror="true">
+      <env key="LANG" value="en_US.utf8"/>
+    </exec>
+    <copy todir="${build.docs.cn}">
+      <fileset dir="${src.docs.cn}/build/site/" />
+    </copy>
+    <style basedir="${java.src.dir}" destdir="${build.docs.cn}"
+           includes="core-default.xml" style="conf/configuration.xsl"/>
+    <antcall target="changes-to-html"/>
+  </target>
+
+  <target name="forrest.check" unless="forrest.home">
+    <fail message="'forrest.home' is not defined. Please pass 
+      -Dforrest.home=&lt;base of Apache Forrest installation&gt; to Ant on the command-line." />
+  </target>
+
+  <target name="javadoc-dev" depends="compile, ivy-retrieve-javadoc" description="Generate javadoc for hadoop developers">
+    <mkdir dir="${build.javadoc.dev}"/>
+    <javadoc
+      overview="${java.src.dir}/overview.html"
+      packagenames="org.apache.hadoop.*"
+      destdir="${build.javadoc.dev}"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${Name} ${version} API"
+      doctitle="${Name} ${version} Developer API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      maxmemory="${javadoc.maxmemory}">
+        <packageset dir="${java.src.dir}"/>
+
+        <link href="${javadoc.link.java}"/>
+
+        <classpath >
+          <path refid="classpath" />
+          <path refid="javadoc-classpath"/>
+          <pathelement path="${java.class.path}"/>
+          <pathelement location="${build.tools}"/>
+        </classpath>
+
+    	<group title="Core" packages="org.apache.*"/>
+
+    </javadoc>
+  </target>	
+
+  <target name="javadoc-uptodate" depends="compile, ivy-retrieve-javadoc">
+    <uptodate property="javadoc.is.uptodate">
+      <srcfiles dir="${src.dir}">
+        <include name="**/*.java" />
+        <include name="**/*.html" />
+      </srcfiles>
+      <mapper type="merge" to="${build.javadoc.timestamp}" />
+    </uptodate>
+  </target>
+ 
+  <target name="javadoc" description="Generate javadoc" depends="jar, javadoc-uptodate"
+       unless="javadoc.is.uptodate">
+    <mkdir dir="${build.javadoc}"/>
+    <javadoc
+      overview="${java.src.dir}/overview.html"
+      packagenames="org.apache.hadoop.*"
+      destdir="${build.javadoc}"
+      author="true"
+      version="true"
+      use="true"
+      windowtitle="${Name} ${version} API"
+      doctitle="${Name} ${version} API"
+      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
+      maxmemory="${javadoc.maxmemory}">
+        <packageset dir="${java.src.dir}"/>
+	
+        <link href="${javadoc.link.java}"/>
+
+        <classpath >
+          <path refid="classpath" />
+          <path refid="javadoc-classpath"/>
+          <pathelement path="${java.class.path}"/>
+          <pathelement location="${build.tools}"/>
+        </classpath>
+
+       <group title="Core" packages="org.apache.*"/>
+       <doclet name="org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsStandardDoclet"
+               path="${build.dir}/${final.name}.jar"/>
+    </javadoc>
+  </target>	
+
+  <target name="api-xml" depends="ivy-retrieve-jdiff,javadoc,write-null">
+    <javadoc maxmemory="${javadoc.maxmemory}">
+       <doclet name="org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsJDiffDoclet"
+               path="${build.dir}/${final.name}.jar:${jdiff.jar}:${xerces.jar}">
+         <param name="-apidir" value="${jdiff.xml.dir}"/>
+         <param name="-apiname" value="hadoop-core ${version}"/>
+         <param name="${jdiff.stability}"/>
+       </doclet>
+       <packageset dir="src/java"/>
+       <classpath >
+         <path refid="classpath" />
+         <path refid="jdiff-classpath" />
+         <pathelement path="${java.class.path}"/>
+       </classpath>
+    </javadoc>
+  </target>
+	
+  <target name="write-null">
+	<exec executable="touch">
+	   <arg value="${jdiff.home}/Null.java"/>
+        </exec>
+  </target> 
+
+  <target name="api-report" depends="ivy-retrieve-jdiff,api-xml">
+    <mkdir dir="${jdiff.build.dir}"/>
+    <javadoc sourcepath="src/java"
+             destdir="${jdiff.build.dir}"
+	     sourceFiles="${jdiff.home}/Null.java"
+	     maxmemory="${javadoc.maxmemory}">
+       <doclet name="org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsJDiffDoclet"
+              path="${build.dir}/${final.name}.jar:${jdiff.jar}:${xerces.jar}">
+         <param name="-oldapi" value="hadoop-core ${jdiff.stable}"/>
+         <param name="-newapi" value="hadoop-core ${version}"/>
+         <param name="-oldapidir" value="${jdiff.xml.dir}"/>
+         <param name="-newapidir" value="${jdiff.xml.dir}"/>
+         <param name="-javadocold" value="${jdiff.stable.javadoc}"/>
+         <param name="-javadocnew" value="../../api/"/>
+         <param name="-stats"/>
+         <param name="${jdiff.stability}"/>
+         <param name="${jdiff.compatibility}"/>
+       </doclet>
+       <classpath >
+         <path refid="classpath" />
+         <path refid="jdiff-classpath"/>
+         <pathelement path="${java.class.path}"/>
+       </classpath>
+    </javadoc>
+  </target>
+	
+  <target name="changes-to-html" description="Convert CHANGES.txt into an html file">
+    <mkdir dir="${build.docs}"/>
+    <exec executable="perl" input="CHANGES.txt" output="${build.docs}/changes.html" failonerror="true">
+      <arg value="${changes.src}/changes2html.pl"/>
+    </exec>
+    <copy todir="${build.docs}">
+      <fileset dir="${changes.src}" includes="*.css"/>
+    </copy>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- D I S T R I B U T I O N                                            -->
+  <!-- ================================================================== -->
+  <!--                                                                    -->
+  <!-- ================================================================== -->
+  <target name="package" depends="compile, jar, javadoc, docs, api-report, create-native-configure, jar-test"
+	  description="Build distribution">
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${dist.dir}/lib"/>
+    <mkdir dir="${dist.dir}/libexec"/>
+    <mkdir dir="${dist.dir}/bin"/>
+    <mkdir dir="${dist.dir}/docs"/>
+    <mkdir dir="${dist.dir}/docs/api"/>
+    <mkdir dir="${dist.dir}/docs/jdiff"/>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${common.ivy.lib.dir}"/>
+    </copy>
+
+    <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
+      <fileset dir="lib">
+        <exclude name="**/native/**"/>
+      </fileset>
+    </copy>
+
+  	<exec dir="${dist.dir}" executable="sh" failonerror="true">
+	  <env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
+	  <env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
+	  <env key="DIST_LIB_DIR" value="${dist.dir}/lib/native"/>
+	  <arg line="${native.src.dir}/packageNativeHadoop.sh"/>
+    </exec>
+
+    <copy todir="${dist.dir}/webapps">
+      <fileset dir="${build.webapps}"/>
+    </copy>
+
+    <copy todir="${dist.dir}"> 
+      <fileset file="${build.dir}/${final.name}.jar"/>
+      <fileset file="${build.dir}/${test.final.name}.jar"/>
+    </copy>
+    
+    <copy todir="${dist.dir}/bin">
+      <fileset dir="bin"/>
+    </copy>
+
+    <copy todir="${dist.dir}/conf">
+      <fileset dir="${conf.dir}" excludes="**/*.template"/>
+    </copy>
+
+    <copy todir="${dist.dir}/docs">
+      <fileset dir="${build.docs}"/>
+    </copy>
+
+    <copy file="ivy.xml" tofile="${dist.dir}/ivy.xml"/>
+
+    <copy todir="${dist.dir}/ivy">
+      <fileset dir="ivy"/>
+    </copy>
+
+    <copy todir="${dist.dir}/libexec">
+      <fileset dir="bin">
+        <include name="hadoop-config.sh"/>
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}">
+      <fileset dir=".">
+        <include name="*.txt" />
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}/src" includeEmptyDirs="true">
+      <fileset dir="src" excludes="**/*.template **/docs/build/**/*"/>
+    </copy>
+  	
+    <copy todir="${dist.dir}/" file="build.xml"/>
+
+    <chmod perm="ugo+x" file="${dist.dir}/src/native/configure"/>
+    <chmod perm="ugo+x" type="file" parallel="false">
+        <fileset dir="${dist.dir}/bin"/>
+    </chmod>
+
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Make release tarball                                               -->
+  <!-- ================================================================== -->
+  <target name="tar" depends="package" description="Make release tarball">
+    <macro_tar param.destfile="${build.dir}/${final.name}.tar.gz">
+      <param.listofitems>
+        <tarfileset dir="${build.dir}" mode="664">
+          <exclude name="${final.name}/bin/*" />
+          <exclude name="${final.name}/src/native/configure" />
+          <include name="${final.name}/**" />
+        </tarfileset>
+        <tarfileset dir="${build.dir}" mode="755">
+          <include name="${final.name}/bin/*" />
+          <include name="${final.name}/src/native/configure" />
+        </tarfileset>
+      </param.listofitems>
+    </macro_tar>
+  </target>
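+  <!-- Example invocation: produces ${final.name}.tar.gz under ${build.dir}:
+
+         ant tar
+  -->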
+
+  <target name="bin-package" depends="compile, jar, jar-test, javadoc" 
+		description="assembles artifacts for binary target">
+    <mkdir dir="${dist.dir}"/>
+    <mkdir dir="${dist.dir}/lib"/>
+    <mkdir dir="${dist.dir}/${package.share.dir}/contrib"/>
+    <mkdir dir="${dist.dir}/${package.share.dir}/templates"/>
+    <mkdir dir="${dist.dir}/${package.share.dir}/webapps"/>
+    <mkdir dir="${dist.dir}/bin"/>
+    <mkdir dir="${dist.dir}/libexec"/>
+    <mkdir dir="${dist.dir}/sbin"/>
+    <mkdir dir="${dist.dir}/var/log"/>
+    <mkdir dir="${dist.dir}/var/run"/>
+
+    <copy todir="${dist.dir}/${package.share.dir}/lib" includeEmptyDirs="false" flatten="true">
+      <fileset dir="${common.ivy.lib.dir}"/>
+    </copy>
+
+    <copy todir="${dist.dir}/${package.share.dir}" includeEmptyDirs="false">
+      <fileset dir="lib">
+        <exclude name="**/native/**"/>
+      </fileset>
+    </copy>
+
+  	<exec dir="${dist.dir}" executable="sh" failonerror="true">
+	  <env key="BASE_NATIVE_LIB_DIR" value="${lib.dir}/native"/>
+	  <env key="BUILD_NATIVE_DIR" value="${build.dir}/native"/>
+	  <env key="DIST_LIB_DIR" value="${dist.dir}/lib"/>
+	  <arg line="${native.src.dir}/packageNativeHadoop.sh"/>
+    </exec>
+
+    <copy todir="${dist.dir}/${package.share.dir}"> 
+      <fileset file="${build.dir}/*.jar"/>
+    </copy>
+    
+    <copy todir="${dist.dir}/bin">
+      <fileset dir="bin">
+        <include name="hadoop"/>
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}/libexec">
+      <fileset dir="bin">
+        <include name="hadoop-config.sh"/>
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}/sbin">
+      <fileset dir="bin">
+        <include name="hadoop-daemon.sh"/>
+        <include name="hadoop-daemons.sh"/>
+        <include name="slaves.sh"/>
+        <include name="start-all.sh"/>
+        <include name="stop-all.sh"/>
+      </fileset>
+      <fileset dir="${basedir}/src/packages">
+        <include name="*.sh" />
+      </fileset>
+    </copy>
+
+    <copy todir="${dist.dir}/etc/hadoop">
+      <fileset dir="${conf.dir}" excludes="**/*.template"/>
+      <fileset dir="${conf.dir}" includes="hadoop-env.sh.template"/>
+    </copy>
+
+    <copy todir="${dist.dir}/${package.share.dir}/templates">
+      <fileset dir="${basedir}/src/packages/templates/conf" includes="*"/>
+    </copy>
+
+    <copy todir="${dist.dir}/share/doc/hadoop/${module}">
+      <fileset dir=".">
+        <include name="*.txt" />
+      </fileset>
+    </copy>
+
+    <chmod perm="ugo+x" type="file" parallel="false">
+        <fileset dir="${dist.dir}/bin"/>
+        <fileset dir="${dist.dir}/sbin"/>
+    </chmod>
+  </target>
+
+  <target name="binary" depends="bin-package" description="Make tarball without source and documentation">
+    <macro_tar param.destfile="${build.dir}/${final.name}-bin.tar.gz">
+      <param.listofitems>
+        <tarfileset dir="${build.dir}" mode="664">
+          <exclude name="${final.name}/bin/*" />
+          <exclude name="${final.name}/libexec/*" />
+          <exclude name="${final.name}/sbin/*" />
+          <exclude name="${final.name}/src/**" />
+          <exclude name="${final.name}/docs/**" />
+          <include name="${final.name}/**" />
+        </tarfileset>
+        <tarfileset dir="${build.dir}" mode="755">
+          <include name="${final.name}/bin/*" />
+          <include name="${final.name}/libexec/*" />
+          <include name="${final.name}/sbin/*" />
+        </tarfileset>
+      </param.listofitems>
+    </macro_tar>
+  </target>
+
+  <target name="rpm" depends="binary" description="Make rpm package">
+    <mkdir dir="${package.buildroot}/BUILD" />
+    <mkdir dir="${package.buildroot}/RPMS" />
+    <mkdir dir="${package.buildroot}/SRPMS" />
+    <mkdir dir="${package.buildroot}/SOURCES" />
+    <mkdir dir="${package.buildroot}/SPECS" />
+    <copy todir="${package.buildroot}/SOURCES">
+      <fileset dir="${build.dir}">
+        <include name="${final.name}-bin.tar.gz" />
+      </fileset>
+    </copy>
+    <copy file="${src.dir}/packages/rpm/spec/hadoop.spec" todir="${package.buildroot}/SPECS">
+      <filterchain>
+        <replacetokens>
+          <token key="final.name" value="${final.name}" />
+          <token key="version" value="${_version}" />
+          <token key="package.release" value="${package.release}" />
+          <token key="package.build.dir" value="${package.build.dir}" />
+          <token key="package.prefix" value="${package.prefix}" />
+          <token key="package.conf.dir" value="${package.conf.dir}" />
+          <token key="package.log.dir" value="${package.log.dir}" />
+          <token key="package.pid.dir" value="${package.pid.dir}" />
+          <token key="package.var.dir" value="${package.var.dir}" />
+        </replacetokens>
+      </filterchain>
+    </copy>
+    <rpm specFile="hadoop.spec" command="-bb --target ${os.arch}" topDir="${package.buildroot}" cleanBuildDir="true" failOnError="true"/>
+    <copy todir="${build.dir}/" flatten="true">
+      <fileset dir="${package.buildroot}/RPMS">
+        <include name="**/*.rpm" />
+      </fileset>
+    </copy>
+    <delete dir="${package.buildroot}" quiet="true" verbose="false"/>
+  </target>
+
+  <target name="deb" depends="ivy-retrieve-package, binary" description="Make deb package">
+    <taskdef name="deb"
+           classname="org.vafer.jdeb.ant.DebAntTask">
+      <classpath refid="ivy-package.classpath" />
+    </taskdef>
+
+    <mkdir dir="${package.build.dir}/hadoop.control" />
+    <mkdir dir="${package.buildroot}/${package.prefix}" />
+    <copy todir="${package.buildroot}/${package.prefix}">
+      <fileset dir="${build.dir}/${final.name}">
+        <include name="**" />
+      </fileset>
+    </copy>
+    <copy todir="${package.build.dir}/hadoop.control">
+      <fileset dir="${src.dir}/packages/deb/hadoop.control">
+        <exclude name="control" />
+      </fileset>
+    </copy>
+    <copy file="${src.dir}/packages/deb/hadoop.control/control" todir="${package.build.dir}/hadoop.control">
+      <filterchain>
+        <replacetokens>
+          <token key="final.name" value="${final.name}" />
+          <token key="version" value="${_version}" />
+          <token key="package.release" value="${package.release}" />
+          <token key="package.build.dir" value="${package.build.dir}" />
+          <token key="package.prefix" value="${package.prefix}" />
+          <token key="package.conf.dir" value="${package.conf.dir}" />
+          <token key="package.log.dir" value="${package.log.dir}" />
+          <token key="package.pid.dir" value="${package.pid.dir}" />
+        </replacetokens>
+      </filterchain>
+    </copy>
+    <deb destfile="${package.buildroot}/${name}_${_version}-${package.release}_${os.arch}.deb" control="${package.build.dir}/hadoop.control">
+      <tarfileset dir="${build.dir}/${final.name}" filemode="644" prefix="${package.prefix}">
+        <exclude name="bin" />
+        <exclude name="etc" />
+        <exclude name="libexec" />
+        <exclude name="etc/**" />
+        <exclude name="sbin" />
+        <include name="**" />
+      </tarfileset>
+      <tarfileset dir="${build.dir}/${final.name}/bin" filemode="755" prefix="${package.prefix}/bin">
+        <include name="*" />
+      </tarfileset>
+      <tarfileset dir="${build.dir}/${final.name}/libexec" filemode="755" prefix="${package.prefix}/libexec">
+        <include name="*" />
+      </tarfileset>
+      <tarfileset dir="${build.dir}/${final.name}/sbin" filemode="755" prefix="${package.prefix}/sbin">
+        <include name="*" />
+      </tarfileset>
+      <tarfileset dir="${src.dir}/packages" filemode="755" prefix="${package.prefix}/sbin">
+        <include name="*.sh" />
+      </tarfileset>
+      <tarfileset dir="${build.dir}/${final.name}/etc/hadoop" filemode="644" prefix="${package.conf.dir}">
+        <exclude name="core-site.xml" />
+        <exclude name="hdfs-site.xml" />
+        <exclude name="mapred-site.xml" />
+        <include name="**" /> 
+      </tarfileset>
+    </deb>
+    <copy todir="${build.dir}/" flatten="true">
+      <fileset dir="${package.buildroot}">
+        <include name="**/hadoop*.deb" />
+      </fileset>
+    </copy>
+    <delete dir="${package.buildroot}" quiet="true" verbose="false"/>
+  </target>
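+  <!-- Typical invocations of the packaging targets above; both stage files
+       under ${package.buildroot} and copy the finished packages back into
+       ${build.dir}:
+
+         ant rpm
+         ant deb
+  -->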
+  
+  <target name="ant-task-download" description="To download mvn-ant-task" unless="offline">
+    <get src="${ant_task_repo_url}" dest="${ant_task.jar}" usetimestamp="true"/>
+  </target>
+
+  <target name="mvn-taskdef" depends="ant-task-download">
+     <path id="mvn-ant-task.classpath" path="${ant_task.jar}"/> 
+     <typedef resource="org/apache/maven/artifact/ant/antlib.xml" 
+         uri="urn:maven-artifact-ant"
+         classpathref="mvn-ant-task.classpath"/>
+  </target>   
+
+  <target name="mvn-install" depends="mvn-taskdef,jar,jar-test,set-version"
+    description="Install hadoop common and test jars to local fs m2 repo">
+     <artifact:pom file="${hadoop-common.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-common-test.pom}" id="hadoop.core.test"/>
+     <artifact:install file="${hadoop-common.jar}">
+        <pom refid="hadoop.core"/>
+	<attach file="${hadoop-common-sources.jar}" classifier="sources" />
+     </artifact:install>
+     <artifact:install file="${hadoop-common-test.jar}">
+        <pom refid="hadoop.core.test"/>
+	<attach file="${hadoop-common-test-sources.jar}" classifier="sources" />
+     </artifact:install>
+  </target>
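+  <!-- Typical invocation: installs the common and test jars (plus their
+       -sources jars) into the local Maven repository:
+
+         ant mvn-install
+  -->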
+
+  <target name="mvn-si-install" depends="mvn-install,-mvn-system-install"
+     description="Install system integration test jars as well"/>
+
+  <target name="mvn-deploy" depends="mvn-taskdef, jar, jar-test,
+     jar-system, set-version, signanddeploy, simpledeploy"
+     description="To deploy hadoop common and test jar's to apache
+     snapshot's repository"/>
+
+  <target name="signanddeploy" if="staging" depends="sign">
+     <artifact:pom file="${hadoop-common.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-common-test.pom}" id="hadoop.core.test"/>
+     <artifact:pom file="${hadoop-common-instrumented.pom}" 
+       id="hadoop.core.${herriot.suffix}"/>
+     <artifact:install-provider artifactId="wagon-http"
+     version="${wagon-http.version}"/>
+
+     <artifact:deploy file="${hadoop-common.jar}">
+       <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+       <pom refid="hadoop.core"/>
+       <attach file="${hadoop-common.jar}.asc" type="jar.asc"/>
+       <attach file="${hadoop-common.pom}.asc" type="pom.asc"/>
+       <attach file="${hadoop-common-sources.jar}.asc" type="jar.asc"
+         classifier="sources"/>
+       <attach file="${hadoop-common-sources.jar}" classifier="sources"/>
+     </artifact:deploy>
+
+     <artifact:deploy file="${hadoop-common-test.jar}">
+       <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+       <pom refid="hadoop.core.test"/>
+       <attach file="${hadoop-common-test.jar}.asc" type="jar.asc"/>
+       <attach file="${hadoop-common-test.pom}.asc" type="pom.asc"/>
+       <attach file="${hadoop-common-test-sources.jar}.asc" type="jar.asc"
+         classifier="sources"/>
+       <attach file="${hadoop-common-test-sources.jar}" classifier="sources"/>
+     </artifact:deploy>
+
+     <artifact:deploy file="${hadoop-common-instrumented.jar}">
+       <remoteRepository id="apache.staging.https" url="${asfstagingrepo}"/>
+       <pom refid="hadoop.core.${herriot.suffix}"/>
+       <attach file="${hadoop-common-instrumented.jar}.asc" type="jar.asc"/>
+       <attach file="${hadoop-common-instrumented.pom}.asc" type="pom.asc"/>
+       <attach file="${hadoop-common-instrumented-sources.jar}.asc" 
+         type="jar.asc" classifier="sources"/>
+       <attach file="${hadoop-common-instrumented-sources.jar}"
+         classifier="sources"/>
+     </artifact:deploy>
+  </target>
+
+  <target name="sign" depends="clean-sign" if="staging">
+    <input message="password:>" addproperty="gpg.passphrase">
+     <handler classname="org.apache.tools.ant.input.SecureInputHandler" />
+    </input>
+    <macrodef name="sign-artifact" description="Signs the artifact">
+      <attribute name="input.file"/>
+      <attribute name="output.file" default="@{input.file}.asc"/>
+      <attribute name="gpg.passphrase"/>
+      <sequential>
+        <echo>Signing @{input.file} Sig File: @{output.file}</echo>
+        <exec executable="gpg" >
+          <arg value="--armor"/>
+          <arg value="--output"/>
+          <arg value="@{output.file}"/>
+          <arg value="--passphrase"/>
+          <arg value="@{gpg.passphrase}"/>
+          <arg value="--detach-sig"/>
+          <arg value="@{input.file}"/>
+        </exec>
+      </sequential>
+    </macrodef>
+    <sign-artifact input.file="${hadoop-common.jar}" 
+     output.file="${hadoop-common.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-test.jar}" 
+     output.file="${hadoop-common-test.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-sources.jar}" 
+     output.file="${hadoop-common-sources.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-test-sources.jar}" 
+     output.file="${hadoop-common-test-sources.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common.pom}" 
+     output.file="${hadoop-common.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-test.pom}" 
+     output.file="${hadoop-common-test.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-instrumented.jar}" 
+     output.file="${hadoop-common-instrumented.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-instrumented.pom}" 
+     output.file="${hadoop-common-instrumented.pom}.asc" gpg.passphrase="${gpg.passphrase}"/>
+    <sign-artifact input.file="${hadoop-common-instrumented-sources.jar}" 
+     output.file="${hadoop-common-instrumented-sources.jar}.asc" gpg.passphrase="${gpg.passphrase}"/>
+  </target>
+
+  <target name="simpledeploy" unless="staging">
+     <artifact:pom file="${hadoop-common.pom}" id="hadoop.core"/>
+     <artifact:pom file="${hadoop-common-test.pom}" id="hadoop.test"/>
+     <artifact:pom file="${hadoop-common-instrumented.pom}" 
+       id="hadoop.core.${herriot.suffix}"/>
+
+     <artifact:install-provider artifactId="wagon-http" version="${wagon-http.version}"/>
+     <artifact:deploy file="${hadoop-common.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core"/>
+	 <attach file="${hadoop-common-sources.jar}" classifier="sources" />
+     </artifact:deploy>
+
+     <artifact:deploy file="${hadoop-common-test.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core.test"/>
+	 <attach file="${hadoop-common-test-sources.jar}" classifier="sources" />
+     </artifact:deploy> 
+
+     <artifact:deploy file="${hadoop-common-instrumented.jar}">
+         <remoteRepository id="apache.snapshots.https" url="${asfsnapshotrepo}"/>
+         <pom refid="hadoop.core.${herriot.suffix}"/>
+         <attach file="${hadoop-common-instrumented-sources.jar}" classifier="sources" />
+     </artifact:deploy>
+  </target>
+
+  <target name="set-version">
+    <delete file="${basedir}/ivy/hadoop-common.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-test.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-${herriot.suffix}.xml"/>
+    <copy file="${basedir}/ivy/hadoop-common-template.xml" tofile="${basedir}/ivy/hadoop-common.xml"/>
+    <copy file="${basedir}/ivy/hadoop-common-test-template.xml" tofile="${basedir}/ivy/hadoop-common-test.xml"/>
+    <copy file="${basedir}/ivy/hadoop-common-${herriot.suffix}-template.xml"
+      tofile="${basedir}/ivy/hadoop-common-${herriot.suffix}.xml"/>
+    <replaceregexp byline="true">
+      <regexp pattern="@version"/>
+      <substitution expression="${version}"/>
+      <fileset dir="${basedir}/ivy">
+        <include name="hadoop-common.xml"/>
+        <include name="hadoop-common-test.xml"/>
+        <include name="hadoop-common-${herriot.suffix}.xml"/>
+      </fileset>
+    </replaceregexp>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Perform audit activities for the release                           -->
+  <!-- ================================================================== -->
+  <target name="rats-taskdef" depends="ivy-retrieve-releaseaudit">
+     <typedef format="xml" resource="org/apache/rat/anttasks/antlib.xml" uri="antlib:org.apache.rat.anttasks"
+      classpathref="releaseaudit-classpath"/>
+  </target>
+
+  <target name="releaseaudit" depends="package, rats-taskdef" description="Release Audit activities">
+   <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
+      <fileset dir="${dist.dir}">
+        <exclude name="**/CHANGES.txt"/>
+        <exclude name="**/conf/*"/>
+        <exclude name="**/docs/"/>
+        <exclude name="lib/jdiff/"/>
+        <exclude name="**/native/*"/>
+        <exclude name="**/native/config/*"/>
+        <exclude name="**/native/m4/*"/>
+        <exclude name="**/VERSION"/>
+        <exclude name="**/*.json"/>
+        <exclude name="**/hod/*.txt"/>
+        <exclude name="src/test/empty-file" />
+      </fileset>
+    </rat:report>
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Clean.  Delete the build files, and their directories              -->
+  <!-- ================================================================== -->
+  <target name="clean" depends="clean-sign, clean-fi" description="Clean.  Delete the build files, and their directories">
+    <delete dir="${build.dir}"/>
+    <delete dir="${package.buildroot}"/>
+    <delete file="${basedir}/ivy/hadoop-common.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-test.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-test-pom.xml"/>
+    <delete file="${basedir}/ivy/hadoop-common-${herriot.suffix}.xml"/>
+    <delete dir="${docs.src}/build"/>
+    <delete dir="${src.docs.cn}/build"/>
+  </target>
+
+  <target name="clean-sign" description="Clean.  Delete .asc files">
+    <delete>
+      <fileset dir="." includes="**/**/*.asc"/>
+    </delete>
+  </target>  
+
+  <target name="veryclean" depends="clean" description="Delete mvn ant task jar and ivy ant taks jar">
+    <delete file="${ant_task.jar}"/>
+    <delete file="${ivy.jar}"/>
+  </target>
+
+ <target name="clover" depends="clover.setup, clover.info" description="Instrument the Unit tests using Clover. 
+     To use, specify -Dclover.home=&lt;base of clover installation&gt; -Drun.clover=true on the command line."/>
+
+<target name="clover.setup" if="clover.enabled">
+   <taskdef resource="cloverlib.xml" classpath="${clover.jar}"/>
+   <mkdir dir="${clover.db.dir}"/>
+   <clover-setup initString="${clover.db.dir}/hadoop_coverage.db">
+     <fileset dir="${src.dir}" includes="java/**/*"/>
+     <testsources dir="${test.src.dir}"/>
+   </clover-setup>
+</target>
+
+<target name="clover.info" unless="clover.present">
+  <echo>
+     Clover not found. Code coverage reports disabled.
+  </echo>
+</target>
+
+<target name="clover.check">
+  <fail unless="clover.present">
+  ##################################################################
+   Clover not found.
+   Please specify -Dclover.home=&lt;base of clover installation&gt;
+   on the command line.
+  ##################################################################
+  </fail>
+</target>
+
+<target name="generate-clover-reports" depends="clover.check, clover">
+  <mkdir dir="${clover.report.dir}"/>
+  <clover-report>
+     <current outfile="${clover.report.dir}" title="${final.name}">
+     <format type="html"/>
+     </current>
+  </clover-report>
+  <clover-report>
+     <current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
+     <format type="xml"/>
+     </current>
+  </clover-report>
+</target>
+
+<target name="findbugs.check" depends="check-for-findbugs" unless="findbugs.present">
+  <fail message="'findbugs.home' is not defined. Please pass -Dfindbugs.home=&lt;base of Findbugs installation&gt; to Ant on the command-line." />
+</target>
+
+<target name="patch.check" unless="patch.file">
+  <fail message="'patch.file' is not defined. Please pass -Dpatch.file=&lt;location of patch file&gt; to Ant on the command-line." />
+</target>
+
+<target name="test-patch" depends="patch.check,findbugs.check,forrest.check">
+  <exec executable="bash" failonerror="true">
+    <arg value="${basedir}/src/test/bin/test-patch.sh"/>
+    <arg value="DEVELOPER"/>
+    <arg value="${patch.file}"/>
+    <arg value="${scratch.dir}"/>
+    <arg value="${svn.cmd}"/>
+    <arg value="${grep.cmd}"/>
+    <arg value="${patch.cmd}"/>
+    <arg value="${findbugs.home}"/>
+    <arg value="${forrest.home}"/>
+    <arg value="${basedir}"/>
+  </exec>
+</target>
+
+<target name="hudson-test-patch" depends="findbugs.check,forrest.check">
+  <exec executable="bash" failonerror="true">
+    <arg value="${basedir}/src/test/bin/test-patch.sh"/>
+    <arg value="HUDSON"/>
+    <arg value="${scratch.dir}"/>
+    <arg value="${support.dir}"/>
+    <arg value="${ps.cmd}"/>
+    <arg value="${wget.cmd}"/>
+    <arg value="${jiracli.cmd}"/>
+    <arg value="${svn.cmd}"/>
+    <arg value="${grep.cmd}"/>
+    <arg value="${patch.cmd}"/>
+    <arg value="${findbugs.home}"/>
+    <arg value="${forrest.home}"/>
+    <arg value="${eclipse.home}"/>
+    <arg value="${basedir}"/>
+    <arg value="${jira.passwd}"/>
+    <arg value="${curl.cmd}"/>
+    <arg value="${defect}"/>
+  </exec>
+</target>
+	
+  <condition property="ant-eclipse.jar.exists">
+    <available file="${build.dir}/lib/ant-eclipse-1.0-jvm1.2.jar"/>
+  </condition>
+
+  <target name="ant-eclipse-download" unless="ant-eclipse.jar.exists"
+          description="Downloads the ant-eclipse binary.">
+    <get src="http://downloads.sourceforge.net/project/ant-eclipse/ant-eclipse/1.0/ant-eclipse-1.0.bin.tar.bz2"
+         dest="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" usetimestamp="false" />
+
+    <untar src="${build.dir}/ant-eclipse-1.0.bin.tar.bz2"
+           dest="${build.dir}" compression="bzip2">
+      <patternset>
+        <include name="lib/ant-eclipse-1.0-jvm1.2.jar"/>
+      </patternset>
+    </untar>
+    <delete file="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" />
+  </target>
+
+  <target name="eclipse" 
+          depends="init,ant-eclipse-download,ivy-retrieve-common,ivy-retrieve-test,compile-core-test"
+          description="Create eclipse project files">
+	     <pathconvert property="eclipse.project">
+	       <path path="${basedir}"/>
+	       <regexpmapper from="^.*/([^/]+)$$" to="\1" handledirsep="yes"/>
+	     </pathconvert>
+    <taskdef name="eclipse"
+             classname="prantl.ant.eclipse.EclipseTask"
+             classpath="${build.dir}/lib/ant-eclipse-1.0-jvm1.2.jar" />
+    <eclipse updatealways="true">
+      <project name="${eclipse.project}" />
+      <classpath>
+        <source path="${java.src.dir}"
+                output="${build.dir.eclipse-main-classes}" />
+        <source path="${test.src.dir}/core"
+                output="${build.dir.eclipse-test-classes}" />
+        <source path="${test.src.dir}/aop"
+                output="${build.dir.eclipse-test-classes}" />
+        <source path="${test.generated.dir}"
+                output="${build.dir.eclipse-test-generated-classes}" />
+        <output path="${build.dir.eclipse-main-classes}" />
+        <library pathref="ivy-common.classpath" exported="true" />
+        <library pathref="ivy-test.classpath" exported="false" />
+        <variable path="ANT_HOME/lib/ant.jar" exported="false" />
+        <library path="${conf.dir}" exported="false" />
+        <library path="${java.home}/../lib/tools.jar" exported="false" />
+      </classpath>
+    </eclipse>
+  </target>
+
+  <target name="ivy-init-dirs">
+    <mkdir dir="${build.ivy.dir}" />
+    <mkdir dir="${build.ivy.lib.dir}" />
+    <mkdir dir="${build.ivy.report.dir}" />
+    <mkdir dir="${build.ivy.maven.dir}" />
+  </target>
+
+  <target name="ivy-probe-antlib" >
+    <condition property="ivy.found">
+      <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+    </condition>
+  </target>
+
+  <target name="ivy-download" description="To download ivy" unless="offline">
+    <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
+  </target>
+
+  <!--
+  To avoid Ivy leaking settings across large builds, always load Ivy in the same classloader.
+  Note that loading is skipped if the Ivy antlib has already been loaded.
+  -->
+  <target name="ivy-init-antlib" depends="ivy-download,ivy-init-dirs,ivy-probe-antlib" unless="ivy.found">
+    <typedef uri="antlib:org.apache.ivy.ant" onerror="fail"
+      loaderRef="ivyLoader">
+      <classpath>
+        <pathelement location="${ivy.jar}"/>
+      </classpath>
+    </typedef>
+    <fail >
+      <condition >
+        <not>
+          <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+        </not>
+      </condition>
+      You need Apache Ivy 2.0 or later from http://ant.apache.org/
+      It could not be loaded from ${ivy_repo_url}
+    </fail>
+  </target>
+
+  <property name="ivyresolvelog" value="download-only"/>
+  <property name="ivyretrievelog" value="quiet"/>
+
+  <target name="ivy-init" depends="ivy-init-antlib" >
+
+    <!--Configure Ivy by reading in the settings file.
+        If a settings file has already been loaded into this settings ID, that one takes priority.
+    -->
+    <ivy:configure settingsid="${ant.project.name}.ivy.settings" file="${ivysettings.xml}" override='false'
+      realm="Sonatype Nexus Repository Manager"/>
+
+  </target>
+
+  <target name="ivy-resolve" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-javadoc" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="javadoc"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-releaseaudit" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="releaseaudit"
+  		log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-test" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="test"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-common" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="common"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-package" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="package"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-jdiff" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="jdiff"
+    	log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-resolve-checkstyle" depends="ivy-init">
+    <ivy:resolve settingsRef="${ant.project.name}.ivy.settings" conf="checkstyle"
+  		log="${ivyresolvelog}"/>
+  </target>
+
+  <target name="ivy-retrieve" depends="ivy-resolve"
+    description="Retrieve Ivy-managed artifacts">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+  </target>
+
+  <target name="ivy-retrieve-checkstyle" depends="ivy-resolve-checkstyle"
+    description="Retrieve Ivy-managed artifacts for the checkstyle configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+  			log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="checkstyle-classpath" conf="checkstyle"/>
+  </target>
+
+  <target name="ivy-retrieve-jdiff" depends="ivy-resolve-jdiff"
+    description="Retrieve Ivy-managed artifacts for the jdiff configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+  			log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="jdiff-classpath" conf="jdiff"/>
+  </target>
+
+  <target name="ivy-retrieve-javadoc" depends="ivy-resolve-javadoc"
+    description="Retrieve Ivy-managed artifacts for the javadoc configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+  			log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="javadoc-classpath" conf="javadoc"/>
+  </target>
+
+  <target name="ivy-retrieve-test" depends="ivy-resolve-test"
+    description="Retrieve Ivy-managed artifacts for the test configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="ivy-test.classpath" conf="test"/>
+  </target>
+
+  <target name="ivy-retrieve-common" depends="ivy-resolve-common"
+    description="Retrieve Ivy-managed artifacts for the compile configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="ivy-common.classpath" conf="common"/>
+  </target>
+
+  <target name="ivy-retrieve-package" depends="ivy-resolve-package"
+    description="Retrieve Ivy-managed artifacts for the package configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="ivy-package.classpath" conf="package"/>
+  </target>
+
+  <target name="ivy-retrieve-releaseaudit" depends="ivy-resolve-releaseaudit"
+    description="Retrieve Ivy-managed artifacts for the compile configurations">
+    <ivy:retrieve settingsRef="${ant.project.name}.ivy.settings"
+      pattern="${build.ivy.lib.dir}/${ivy.artifact.retrieve.pattern}"
+    		log="${ivyretrievelog}"/>
+    <ivy:cachepath pathid="releaseaudit-classpath" conf="releaseaudit"/>
+  </target>
+
+  <target name="ivy-report" depends="ivy-resolve-releaseaudit"
+    description="Generate">
+    <ivy:report todir="${build.ivy.report.dir}" settingsRef="${ant.project.name}.ivy.settings"/>
+    <echo>
+      Reports generated in: ${build.ivy.report.dir}
+    </echo>
+  </target>
+
+</project>
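
Editor's note: the set-version target above regenerates the Ivy/Maven descriptors by copying the *-template.xml files and replacing the literal @version placeholder with ${version}. A minimal Java sketch of the same substitution, shown for two of the three templates; the version string and the class itself are illustrative assumptions, not part of the build:

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;

    /** Hypothetical sketch of what the Ant set-version target does. */
    public class SetVersion {
      public static void main(String[] args) throws IOException {
        String version = "0.23.0-SNAPSHOT";        // stand-in for ${version}
        Path ivyDir = Paths.get("common", "ivy");  // ${basedir}/ivy

        // template -> generated descriptor, as in the <copy>/<replaceregexp> tasks
        String[][] files = {
          {"hadoop-common-template.xml",      "hadoop-common.xml"},
          {"hadoop-common-test-template.xml", "hadoop-common-test.xml"},
        };
        for (String[] f : files) {
          String text = new String(
              Files.readAllBytes(ivyDir.resolve(f[0])), StandardCharsets.UTF_8);
          // the templates carry a literal "@version" token
          Files.write(ivyDir.resolve(f[1]),
              text.replace("@version", version).getBytes(StandardCharsets.UTF_8));
        }
      }
    }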

+ 24 - 0
common/conf/configuration.xsl

@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>
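
Editor's note: configuration.xsl renders a Hadoop configuration file as an HTML table of name/value/description rows. A minimal sketch of applying it with the JDK's built-in XSLT support; the output file name and paths are assumptions:

    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerFactory;
    import javax.xml.transform.stream.StreamResult;
    import javax.xml.transform.stream.StreamSource;

    public class RenderConfig {
      public static void main(String[] args) throws Exception {
        Transformer t = TransformerFactory.newInstance()
            .newTransformer(new StreamSource("common/conf/configuration.xsl"));
        // core-site.xml references the stylesheet via <?xml-stylesheet?>; here it is applied explicitly
        t.transform(new StreamSource("common/conf/core-site.xml"),
                    new StreamResult("core-site.html"));
      }
    }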

+ 8 - 0
common/conf/core-site.xml.template

@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>

+ 66 - 0
common/conf/hadoop-env.sh.template

@@ -0,0 +1,66 @@
+# Set Hadoop-specific environment variables here.
+
+# The only required environment variable is JAVA_HOME.  All others are
+# optional.  When running a distributed configuration it is best to
+# set JAVA_HOME in this file, so that it is correctly defined on
+# remote nodes.
+
+# The java implementation to use.  Required.
+export JAVA_HOME=${JAVA_HOME}
+
+# Hadoop Installation Prefix
+HADOOP_PREFIX=${HADOOP_PREFIX}
+
+# Hadoop Configuration Directory
+HADOOP_CONF_DIR=${HADOOP_CONF_DIR}
+export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-$HADOOP_PREFIX/conf}
+
+# Extra Java CLASSPATH elements.  Optional.
+# export HADOOP_CLASSPATH="<extra_entries>:$HADOOP_CLASSPATH"
+
+# The maximum amount of heap to use, in MB. Default is 1000.
+# export HADOOP_HEAPSIZE=2000
+
+# Extra Java runtime options.  Empty by default.
+# if [ "$HADOOP_OPTS" == "" ]; then export HADOOP_OPTS=-server; else HADOOP_OPTS+=" -server"; fi
+
+# Command specific options appended to HADOOP_OPTS when specified
+export HADOOP_NAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_NAMENODE_OPTS"
+export HADOOP_SECONDARYNAMENODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_SECONDARYNAMENODE_OPTS"
+export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS"
+export HADOOP_BALANCER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_BALANCER_OPTS"
+export HADOOP_JOBTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_JOBTRACKER_OPTS"
+export HADOOP_TASKTRACKER_OPTS="-Dcom.sun.management.jmxremote $HADOOP_TASKTRACKER_OPTS"
+# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
+# export HADOOP_CLIENT_OPTS
+
+# Extra ssh options.  Empty by default.
+# export HADOOP_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HADOOP_CONF_DIR"
+
+# File naming remote slave hosts.  $HADOOP_PREFIX/conf/slaves by default.
+export HADOOP_SLAVES=${HADOOP_CONF_DIR}/slaves
+
+# host:path where hadoop code should be rsync'd from.  Unset by default.
+# export HADOOP_MASTER=master:/home/$USER/src/hadoop
+
+# Seconds to sleep between slave commands.  Unset by default.  This
+# can be useful in large clusters, where, e.g., slave rsyncs can
+# otherwise arrive faster than the master can service them.
+# export HADOOP_SLAVE_SLEEP=0.1
+
+# The directory where pid files are stored. /tmp by default.
+HADOOP_PID_DIR=${HADOOP_PID_DIR}
+export HADOOP_PID_DIR=${HADOOP_PID_DIR:-$HADOOP_PREFIX/var/run}
+
+# A string representing this instance of hadoop. $USER by default.
+export HADOOP_IDENT_STRING=`whoami`
+
+# The scheduling priority for daemon processes.  See 'man nice'.
+# export HADOOP_NICENESS=10
+
+# Allow Hadoop to run with sysctl net.ipv6.bindv6only = 1
+# export HADOOP_ALLOW_IPV6=yes
+
+# Where log files are stored.  $HADOOP_PREFIX/logs by default.
+HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$HADOOP_IDENT_STRING
+export HADOOP_LOG_DIR=${HADOOP_LOG_DIR:-$HADOOP_PREFIX/var/log}

+ 72 - 0
common/conf/hadoop-metrics.properties

@@ -0,0 +1,72 @@
+# Configuration of the "dfs" context for null
+dfs.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "dfs" context for file
+#dfs.class=org.apache.hadoop.metrics.file.FileContext
+#dfs.period=10
+#dfs.fileName=/tmp/dfsmetrics.log
+
+# Configuration of the "dfs" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# dfs.period=10
+# dfs.servers=localhost:8649
+
+
+# Configuration of the "mapred" context for null
+mapred.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "mapred" context for file
+#mapred.class=org.apache.hadoop.metrics.file.FileContext
+#mapred.period=10
+#mapred.fileName=/tmp/mrmetrics.log
+
+# Configuration of the "mapred" context for ganglia
+# Pick one: Ganglia 3.0 (former) or Ganglia 3.1 (latter)
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# mapred.class=org.apache.hadoop.metrics.ganglia.GangliaContext31
+# mapred.period=10
+# mapred.servers=localhost:8649
+
+
+# Configuration of the "jvm" context for null
+#jvm.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "jvm" context for file
+#jvm.class=org.apache.hadoop.metrics.file.FileContext
+#jvm.period=10
+#jvm.fileName=/tmp/jvmmetrics.log
+
+# Configuration of the "jvm" context for ganglia
+# jvm.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# jvm.period=10
+# jvm.servers=localhost:8649
+
+# Configuration of the "rpc" context for null
+rpc.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "rpc" context for file
+#rpc.class=org.apache.hadoop.metrics.file.FileContext
+#rpc.period=10
+#rpc.fileName=/tmp/rpcmetrics.log
+
+# Configuration of the "rpc" context for ganglia
+# rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# rpc.period=10
+# rpc.servers=localhost:8649
+
+
+# Configuration of the "ugi" context for null
+ugi.class=org.apache.hadoop.metrics.spi.NullContext
+
+# Configuration of the "ugi" context for file
+#ugi.class=org.apache.hadoop.metrics.file.FileContext
+#ugi.period=10
+#ugi.fileName=/tmp/ugimetrics.log
+
+# Configuration of the "ugi" context for ganglia
+# ugi.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+# ugi.period=10
+# ugi.servers=localhost:8649
+
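
Editor's note: each metrics context above is selected by a <context>.class key (NullContext, FileContext, or a Ganglia context). A small sketch, using only java.util.Properties, of how such a file maps context names to implementation classes; the class and path are assumptions made for illustration:

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.util.Properties;

    public class ShowMetricsContexts {
      public static void main(String[] args) throws IOException {
        Properties p = new Properties();
        try (FileInputStream in =
                 new FileInputStream("common/conf/hadoop-metrics.properties")) {
          p.load(in);
        }
        for (String context : new String[] {"dfs", "mapred", "jvm", "rpc", "ugi"}) {
          // e.g. dfs.class=org.apache.hadoop.metrics.spi.NullContext
          System.out.println(context + " -> "
              + p.getProperty(context + ".class", "<not configured>"));
        }
      }
    }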

+ 27 - 0
common/conf/hadoop-metrics2.properties

@@ -0,0 +1,27 @@
+# syntax: [prefix].[source|sink].[instance].[options]
+# See javadoc of package-info.java for org.apache.hadoop.metrics2 for details
+
+*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink
+# default sampling period
+*.period=10
+
+# The namenode-metrics.out file will contain metrics from all contexts
+#namenode.sink.file.filename=namenode-metrics.out
+# Specifying a special sampling period for namenode:
+#namenode.sink.*.period=8
+
+#datanode.sink.file.filename=datanode-metrics.out
+
+# The following example splits metrics from different
+# contexts into different sinks (in this case, files)
+#jobtracker.sink.file_jvm.context=jvm
+#jobtracker.sink.file_jvm.filename=jobtracker-jvm-metrics.out
+#jobtracker.sink.file_mapred.context=mapred
+#jobtracker.sink.file_mapred.filename=jobtracker-mapred-metrics.out
+
+#tasktracker.sink.file.filename=tasktracker-metrics.out
+
+#maptask.sink.file.filename=maptask-metrics.out
+
+#reducetask.sink.file.filename=reducetask-metrics.out
+

+ 106 - 0
common/conf/hadoop-policy.xml.template

@@ -0,0 +1,106 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Put site-specific property overrides in this file. -->
+
+<configuration>
+  <property>
+    <name>security.client.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientProtocol, which is used by user code 
+    via the DistributedFileSystem. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.client.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for ClientDatanodeProtocol, the client-to-datanode protocol 
+    for block recovery.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for DatanodeProtocol, which is used by datanodes to 
+    communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.datanode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterDatanodeProtocol, the inter-datanode protocol
+    for updating generation timestamp.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.namenode.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for NamenodeProtocol, the protocol used by the secondary
+    namenode to communicate with the namenode.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.inter.tracker.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for InterTrackerProtocol, used by the tasktrackers to 
+    communicate with the jobtracker.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.job.submission.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for JobSubmissionProtocol, used by job clients to 
+    communicate with the jobtracker for job submission, querying job status, etc.
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.task.umbilical.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for TaskUmbilicalProtocol, used by the map and reduce 
+    tasks to communicate with the parent tasktracker. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.refresh.policy.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for RefreshAuthorizationPolicyProtocol, used by the 
+    dfsadmin and mradmin commands to refresh the security policy in-effect. 
+    The ACL is a comma-separated list of user and group names. The user and 
+    group list is separated by a blank. For e.g. "alice,bob users,wheel". 
+    A special value of "*" means all users are allowed.</description>
+  </property>
+
+  <property>
+    <name>security.admin.operations.protocol.acl</name>
+    <value>*</value>
+    <description>ACL for AdminOperationsProtocol, used by the mradmins commands
+    to refresh queues and nodes at JobTracker. The ACL is a comma-separated list of 
+    user and group names. The user and group list is separated by a blank. 
+    For e.g. "alice,bob users,wheel". A special value of "*" means all users are 
+    allowed.</description>
+  </property>
+</configuration>
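
Editor's note: every ACL value above follows the same format: a comma-separated user list and a comma-separated group list, separated by a single blank, with "*" meaning all users. A hedged sketch of parsing that format; the class and method names are invented for illustration and are not Hadoop APIs:

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class ServiceAclFormat {
      static void describe(String acl) {
        if ("*".equals(acl.trim())) {
          System.out.println("all users allowed");
          return;
        }
        // "alice,bob users,wheel" -> users [alice, bob], groups [users, wheel]
        String[] parts = acl.trim().split("\\s+", 2);
        List<String> users  = Arrays.asList(parts[0].split(","));
        List<String> groups = parts.length > 1
            ? Arrays.asList(parts[1].split(","))
            : Collections.<String>emptyList();
        System.out.println("users=" + users + " groups=" + groups);
      }

      public static void main(String[] args) {
        describe("alice,bob users,wheel");
        describe("*");
      }
    }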

+ 151 - 0
common/conf/log4j.properties

@@ -0,0 +1,151 @@
+# Define some default values that can be overridden by system properties
+hadoop.root.logger=INFO,console
+hadoop.log.dir=.
+hadoop.log.file=hadoop.log
+
+#
+# Job Summary Appender 
+#
+# Use the following logger to send the job summary to a separate file, defined by
+# hadoop.mapreduce.jobsummary.log.file and rolled daily:
+# hadoop.mapreduce.jobsummary.logger=INFO,JSA
+# 
+hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
+hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
+
+# Define the root logger to the system property "hadoop.root.logger".
+log4j.rootLogger=${hadoop.root.logger}, EventCounter
+
+# Logging Threshold
+log4j.threshold=ALL
+
+#
+# Daily Rolling File Appender
+#
+
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Roll over at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+# Debugging Pattern format
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+
+#
+# console
+# Add "console" to rootlogger above if you want to use this 
+#
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+
+#
+# TaskLog Appender
+#
+
+#Default values
+hadoop.tasklog.taskid=null
+hadoop.tasklog.iscleanup=false
+hadoop.tasklog.noKeepSplits=4
+hadoop.tasklog.totalLogFileSize=100
+hadoop.tasklog.purgeLogSplits=true
+hadoop.tasklog.logsRetainHours=12
+
+log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
+log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
+log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
+log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
+
+log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
+log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+
+#
+#Security appender
+#
+hadoop.security.log.file=SecurityAuth.audit
+log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 
+log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
+
+log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
+log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
+#new logger
+# Define some default values that can be overridden by system properties
+hadoop.security.logger=INFO,console
+log4j.category.SecurityLogger=${hadoop.security.logger}
+
+#
+# Rolling File Appender
+#
+
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+
+# Log file size and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+
+#
+# FSNamesystem Audit logging
+# All audit events are logged at INFO level
+#
+log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN
+
+# Custom Logging levels
+
+#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG
+#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG
+#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG
+
+# Jets3t library
+log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
+
+#
+# Event Counter Appender
+# Sends counts of logging messages at different severity levels to Hadoop Metrics.
+#
+log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
+
+#
+# Job Summary Appender
+#
+log4j.appender.JSA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
+log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
+log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
+log4j.appender.JSA.DatePattern=.yyyy-MM-dd
+log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
+log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false
+
+#
+# MapReduce Audit Log Appender
+#
+
+# Set the MapReduce audit log filename
+#hadoop.mapreduce.audit.log.file=hadoop-mapreduce.audit.log
+
+# Appender for AuditLogger.
+# Requires the following system properties to be set
+#    - hadoop.log.dir (Hadoop Log directory)
+#    - hadoop.mapreduce.audit.log.file (MapReduce audit log filename)
+
+#log4j.logger.org.apache.hadoop.mapred.AuditLogger=INFO,MRAUDIT
+#log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
+#log4j.appender.MRAUDIT=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.MRAUDIT.File=${hadoop.log.dir}/${hadoop.mapreduce.audit.log.file}
+#log4j.appender.MRAUDIT.DatePattern=.yyyy-MM-dd
+#log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
+#log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
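
Editor's note: the properties above drive log4j 1.2; the root logger defaults to INFO with the console appender, plus the EventCounter appender that feeds message counts into Hadoop metrics. A minimal sketch of loading this file and emitting a log line, assuming the Hadoop classes it references (e.g. the EventCounter appender) are on the classpath; the system property values shown simply override the defaults defined at the top of the file and are illustrative:

    import org.apache.log4j.Logger;
    import org.apache.log4j.PropertyConfigurator;

    public class LoggingExample {
      public static void main(String[] args) {
        // the file refers to ${hadoop.log.dir} and ${hadoop.log.file}
        System.setProperty("hadoop.log.dir", "/tmp/hadoop-logs");  // illustrative
        System.setProperty("hadoop.log.file", "hadoop.log");       // illustrative
        PropertyConfigurator.configure("common/conf/log4j.properties");

        Logger log = Logger.getLogger(LoggingExample.class);
        log.info("formatted by the console appender's ConversionPattern");
      }
    }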

+ 1 - 0
common/conf/masters.template

@@ -0,0 +1 @@
+localhost

+ 2 - 0
common/conf/slaves.template

@@ -0,0 +1,2 @@
+# Specify multiple slaves, one per line.
+localhost

+ 57 - 0
common/conf/ssl-client.xml.example

@@ -0,0 +1,57 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.client.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by clients like distcp. Must be
+  specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.keypassword</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.client.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>

+ 55 - 0
common/conf/ssl-server.xml.example

@@ -0,0 +1,55 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+<property>
+  <name>ssl.server.truststore.location</name>
+  <value></value>
+  <description>Truststore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.password</name>
+  <value></value>
+  <description>Optional. Default value is "".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.truststore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.location</name>
+  <value></value>
+  <description>Keystore to be used by NN and DN. Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.password</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.keypassword</name>
+  <value></value>
+  <description>Must be specified.
+  </description>
+</property>
+
+<property>
+  <name>ssl.server.keystore.type</name>
+  <value>jks</value>
+  <description>Optional. Default value is "jks".
+  </description>
+</property>
+
+</configuration>
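
Editor's note: both ssl-client.xml.example and ssl-server.xml.example point at JKS keystores and truststores plus their passwords. A minimal sketch, using only the standard java.security API, of loading a keystore of the type and location such a file names; the path and password below are placeholders, not values from the templates:

    import java.io.FileInputStream;
    import java.security.KeyStore;

    public class LoadKeystore {
      public static void main(String[] args) throws Exception {
        String location = "/path/to/server.jks";    // ssl.server.keystore.location (placeholder)
        char[] password = "changeit".toCharArray(); // ssl.server.keystore.password (placeholder)
        String type = "jks";                        // ssl.server.keystore.type default

        KeyStore ks = KeyStore.getInstance(type);
        try (FileInputStream in = new FileInputStream(location)) {
          ks.load(in, password);
        }
        System.out.println("loaded " + ks.size() + " entries from " + location);
      }
    }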

+ 331 - 0
common/ivy.xml

@@ -0,0 +1,331 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.hadoop" module="${ant.project.name}" revision="${version}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Hadoop Team" url="http://hadoop.apache.org"/>
+    <description>
+        Hadoop Common
+    </description>
+  </info>
+  <configurations defaultconfmapping="default">
+    <!--these match the Maven configurations-->
+    <conf name="default" extends="master,runtime"/>
+    <conf name="master" description="contains the artifact but no dependencies"/>
+    <conf name="runtime" description="runtime but not the artifact"
+      extends="client,server,s3-server,kfs,mandatory,jetty,ftp"/>
+
+    <conf name="mandatory" description="contains the critical  dependencies"
+      extends="commons-logging,log4j"/>
+
+    <!--
+    These public configurations contain the core dependencies for running hadoop client or server.
+    The server is effectively a superset of the client.
+    -->
+    <conf name="client" description="client-side dependencies"
+      extends="mandatory,httpclient"/>
+    <conf name="server" description="server-side dependencies"
+      extends="client"/>
+    <conf name="s3-client" description="dependencies for working with S3/EC2 infrastructure"
+      extends="client"/>
+    <conf name="s3-server" description="dependencies for running on S3/EC2 infrastructure"
+      extends="s3-client,server"/>
+    <conf name="kfs" description="dependencies for KFS file system support"/>
+    <conf name="ftp" description="dependencies for workign with FTP filesytems" 
+              extends="mandatory"/>
+   <conf name="jetty" description="Jetty provides the in-VM HTTP daemon" extends="commons-logging"/>
+
+   <conf name="common" extends="runtime,mandatory,httpclient,ftp,jetty,jdiff"
+		      description="common artifacts"/>
+    <!--Testing pulls in everything-->
+   <conf name="test" extends="master" description="the classpath needed to run tests"/>
+
+    <!--Packaging pulls in everything-->
+   <conf name="package" extends="master" description="the classpath needed for packaging"/>
+
+    <!--Private configurations. -->
+
+    <conf name="javadoc" visibility="private" description="artiracts required while performing doc generation"
+      extends="common,mandatory,jetty,lucene"/>
+
+    <conf name="releaseaudit" visibility="private"
+	description="Artifacts required for releaseaudit target"/>
+     
+    <conf name="commons-logging" visibility="private"/>
+    <conf name="httpclient" visibility="private" extends="commons-logging"/>
+    <conf name="log4j" visibility="private"/>
+    <conf name="lucene" visibility="private"/>
+    <conf name="jdiff" visibility="private" extends="log4j,s3-client,jetty,server"/>
+    <conf name="checkstyle" visibility="private"/>
+
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+  <dependencies>
+
+ <!--used client side-->
+    <dependency org="commons-cli"
+      name="commons-cli"
+      rev="${commons-cli.version}"
+      conf="client->default"/>
+    <dependency org="checkstyle"
+      name="checkstyle"
+      rev="${checkstyle.version}"
+      conf="checkstyle->default"/>
+    <dependency org="jdiff"
+      name="jdiff"
+      rev="${jdiff.version}"
+      conf="jdiff->default"/>
+
+    <dependency org="xmlenc"
+      name="xmlenc"
+      rev="${xmlenc.version}"
+      conf="server->default"/>
+
+    <!--Configuration: httpclient-->
+
+    <!--
+    commons-httpclient asks for too many files.
+    All it needs is commons-codec and commons-logging JARs
+    -->
+    <dependency org="commons-httpclient"
+      name="commons-httpclient"
+      rev="${commons-httpclient.version}"
+      conf="httpclient->master">
+    </dependency>
+
+    <dependency org="commons-codec"
+      name="commons-codec"
+      rev="${commons-codec.version}"
+      conf="httpclient->default"/>
+
+    <dependency org="commons-net"
+      name="commons-net"
+      rev="${commons-net.version}"
+      conf="ftp->default"/>
+
+    <!--Configuration: Jetty -->
+
+<!-- <dependency org="javax.servlet"
+      name="servlet-api"
+      rev="${servlet-api.version}"
+      conf="jetty->master"/>   -->
+    <dependency org="org.mortbay.jetty"
+      name="jetty"
+      rev="${jetty.version}"
+      conf="jetty->master"/>
+    <dependency org="org.mortbay.jetty"
+      name="jetty-util"
+      rev="${jetty-util.version}"
+      conf="jetty->master"/>
+
+    <dependency org="tomcat"
+      name="jasper-runtime"
+      rev="${jasper.version}"
+      conf="jetty->master"/>
+    <dependency org="tomcat"
+      name="jasper-compiler"
+      rev="${jasper.version}"
+      conf="jetty->master"/>
+    <dependency org="org.mortbay.jetty"
+      name="jsp-api-2.1"
+      rev="${jetty.version}"
+      conf="jetty->master"/>
+    <dependency org="org.mortbay.jetty"
+      name="jsp-2.1"
+      rev="${jetty.version}"
+      conf="jetty->master"/>
+    <dependency org="commons-el"
+      name="commons-el"
+      rev="${commons-el.version}"
+      conf="jetty->master"/>
+
+
+    <!--Configuration: commons-logging -->
+
+    <!--it is essential that only the master JAR of commons logging
+    is pulled in, as its dependencies are usually a mess, including things
+    like out of date servlet APIs, bits of Avalon, etc.
+    -->
+    <dependency org="commons-logging"
+      name="commons-logging"
+      rev="${commons-logging.version}"
+      conf="commons-logging->master"/>
+
+
+    <!--Configuration: log4j -->
+
+    <!--log4j is not optional until commons-logging.properties is stripped out of the JAR -->
+    <dependency org="log4j"
+      name="log4j"
+      rev="${log4j.version}"
+      conf="log4j->master"/>
+
+    <!--Configuration: s3-client -->
+    <!--there are two jets3t projects in the repository; this one goes up to 0.6 and
+    is assumed to be the live one-->
+    <dependency org="net.java.dev.jets3t"
+      name="jets3t"
+      rev="${jets3t.version}"
+      conf="s3-client->master"/>
+    <dependency org="commons-net"
+      name="commons-net"
+      rev="${commons-net.version}"
+      conf="s3-client->master"/> 
+    <dependency org="org.mortbay.jetty"
+      name="servlet-api-2.5"
+      rev="${servlet-api-2.5.version}"
+      conf="s3-client->master"/>
+    <dependency org="net.sf.kosmosfs"
+      name="kfs"
+      rev="${kfs.version}"
+      conf="kfs->default"/>
+
+    <!--Configuration: test -->
+    <!--artifacts needed for testing -->
+
+    <dependency org="org.apache.ftpserver"
+      name="ftplet-api"
+      rev="${ftplet-api.version}"
+      conf="test->default"/>
+    <dependency org="org.apache.mina"
+      name="mina-core"
+      rev="${mina-core.version}"
+      conf="test->default"/>
+    <dependency org="org.apache.ftpserver"
+      name="ftpserver-core"
+      rev="${ftpserver-core.version}"
+      conf="test->default"/>
+    <dependency org="org.apache.ftpserver"
+      name="ftpserver-deprecated"
+      rev="${ftpserver-deprecated.version}"
+      conf="test->default"/>
+
+    <dependency org="junit"
+      name="junit"
+      rev="${junit.version}"
+      conf="test->default"/>
+    <dependency org="org.apache.rat"
+      name="apache-rat-tasks"
+      rev="${rats-lib.version}"
+      conf="releaseaudit->default"/>
+    <dependency org="commons-lang"
+      name="commons-lang"
+      rev="${commons-lang.version}"
+      conf="releaseaudit->default"/>
+    <dependency org="commons-collections"
+      name="commons-collections"
+      rev="${commons-collections.version}"
+      conf="releaseaudit->default"/>
+    <dependency org="hsqldb"
+      name="hsqldb"
+      rev="${hsqldb.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.lucene"
+      name="lucene-core"
+      rev="${lucene-core.version}"
+      conf="javadoc->default"/> 
+    <dependency org="commons-logging"
+      name="commons-logging-api"
+      rev="${commons-logging-api.version}"
+      conf="common->default"/>
+    <dependency org="org.slf4j"
+      name="slf4j-api"
+      rev="${slf4j-api.version}"
+      conf="common->default"/>
+    <dependency org="org.eclipse.jdt"
+      name="core"
+      rev="${core.version}"
+      conf="common->master"/>
+    <dependency org="oro"
+      name="oro"
+      rev="${oro.version}"
+      conf="common->default"/>
+    <dependency org="org.slf4j"
+      name="slf4j-log4j12"
+      rev="${slf4j-log4j12.version}"
+      conf="common->master">
+    </dependency>
+    <dependency org="org.apache.hadoop"
+      name="avro"
+      rev="${avro.version}"
+      conf="common->default">
+      <exclude module="ant"/>
+      <exclude module="jetty"/>
+      <exclude module="slf4j-simple"/>
+    </dependency>
+    <dependency org="org.codehaus.jackson"
+      name="jackson-mapper-asl"
+      rev="${jackson.version}"
+      conf="common->default"/> 
+    <dependency org="com.thoughtworks.paranamer"
+      name="paranamer"
+      rev="${paranamer.version}"
+      conf="common->default"/>
+    <dependency org="com.thoughtworks.paranamer"
+      name="paranamer-ant"
+      rev="${paranamer.version}"
+      conf="common->default"/>
+    <dependency org="org.aspectj"
+      name="aspectjrt"
+      rev="${aspectj.version}"
+      conf="common->default">
+    </dependency>
+    <dependency org="org.aspectj"
+      name="aspectjtools"
+      rev="${aspectj.version}"
+      conf="common->default">
+    </dependency>
+    <dependency org="org.mockito" 
+      name="mockito-all" 
+      rev="${mockito-all.version}" 
+      conf="test->default">
+    </dependency> 
+    <dependency org="com.jcraft"
+      name="jsch"
+      rev="${jsch.version}"
+      conf="common->default">
+    </dependency>
+    <!--Configuration: package -->
+    <!--artifacts needed for packaging -->
+    <dependency org="org.vafer" 
+      name="jdeb" 
+      rev="${jdeb.version}"
+      conf="package->master">
+    </dependency>
+    <dependency org="commons-configuration"
+      name="commons-configuration"
+      rev="${commons-configuration.version}"
+      conf="common->default"/>
+    <dependency org="org.apache.commons"
+      name="commons-math"
+      rev="${commons-math.version}"
+      conf="common->default"/>
+    <dependency org="com.google.guava"
+      name="guava"
+      rev="${guava.version}"
+      conf="common->default"/>
+    <dependency org="com.google.protobuf"
+      name="protobuf-java"
+      rev="${protobuf.version}"
+      conf="common->default"/>
+  </dependencies>
+</ivy-module>

+ 139 - 0
common/ivy/hadoop-common-instrumented-template.xml

@@ -0,0 +1,139 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-common-instrumented</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+   <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+      <version>1.2</version>
+    </dependency>
+   <dependency>
+      <groupId>xmlenc</groupId>
+      <artifactId>xmlenc</artifactId>
+      <version>0.52</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-httpclient</groupId>
+      <artifactId>commons-httpclient</artifactId>
+      <version>3.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>1.4</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-net</groupId>
+      <artifactId>commons-net</artifactId>
+      <version>1.4.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-runtime</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-compiler</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-api-2.1</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-2.1</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-el</groupId>
+      <artifactId>commons-el</artifactId>
+      <version>1.0</version>
+    </dependency>
+    <dependency>
+      <groupId>net.java.dev.jets3t</groupId>
+      <artifactId>jets3t</artifactId>
+      <version>0.7.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-net</groupId>
+      <artifactId>commons-net</artifactId>
+      <version>1.4.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>servlet-api-2.5</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sf.kosmosfs</groupId>
+      <artifactId>kfs</artifactId>
+      <version>0.3</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.8.1</version>
+    </dependency>
+    <dependency>
+      <groupId>hsqldb</groupId>
+      <artifactId>hsqldb</artifactId>
+      <version>1.8.0.10</version>
+    </dependency>
+    <dependency>
+      <groupId>oro</groupId>
+      <artifactId>oro</artifactId>
+      <version>2.0.8</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>avro</artifactId>
+      <version>1.3.2</version>
+      <exclusions>
+        <exclusion>
+          <!-- Don't pull in Avro's (later) version of Jetty.-->
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+          <!-- Exclude Avro's version of ant since it conflicts with Jetty's.-->
+          <groupId>org.apache.ant</groupId>
+          <artifactId>ant</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+  </dependencies>
+</project>

+ 159 - 0
common/ivy/hadoop-common-template.xml

@@ -0,0 +1,159 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-common</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+   <dependency>
+      <groupId>commons-cli</groupId>
+      <artifactId>commons-cli</artifactId>
+      <version>1.2</version>
+    </dependency>
+   <dependency>
+      <groupId>xmlenc</groupId>
+      <artifactId>xmlenc</artifactId>
+      <version>0.52</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-httpclient</groupId>
+      <artifactId>commons-httpclient</artifactId>
+      <version>3.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+      <version>1.4</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-net</groupId>
+      <artifactId>commons-net</artifactId>
+      <version>1.4.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-runtime</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>tomcat</groupId>
+      <artifactId>jasper-compiler</artifactId>
+      <version>5.5.12</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-api-2.1</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-2.1</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-el</groupId>
+      <artifactId>commons-el</artifactId>
+      <version>1.0</version>
+    </dependency>
+    <dependency>
+      <groupId>net.java.dev.jets3t</groupId>
+      <artifactId>jets3t</artifactId>
+      <version>0.7.1</version>
+    </dependency>
+    <dependency>
+      <groupId>commons-net</groupId>
+      <artifactId>commons-net</artifactId>
+      <version>1.4.1</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>servlet-api-2.5</artifactId>
+      <version>6.1.14</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sf.kosmosfs</groupId>
+      <artifactId>kfs</artifactId>
+      <version>0.3</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>4.8.1</version>
+    </dependency>
+    <dependency>
+      <groupId>hsqldb</groupId>
+      <artifactId>hsqldb</artifactId>
+      <version>1.8.0.10</version>
+    </dependency>
+    <dependency>
+      <groupId>oro</groupId>
+      <artifactId>oro</artifactId>
+      <version>2.0.8</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>avro</artifactId>
+      <version>1.3.2</version>
+      <exclusions>
+        <exclusion>
+          <!-- Don't pull in Avro's (later) version of Jetty.-->
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>jetty</artifactId>
+        </exclusion>
+        <exclusion>
+          <!-- Exclude Avro's version of ant since it conflicts with Jetty's.-->
+          <groupId>org.apache.ant</groupId>
+          <artifactId>ant</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>commons-configuration</groupId>
+      <artifactId>commons-configuration</artifactId>
+      <version>1.6</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-math</artifactId>
+      <version>2.1</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>r09</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+      <version>2.4.0a</version>
+    </dependency>
+  </dependencies>
+</project>

+ 58 - 0
common/ivy/hadoop-common-test-template.xml

@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-common-test</artifactId>
+  <packaging>jar</packaging>
+  <version>@version</version>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>@version</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.ftpserver</groupId>
+      <artifactId>ftplet-api</artifactId>
+      <version>1.0.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.mina</groupId>
+      <artifactId>mina-core</artifactId>
+      <version>2.0.0-M5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.ftpserver</groupId>
+      <artifactId>ftpserver-core</artifactId>
+      <version>1.0.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.ftpserver</groupId>
+      <artifactId>ftpserver-deprecated</artifactId>
+      <version>1.0.0-M2</version>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-all</artifactId>
+      <version>1.8.5</version>
+    </dependency>
+  </dependencies>
+</project>

+ 50 - 0
common/ivy/ivysettings.xml

@@ -0,0 +1,50 @@
+<ivysettings>
+ <!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+  <property name="repo.maven.org" value="http://repo1.maven.org/maven2/" override="false"/>
+
+  <property name="maven2.pattern" value="[organisation]/[module]/[revision]/[module]-[revision]"/>
+  <property name="repo.dir" value="${user.home}/.m2/repository"/>
+  <!-- pull in the local repository -->
+  <include url="${ivy.default.conf.dir}/ivyconf-local.xml"/>
+
+  <property name="resolvers" value="default" override="false"/>
+  <property name="force-resolve" value="false" override="false"/>
+  <settings defaultResolver="${resolvers}"/>
+
+  <resolvers>
+    <!-- ibiblio resolvers -->
+    <ibiblio name="maven2" root="${repo.maven.org}" m2compatible="true"/>
+
+    <filesystem name="fs" m2compatible="true" force="${force-resolve}">
+       <artifact pattern="${repo.dir}/${maven2.pattern}.[ext]"/>
+       <ivy pattern="${repo.dir}/${maven2.pattern}.pom"/>
+    </filesystem>
+
+    <chain name="default" dual="true">
+      <resolver ref="maven2"/>
+    </chain>
+
+    <chain name="internal" dual="true">
+      <resolver ref="fs"/>
+      <resolver ref="maven2"/>
+    </chain>
+
+  </resolvers>
+
+</ivysettings>
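The 'internal' chain defined above consults the local ~/.m2 repository (the 'fs' filesystem resolver) before falling back to the 'maven2' ibiblio resolver, and the <settings defaultResolver="${resolvers}"/> element reads the overridable 'resolvers' property. A build that loads this settings file can therefore be pointed at the local repository from the command line; a minimal sketch, assuming the Ant build forwards -D properties to Ivy (the target name is illustrative):

    # Resolve through the 'internal' chain: local ~/.m2 first, then the public Maven repository.
    ant compile -Dresolvers=internal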

+ 90 - 0
common/ivy/libraries.properties

@@ -0,0 +1,90 @@
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
+#This properties file lists the versions of the various artifacts used by Hadoop and its components.
+#It drives ivy and the generation of a maven POM
+
+#These are the versions of our dependencies (in alphabetical order)
+ant-task.version=2.0.10
+
+avro.version=1.3.2
+
+checkstyle.version=4.2
+
+commons-cli.version=1.2
+commons-cli2.version=2.0-mahout
+commons-codec.version=1.4
+commons-collections.version=3.1
+commons-configuration.version=1.6
+commons-httpclient.version=3.1
+commons-lang.version=2.5
+commons-logging.version=1.1.1
+commons-logging-api.version=1.1
+commons-el.version=1.0
+commons-fileupload.version=1.2
+commons-io.version=1.4
+commons-math.version=2.1
+commons-net.version=1.4.1
+core.version=3.1.1
+coreplugin.version=1.3.2
+
+ftplet-api.version=1.0.0
+ftpserver-core.version=1.0.0
+ftpserver-deprecated.version=1.0.0-M2
+
+guava.version=r09
+
+hsqldb.version=1.8.0.10
+
+ivy.version=2.1.0
+
+jasper.version=5.5.12
+jdeb.version=0.8
+jsp.version=2.1
+jsp-api.version=5.5.12
+jets3t.version=0.7.1
+jetty.version=6.1.14
+jetty-util.version=6.1.14
+junit.version=4.8.1
+jdiff.version=1.0.9
+json.version=1.0
+
+kfs.version=0.3
+
+log4j.version=1.2.15
+lucene-core.version=2.3.1
+
+mina-core.version=2.0.0-M5
+
+oro.version=2.0.8
+
+protobuf.version=2.4.0a
+
+rats-lib.version=0.6
+
+servlet.version=4.0.6
+servlet-api-2.5.version=6.1.14
+servlet-api.version=2.5
+slf4j-api.version=1.5.11
+slf4j-log4j12.version=1.5.11
+
+wagon-http.version=1.0-beta-2
+
+xmlenc.version=0.52
+xerces.version=1.4.4
+
+aspectj.version=1.6.5
+
+mockito-all.version=1.8.5
+
+jsch.version=0.1.42
+

File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop-core_0.20.0.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop-core_0.21.0.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.17.0.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.18.1.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.18.2.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.18.3.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.19.0.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.19.1.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.19.2.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.20.0.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.20.1.xml


File diff not shown because it is too large
+ 11 - 0
common/lib/jdiff/hadoop_0.20.2.xml


+ 11 - 0
common/src/contrib/bash-tab-completion/README

@@ -0,0 +1,11 @@
+Bash tab completion support for the hadoop script.
+
+On Debian-like distributions, the script can be placed in
+/etc/bash_completion.d/, and it will be sourced automatically by Bash. On
+other distributions, you may source the file manually (`. hadoop.sh') or
+source it from your bashrc (or equivalent) file.
+
+The script allows tab completion of all the command names, subcommands for the
+'fs', 'dfsadmin', 'job', 'namenode' and 'pipes' commands, arguments of the 'jar'
+command and most arguments to the 'fs' subcommands (completing local and 
+dfs paths as appropriate).
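For a quick test without installing the script system-wide, it can be sourced directly into a running shell; a minimal sketch (the path is illustrative):

    # Load the completion function into the current shell session.
    . ./hadoop.sh
    # 'hadoop <TAB>' now completes command names; 'hadoop fs -<TAB>' lists the fs subcommands.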

+ 121 - 0
common/src/contrib/bash-tab-completion/hadoop.sh

@@ -0,0 +1,121 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Provides tab completion for the main hadoop script.
+#
+# On debian-based systems, place in /etc/bash_completion.d/ and either restart
+# Bash or source the script manually (. /etc/bash_completion.d/hadoop.sh).
+
+_hadoop() {
+  local script cur prev temp
+
+  COMPREPLY=()
+  cur=${COMP_WORDS[COMP_CWORD]}
+  prev=${COMP_WORDS[COMP_CWORD-1]}  
+  script=${COMP_WORDS[0]}  
+  
+  # Bash lets you tab complete things even if the script doesn't
+  # exist (or isn't executable). Check to make sure it is, as we
+  # need to execute it to get options/info
+  if [ -f "$script" -a -x "$script" ]; then
+    case $COMP_CWORD in
+    1)
+      # Completing the first argument (the command).
+
+      temp=`$script | grep -n "^\s*or"`;
+      temp=`$script | head -n $((${temp%%:*} - 1)) | awk '/^ / {print $1}' | sort | uniq`;
+      COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+      return 0;;
+
+    2)
+      # Completing the second arg (first arg to the command)
+
+      # The output of commands isn't hugely consistent, so certain
+      # names are hardcoded and parsed differently. Some aren't
+      # handled at all (mostly ones without args).
+      case ${COMP_WORDS[1]} in
+      dfs | dfsadmin | fs | job | pipes)
+        # One option per line, enclosed in square brackets
+
+        temp=`$script ${COMP_WORDS[1]} 2>&1 | awk '/^[ \t]*\[/ {gsub("[[\\]]", ""); print $1}'`;
+        COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+        return 0;;
+
+      jar)
+        # Any (jar) file
+
+        COMPREPLY=(`compgen -A file -- ${cur}`);
+        return 0;;
+
+      namenode)
+        # All options specified in one line,
+        # enclosed in [] and separated with |
+        temp=`$script ${COMP_WORDS[1]} -help 2>&1 | grep Usage: | cut -d '[' -f 2- | awk '{gsub("] \\| \\[|]", " "); print $0}'`;
+        COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+        return 0;;
+
+      *)
+        # Other commands - no idea
+
+        return 1;;
+      esac;;
+
+    *)
+      # Additional args
+      
+      case ${COMP_WORDS[1]} in
+      dfs | fs)
+        # DFS/FS subcommand completion
+        # Pull the list of options, grep for the one the user is trying to use,
+        # and then select the description of the relevant argument
+        temp=$((${COMP_CWORD} - 1));
+        temp=`$script ${COMP_WORDS[1]} 2>&1 | grep -- "${COMP_WORDS[2]} " | awk '{gsub("[[ \\]]", ""); print $0}' | cut -d '<' -f ${temp}`;
+
+        if [ ${#temp} -lt 1 ]; then
+          # No match
+          return 1;
+        fi;
+
+        temp=${temp:0:$((${#temp} - 1))};
+
+        # Now do completion based on the argument
+        case $temp in
+        path | src | dst)
+          # DFS path completion
+          temp=`$script ${COMP_WORDS[1]} -ls "${cur}*" 2>&1 | grep -vE '^Found ' | cut -f 1 | awk '{gsub("^.* ", ""); print $0;}'`
+          COMPREPLY=(`compgen -W "${temp}" -- ${cur}`);
+          return 0;;
+
+        localsrc | localdst)
+          # Local path completion
+          COMPREPLY=(`compgen -A file -- ${cur}`);
+          return 0;;
+
+        *)
+          # Other arguments - no idea
+          return 1;;
+        esac;;
+
+      *)
+        # Other subcommands - no idea
+        return 1;;
+      esac;
+    esac;
+  fi;
+}
+
+complete -F _hadoop hadoop

+ 170 - 0
common/src/docs/changes/ChangesFancyStyle.css

@@ -0,0 +1,170 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * General
+ */
+
+img { border: 0; }
+
+#content table {
+  border: 0;
+  width: 100%;
+}
+/*Hack to get IE to render the table at 100%*/
+* html #content table { margin-left: -3px; }
+
+#content th,
+#content td {
+  margin: 0;
+  padding: 0;
+  vertical-align: top;
+}
+
+.clearboth {
+  clear: both;
+}
+
+.note, .warning, .fixme {
+  border: solid black 1px;
+  margin: 1em 3em;
+}
+
+.note .label {
+  background: #369;
+  color: white;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.note .content {
+  background: #F0F0FF;
+  color: black;
+  line-height: 120%;
+  font-size: 90%;
+  padding: 5px 10px;
+}
+.warning .label {
+  background: #C00;
+  color: white;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.warning .content {
+  background: #FFF0F0;
+  color: black;
+  line-height: 120%;
+  font-size: 90%;
+  padding: 5px 10px;
+}
+.fixme .label {
+  background: #C6C600;
+  color: black;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.fixme .content {
+  padding: 5px 10px;
+}
+
+/**
+ * Typography
+ */
+
+body {
+  font-family: verdana, "Trebuchet MS", arial, helvetica, sans-serif;
+  font-size: 100%;
+}
+
+#content {
+  font-family: Georgia, Palatino, Times, serif;
+  font-size: 95%;
+}
+#tabs {
+  font-size: 70%;
+}
+#menu {
+  font-size: 80%;
+}
+#footer {
+  font-size: 70%;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  font-family: "Trebuchet MS", verdana, arial, helvetica, sans-serif;
+  font-weight: bold;
+  margin-top: 1em;
+  margin-bottom: .5em;
+}
+
+h1 {
+    margin-top: 0;
+    margin-bottom: 1em;
+  font-size: 1.4em;
+  background-color: #73CAFF;
+}
+#content h1 {
+  font-size: 160%;
+  margin-bottom: .5em;
+}
+#menu h1 {
+  margin: 0;
+  padding: 10px;
+  background: #336699;
+  color: white;
+}
+h2 { 
+  font-size: 120%;
+  background-color: #73CAFF;
+}
+h3 { font-size: 100%; }
+h4 { font-size: 90%; }
+h5 { font-size: 80%; }
+h6 { font-size: 75%; }
+
+p {
+  line-height: 120%;
+  text-align: left;
+  margin-top: .5em;
+  margin-bottom: 1em;
+}
+
+#content li,
+#content th,
+#content td,
+#content li ul,
+#content li ol{
+  margin-top: .5em;
+  margin-bottom: .5em;
+}
+
+
+#content li li,
+#minitoc-area li{
+  margin-top: 0em;
+  margin-bottom: 0em;
+}
+
+#content .attribution {
+  text-align: right;
+  font-style: italic;
+  font-size: 85%;
+  margin-top: 1em;
+}
+
+.codefrag {
+  font-family: "Courier New", Courier, monospace;
+  font-size: 110%;
+}

+ 49 - 0
common/src/docs/changes/ChangesSimpleStyle.css

@@ -0,0 +1,49 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+body {
+  font-family: Courier New, monospace;
+  font-size: 10pt;
+}
+
+h1 {
+  font-family: Courier New, monospace;
+  font-size: 10pt;
+}
+
+h2 {
+  font-family: Courier New, monospace;
+  font-size: 10pt; 
+}
+
+h3 {
+  font-family: Courier New, monospace;
+  font-size: 10pt; 
+}
+
+a:link {
+  color: blue;
+}
+
+a:visited {
+  color: purple; 
+}
+
+li {
+  margin-top: 1em;
+  margin-bottom: 1em;
+}

+ 282 - 0
common/src/docs/changes/changes2html.pl

@@ -0,0 +1,282 @@
+#!/usr/bin/perl
+#
+# Transforms Hadoop's CHANGES.txt into Changes.html
+#
+# Input is on STDIN, output is to STDOUT
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+use strict;
+use warnings;
+
+my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
+my $title = undef;
+my $release = undef;
+my $sections = undef;
+my $items = undef;
+my $first_relid = undef;
+my $second_relid = undef;
+my @releases = ();
+
+my @lines = <>;                        # Get all input at once
+
+#
+# Parse input and build hierarchical release structure in @releases
+#
+for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) {
+  $_ = $lines[$line_num];
+  next unless (/\S/);                  # Skip blank lines
+
+  unless ($title) {
+    if (/\S/) {
+      s/^\s+//;                        # Trim leading whitespace
+      s/\s+$//;                        # Trim trailing whitespace
+    }
+    $title = $_;
+    next;
+  }
+
+  if (/^(Release)|(Trunk)/) {   # Release headings
+    $release = $_;
+    $sections = [];
+    push @releases, [ $release, $sections ];
+    ($first_relid = lc($release)) =~ s/\s+/_/g   if ($#releases == 0);
+    ($second_relid = lc($release)) =~ s/\s+/_/g  if ($#releases == 1);
+    $items = undef;
+    next;
+  }
+
+  # Section heading: 2 leading spaces, words all capitalized
+  if (/^  ([A-Z]+)\s*/) {
+    my $heading = $_;
+    $items = [];
+    push @$sections, [ $heading, $items ];
+    next;
+  }
+
+  # Handle earlier releases without sections - create a headless section
+  unless ($items) {
+    $items = [];
+    push @$sections, [ undef, $items ];
+  }
+
+  my $type;
+  if (@$items) { # A list item has been encountered in this section before
+    $type = $items->[0];  # 0th position of items array is list type
+  } else {
+    $type = get_list_type($_);
+    push @$items, $type;
+  }
+
+  if ($type eq 'numbered') { # The modern items list style
+    # List item boundary is another numbered item or an unindented line
+    my $line;
+    my $item = $_;
+    $item =~ s/^(\s{0,2}\d+\.\s*)//;       # Trim the leading item number
+    my $leading_ws_width = length($1);
+    $item =~ s/\s+$//;                     # Trim trailing whitespace
+    $item .= "\n";
+
+    while ($line_num < $#lines
+           and ($line = $lines[++$line_num]) !~ /^(?:\s{0,2}\d+\.\s*\S|\S)/) {
+      $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
+      $line =~ s/\s+$//;                   # Trim trailing whitespace
+      $item .= "$line\n";
+    }
+    $item =~ s/\n+\Z/\n/;                  # Trim trailing blank lines
+    push @$items, $item;
+    --$line_num unless ($line_num == $#lines);
+  } elsif ($type eq 'paragraph') {         # List item boundary is a blank line
+    my $line;
+    my $item = $_;
+    $item =~ s/^(\s+)//;
+    my $leading_ws_width = defined($1) ? length($1) : 0;
+    $item =~ s/\s+$//;                     # Trim trailing whitespace
+    $item .= "\n";
+
+    while ($line_num < $#lines and ($line = $lines[++$line_num]) =~ /\S/) {
+      $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
+      $line =~ s/\s+$//;                   # Trim trailing whitespace
+      $item .= "$line\n";
+    }
+    push @$items, $item;
+    --$line_num unless ($line_num == $#lines);
+  } else { # $type is one of the bulleted types
+    # List item boundary is another bullet or a blank line
+    my $line;
+    my $item = $_;
+    $item =~ s/^(\s*$type\s*)//;           # Trim the leading bullet
+    my $leading_ws_width = length($1);
+    $item =~ s/\s+$//;                     # Trim trailing whitespace
+    $item .= "\n";
+
+    while ($line_num < $#lines
+           and ($line = $lines[++$line_num]) !~ /^\s*(?:$type|\Z)/) {
+      $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
+      $line =~ s/\s+$//;                   # Trim trailing whitespace
+      $item .= "$line\n";
+    }
+    push @$items, $item;
+    --$line_num unless ($line_num == $#lines);
+  }
+}
+
+#
+# Print HTML-ified version to STDOUT
+#
+print<<"__HTML_HEADER__";
+<!--
+**********************************************************
+** WARNING: This file is generated from CHANGES.txt by the 
+**          Perl script 'changes2html.pl'.
+**          Do *not* edit this file!
+**********************************************************
+          
+****************************************************************************
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+****************************************************************************
+-->
+<html>
+<head>
+  <title>$title</title>
+  <link rel="stylesheet" href="ChangesFancyStyle.css" title="Fancy">
+  <link rel="alternate stylesheet" href="ChangesSimpleStyle.css" title="Simple">
+  <META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+  <SCRIPT>
+    function toggleList(e) {
+      element = document.getElementById(e).style;
+      element.display == 'none' ? element.display = 'block' : element.display='none';
+    }
+    function collapse() {
+      for (var i = 0; i < document.getElementsByTagName("ul").length; i++) {
+        var list = document.getElementsByTagName("ul")[i];
+        if (list.id != '$first_relid' && list.id != '$second_relid') {
+          list.style.display = "none";
+        }
+      }
+      for (var i = 0; i < document.getElementsByTagName("ol").length; i++) {
+        document.getElementsByTagName("ol")[i].style.display = "none"; 
+      }
+    }
+    window.onload = collapse;
+  </SCRIPT>
+</head>
+<body>
+
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
+<h1>$title</h1>
+
+__HTML_HEADER__
+
+my $heading;
+my $relcnt = 0;
+my $header = 'h2';
+for my $rel (@releases) {
+  if (++$relcnt == 3) {
+    $header = 'h3';
+    print "<h2><a href=\"javascript:toggleList('older')\">";
+    print "Older Releases";
+    print "</a></h2>\n";
+    print "<ul id=\"older\">\n"
+  }
+      
+  ($release, $sections) = @$rel;
+
+  # The first section heading is undefined for the older sectionless releases
+  my $has_release_sections = $sections->[0][0];
+
+  (my $relid = lc($release)) =~ s/\s+/_/g;
+  print "<$header><a href=\"javascript:toggleList('$relid')\">";
+  print "$release";
+  print "</a></$header>\n";
+  print "<ul id=\"$relid\">\n"
+    if ($has_release_sections);
+
+  for my $section (@$sections) {
+    ($heading, $items) = @$section;
+    (my $sectid = lc($heading)) =~ s/\s+/_/g;
+    my $numItemsStr = $#{$items} > 0 ? "($#{$items})" : "(none)";  
+
+    print "  <li><a href=\"javascript:toggleList('$relid.$sectid')\">",
+          ($heading || ''), "</a>&nbsp;&nbsp;&nbsp;$numItemsStr\n"
+      if ($has_release_sections);
+
+    my $list_type = $items->[0] || '';
+    my $list = ($has_release_sections || $list_type eq 'numbered' ? 'ol' : 'ul');
+    my $listid = $sectid ? "$relid.$sectid" : $relid;
+    print "    <$list id=\"$listid\">\n";
+
+    for my $itemnum (1..$#{$items}) {
+      my $item = $items->[$itemnum];
+      $item =~ s:&:&amp;:g;                            # Escape HTML metachars
+      $item =~ s:<:&lt;:g; 
+      $item =~ s:>:&gt;:g;
+
+      $item =~ s:\s*(\([^)"]+?\))\s*$:<br />$1:;       # Separate attribution
+      $item =~ s:\n{2,}:\n<p/>\n:g;                    # Keep paragraph breaks
+      $item =~ s{(?:${jira_url_prefix})?(HADOOP-\d+)}  # Link to JIRA
+                {<a href="${jira_url_prefix}$1">$1</a>}g;
+      print "      <li>$item</li>\n";
+    }
+    print "    </$list>\n";
+    print "  </li>\n" if ($has_release_sections);
+  }
+  print "</ul>\n" if ($has_release_sections);
+}
+print "</ul>\n" if ($relcnt > 3);
+print "</body>\n</html>\n";
+
+
+#
+# Subroutine: get_list_type
+#
+# Takes one parameter:
+#
+#    - The first line of a sub-section/point
+#
+# Returns one scalar:
+#
+#    - The list type: 'numbered', one of the bulleted types ('-' or '.'),
+#      or 'paragraph'.
+#
+sub get_list_type {
+  my $first_list_item_line = shift;
+  my $type = 'paragraph'; # Default to paragraph type
+
+  if ($first_list_item_line =~ /^\s{0,2}\d+\.\s+\S+/) {
+    $type = 'numbered';
+  } elsif ($first_list_item_line =~ /^\s*([-.])\s+\S+/) {
+    $type = $1;
+  }
+  return $type;
+}
+
+1;

+ 170 - 0
common/src/docs/cn/changes/ChangesFancyStyle.css

@@ -0,0 +1,170 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+/**
+ * General
+ */
+
+img { border: 0; }
+
+#content table {
+  border: 0;
+  width: 100%;
+}
+/*Hack to get IE to render the table at 100%*/
+* html #content table { margin-left: -3px; }
+
+#content th,
+#content td {
+  margin: 0;
+  padding: 0;
+  vertical-align: top;
+}
+
+.clearboth {
+  clear: both;
+}
+
+.note, .warning, .fixme {
+  border: solid black 1px;
+  margin: 1em 3em;
+}
+
+.note .label {
+  background: #369;
+  color: white;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.note .content {
+  background: #F0F0FF;
+  color: black;
+  line-height: 120%;
+  font-size: 90%;
+  padding: 5px 10px;
+}
+.warning .label {
+  background: #C00;
+  color: white;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.warning .content {
+  background: #FFF0F0;
+  color: black;
+  line-height: 120%;
+  font-size: 90%;
+  padding: 5px 10px;
+}
+.fixme .label {
+  background: #C6C600;
+  color: black;
+  font-weight: bold;
+  padding: 5px 10px;
+}
+.fixme .content {
+  padding: 5px 10px;
+}
+
+/**
+ * Typography
+ */
+
+body {
+  font-family: verdana, "Trebuchet MS", arial, helvetica, sans-serif;
+  font-size: 100%;
+}
+
+#content {
+  font-family: Georgia, Palatino, Times, serif;
+  font-size: 95%;
+}
+#tabs {
+  font-size: 70%;
+}
+#menu {
+  font-size: 80%;
+}
+#footer {
+  font-size: 70%;
+}
+
+h1, h2, h3, h4, h5, h6 {
+  font-family: "Trebuchet MS", verdana, arial, helvetica, sans-serif;
+  font-weight: bold;
+  margin-top: 1em;
+  margin-bottom: .5em;
+}
+
+h1 {
+    margin-top: 0;
+    margin-bottom: 1em;
+  font-size: 1.4em;
+  background-color: #73CAFF;
+}
+#content h1 {
+  font-size: 160%;
+  margin-bottom: .5em;
+}
+#menu h1 {
+  margin: 0;
+  padding: 10px;
+  background: #336699;
+  color: white;
+}
+h2 { 
+  font-size: 120%;
+  background-color: #73CAFF;
+}
+h3 { font-size: 100%; }
+h4 { font-size: 90%; }
+h5 { font-size: 80%; }
+h6 { font-size: 75%; }
+
+p {
+  line-height: 120%;
+  text-align: left;
+  margin-top: .5em;
+  margin-bottom: 1em;
+}
+
+#content li,
+#content th,
+#content td,
+#content li ul,
+#content li ol{
+  margin-top: .5em;
+  margin-bottom: .5em;
+}
+
+
+#content li li,
+#minitoc-area li{
+  margin-top: 0em;
+  margin-bottom: 0em;
+}
+
+#content .attribution {
+  text-align: right;
+  font-style: italic;
+  font-size: 85%;
+  margin-top: 1em;
+}
+
+.codefrag {
+  font-family: "Courier New", Courier, monospace;
+  font-size: 110%;
+}

+ 49 - 0
common/src/docs/cn/changes/ChangesSimpleStyle.css

@@ -0,0 +1,49 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+body {
+  font-family: Courier New, monospace;
+  font-size: 10pt;
+}
+
+h1 {
+  font-family: Courier New, monospace;
+  font-size: 10pt;
+}
+
+h2 {
+  font-family: Courier New, monospace;
+  font-size: 10pt; 
+}
+
+h3 {
+  font-family: Courier New, monospace;
+  font-size: 10pt; 
+}
+
+a:link {
+  color: blue;
+}
+
+a:visited {
+  color: purple; 
+}
+
+li {
+  margin-top: 1em;
+  margin-bottom: 1em;
+}

+ 282 - 0
common/src/docs/cn/changes/changes2html.pl

@@ -0,0 +1,282 @@
+#!/usr/bin/perl
+#
+# Transforms Hadoop's CHANGES.txt into Changes.html
+#
+# Input is on STDIN, output is to STDOUT
+#
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+use strict;
+use warnings;
+
+my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
+my $title = undef;
+my $release = undef;
+my $sections = undef;
+my $items = undef;
+my $first_relid = undef;
+my $second_relid = undef;
+my @releases = ();
+
+my @lines = <>;                        # Get all input at once
+
+#
+# Parse input and build hierarchical release structure in @releases
+#
+for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) {
+  $_ = $lines[$line_num];
+  next unless (/\S/);                  # Skip blank lines
+
+  unless ($title) {
+    if (/\S/) {
+      s/^\s+//;                        # Trim leading whitespace
+      s/\s+$//;                        # Trim trailing whitespace
+    }
+    $title = $_;
+    next;
+  }
+
+  if (/^(Release)|(Trunk)/) {   # Release headings
+    $release = $_;
+    $sections = [];
+    push @releases, [ $release, $sections ];
+    ($first_relid = lc($release)) =~ s/\s+/_/g   if ($#releases == 0);
+    ($second_relid = lc($release)) =~ s/\s+/_/g  if ($#releases == 1);
+    $items = undef;
+    next;
+  }
+
+  # Section heading: 2 leading spaces, words all capitalized
+  if (/^  ([A-Z]+)\s*/) {
+    my $heading = $_;
+    $items = [];
+    push @$sections, [ $heading, $items ];
+    next;
+  }
+
+  # Handle earlier releases without sections - create a headless section
+  unless ($items) {
+    $items = [];
+    push @$sections, [ undef, $items ];
+  }
+
+  my $type;
+  if (@$items) { # A list item has been encountered in this section before
+    $type = $items->[0];  # 0th position of items array is list type
+  } else {
+    $type = get_list_type($_);
+    push @$items, $type;
+  }
+
+  if ($type eq 'numbered') { # The modern items list style
+    # List item boundary is another numbered item or an unindented line
+    my $line;
+    my $item = $_;
+    $item =~ s/^(\s{0,2}\d+\.\s*)//;       # Trim the leading item number
+    my $leading_ws_width = length($1);
+    $item =~ s/\s+$//;                     # Trim trailing whitespace
+    $item .= "\n";
+
+    while ($line_num < $#lines
+           and ($line = $lines[++$line_num]) !~ /^(?:\s{0,2}\d+\.\s*\S|\S)/) {
+      $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
+      $line =~ s/\s+$//;                   # Trim trailing whitespace
+      $item .= "$line\n";
+    }
+    $item =~ s/\n+\Z/\n/;                  # Trim trailing blank lines
+    push @$items, $item;
+    --$line_num unless ($line_num == $#lines);
+  } elsif ($type eq 'paragraph') {         # List item boundary is a blank line
+    my $line;
+    my $item = $_;
+    $item =~ s/^(\s+)//;
+    my $leading_ws_width = defined($1) ? length($1) : 0;
+    $item =~ s/\s+$//;                     # Trim trailing whitespace
+    $item .= "\n";
+
+    while ($line_num < $#lines and ($line = $lines[++$line_num]) =~ /\S/) {
+      $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
+      $line =~ s/\s+$//;                   # Trim trailing whitespace
+      $item .= "$line\n";
+    }
+    push @$items, $item;
+    --$line_num unless ($line_num == $#lines);
+  } else { # $type is one of the bulleted types
+    # List item boundary is another bullet or a blank line
+    my $line;
+    my $item = $_;
+    $item =~ s/^(\s*$type\s*)//;           # Trim the leading bullet
+    my $leading_ws_width = length($1);
+    $item =~ s/\s+$//;                     # Trim trailing whitespace
+    $item .= "\n";
+
+    while ($line_num < $#lines
+           and ($line = $lines[++$line_num]) !~ /^\s*(?:$type|\Z)/) {
+      $line =~ s/^\s{$leading_ws_width}//; # Trim leading whitespace
+      $line =~ s/\s+$//;                   # Trim trailing whitespace
+      $item .= "$line\n";
+    }
+    push @$items, $item;
+    --$line_num unless ($line_num == $#lines);
+  }
+}
+
+#
+# Print HTML-ified version to STDOUT
+#
+print<<"__HTML_HEADER__";
+<!--
+**********************************************************
+** WARNING: This file is generated from CHANGES.txt by the 
+**          Perl script 'changes2html.pl'.
+**          Do *not* edit this file!
+**********************************************************
+          
+****************************************************************************
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+****************************************************************************
+-->
+<html>
+<head>
+  <title>$title</title>
+  <link rel="stylesheet" href="ChangesFancyStyle.css" title="Fancy">
+  <link rel="alternate stylesheet" href="ChangesSimpleStyle.css" title="Simple">
+  <META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+  <SCRIPT>
+    function toggleList(e) {
+      element = document.getElementById(e).style;
+      element.display == 'none' ? element.display = 'block' : element.display='none';
+    }
+    function collapse() {
+      for (var i = 0; i < document.getElementsByTagName("ul").length; i++) {
+        var list = document.getElementsByTagName("ul")[i];
+        if (list.id != '$first_relid' && list.id != '$second_relid') {
+          list.style.display = "none";
+        }
+      }
+      for (var i = 0; i < document.getElementsByTagName("ol").length; i++) {
+        document.getElementsByTagName("ol")[i].style.display = "none"; 
+      }
+    }
+    window.onload = collapse;
+  </SCRIPT>
+</head>
+<body>
+
+<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
+<h1>$title</h1>
+
+__HTML_HEADER__
+
+my $heading;
+my $relcnt = 0;
+my $header = 'h2';
+for my $rel (@releases) {
+  if (++$relcnt == 3) {
+    $header = 'h3';
+    print "<h2><a href=\"javascript:toggleList('older')\">";
+    print "Older Releases";
+    print "</a></h2>\n";
+    print "<ul id=\"older\">\n"
+  }
+      
+  ($release, $sections) = @$rel;
+
+  # The first section heading is undefined for the older sectionless releases
+  my $has_release_sections = $sections->[0][0];
+
+  (my $relid = lc($release)) =~ s/\s+/_/g;
+  print "<$header><a href=\"javascript:toggleList('$relid')\">";
+  print "$release";
+  print "</a></$header>\n";
+  print "<ul id=\"$relid\">\n"
+    if ($has_release_sections);
+
+  for my $section (@$sections) {
+    ($heading, $items) = @$section;
+    (my $sectid = lc($heading)) =~ s/\s+/_/g;
+    my $numItemsStr = $#{$items} > 0 ? "($#{$items})" : "(none)";  
+
+    print "  <li><a href=\"javascript:toggleList('$relid.$sectid')\">",
+          ($heading || ''), "</a>&nbsp;&nbsp;&nbsp;$numItemsStr\n"
+      if ($has_release_sections);
+
+    my $list_type = $items->[0] || '';
+    my $list = ($has_release_sections || $list_type eq 'numbered' ? 'ol' : 'ul');
+    my $listid = $sectid ? "$relid.$sectid" : $relid;
+    print "    <$list id=\"$listid\">\n";
+
+    for my $itemnum (1..$#{$items}) {
+      my $item = $items->[$itemnum];
+      $item =~ s:&:&amp;:g;                            # Escape HTML metachars
+      $item =~ s:<:&lt;:g; 
+      $item =~ s:>:&gt;:g;
+
+      $item =~ s:\s*(\([^)"]+?\))\s*$:<br />$1:;       # Separate attribution
+      $item =~ s:\n{2,}:\n<p/>\n:g;                    # Keep paragraph breaks
+      $item =~ s{(?:${jira_url_prefix})?(HADOOP-\d+)}  # Link to JIRA
+                {<a href="${jira_url_prefix}$1">$1</a>}g;
+      print "      <li>$item</li>\n";
+    }
+    print "    </$list>\n";
+    print "  </li>\n" if ($has_release_sections);
+  }
+  print "</ul>\n" if ($has_release_sections);
+}
+print "</ul>\n" if ($relcnt > 3);
+print "</body>\n</html>\n";
+
+
+#
+# Subroutine: get_list_type
+#
+# Takes one parameter:
+#
+#    - The first line of a sub-section/point
+#
+# Returns one scalar:
+#
+#    - The list type: 'numbered', one of the bulleted types ('-' or '.'),
+#      or 'paragraph'.
+#
+sub get_list_type {
+  my $first_list_item_line = shift;
+  my $type = 'paragraph'; # Default to paragraph type
+
+  if ($first_list_item_line =~ /^\s{0,2}\d+\.\s+\S+/) {
+    $type = 'numbered';
+  } elsif ($first_list_item_line =~ /^\s*([-.])\s+\S+/) {
+    $type = $1;
+  }
+  return $type;
+}
+
+1;

+ 112 - 0
common/src/docs/cn/forrest.properties

@@ -0,0 +1,112 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############
+# Properties used by forrest.build.xml for building the website
+# These are the defaults, un-comment them if you need to change them.
+##############
+
+# Prints out a summary of Forrest settings for this project
+#forrest.echo=true 
+
+# Project name (used to name .war file)
+#project.name=my-project
+
+# Specifies name of Forrest skin to use
+#project.skin=tigris
+#project.skin=pelt
+
+# comma separated list, file:// is supported
+#forrest.skins.descriptors=http://forrest.apache.org/skins/skins.xml,file:///c:/myskins/skins.xml
+
+##############
+# behavioural properties
+#project.menu-scheme=tab_attributes
+#project.menu-scheme=directories
+
+##############
+# layout properties
+
+# Properties that can be set to override the default locations
+#
+# Parent properties must be set. This usually means uncommenting
+# project.content-dir if any other property using it is uncommented
+
+#project.status=status.xml
+#project.content-dir=src/documentation
+#project.raw-content-dir=${project.content-dir}/content
+#project.conf-dir=${project.content-dir}/conf
+#project.sitemap-dir=${project.content-dir}
+#project.xdocs-dir=${project.content-dir}/content/xdocs
+#project.resources-dir=${project.content-dir}/resources
+#project.stylesheets-dir=${project.resources-dir}/stylesheets
+#project.images-dir=${project.resources-dir}/images
+#project.schema-dir=${project.resources-dir}/schema
+#project.skins-dir=${project.content-dir}/skins
+#project.skinconf=${project.content-dir}/skinconf.xml
+#project.lib-dir=${project.content-dir}/lib
+#project.classes-dir=${project.content-dir}/classes
+#project.translations-dir=${project.content-dir}/translations
+
+##############
+# validation properties
+
+# This set of properties determine if validation is performed
+# Values are inherited unless overridden.
+# e.g. if forrest.validate=false then all others are false unless set to true.
+#forrest.validate=true
+#forrest.validate.xdocs=${forrest.validate}
+#forrest.validate.skinconf=${forrest.validate}
+# Workaround (HADOOP-7072) for http://issues.apache.org/jira/browse/FOR-984
+# Remove when forrest-0.9 is available
+forrest.validate.sitemap=false
+forrest.validate.stylesheets=false
+forrest.validate.skins.stylesheets=false
+# End of forrest-0.8 + JDK6 workaround
+#forrest.validate.skins=${forrest.validate}
+
+# *.failonerror=(true|false) - stop when an XML file is invalid
+#forrest.validate.failonerror=true
+
+# *.excludes=(pattern) - comma-separated list of path patterns to not validate
+# e.g.
+#forrest.validate.xdocs.excludes=samples/subdir/**, samples/faq.xml
+#forrest.validate.xdocs.excludes=
+
+
+##############
+# General Forrest properties
+
+# The URL to start crawling from
+#project.start-uri=linkmap.html
+# Set logging level for messages printed to the console
+# (DEBUG, INFO, WARN, ERROR, FATAL_ERROR)
+#project.debuglevel=ERROR
+# Max memory to allocate to Java
+forrest.maxmemory=512m
+# Any other arguments to pass to the JVM. For example, to run on an X-less
+# server, set to -Djava.awt.headless=true
+#forrest.jvmargs=
+# The bugtracking URL - the issue number will be appended
+#project.bugtracking-url=http://issues.apache.org/bugzilla/show_bug.cgi?id=
+#project.bugtracking-url=http://issues.apache.org/jira/browse/
+# The issues list as rss
+#project.issues-rss-url=
+#I18n Property only works for the "forrest run" target.
+#project.i18n=true
+project.configfile=${project.home}/src/documentation/conf/cli.xconf
+

+ 7 - 0
common/src/docs/cn/src/documentation/README.txt

@@ -0,0 +1,7 @@
+This is the base documentation directory.
+
+skinconf.xml     # This file customizes Forrest for your project. In it, you
+                 # tell forrest the project name, logo, copyright info, etc
+
+sitemap.xmap     # Optional. This sitemap is consulted before all core sitemaps.
+                 # See http://forrest.apache.org/docs/project-sitemap.html

+ 40 - 0
common/src/docs/cn/src/documentation/classes/CatalogManager.properties

@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#=======================================================================
+# CatalogManager.properties
+#
+# This is the default properties file for Apache Forrest.
+# This facilitates local configuration of application-specific catalogs.
+#
+# See the Apache Forrest documentation:
+# http://forrest.apache.org/docs/your-project.html
+# http://forrest.apache.org/docs/validation.html
+
+# verbosity ... level of messages for status/debug
+# See forrest/src/core/context/WEB-INF/cocoon.xconf
+
+# catalogs ... list of additional catalogs to load
+#  (Note that Apache Forrest will automatically load its own default catalog
+#  from src/core/context/resources/schema/catalog.xcat)
+# use full pathnames
+# pathname separator is always semi-colon (;) regardless of operating system
+# directory separator is always slash (/) regardless of operating system
+#
+#catalogs=/home/me/forrest/my-site/src/documentation/resources/schema/catalog.xcat
+catalogs=
+

+ 327 - 0
common/src/docs/cn/src/documentation/conf/cli.xconf

@@ -0,0 +1,327 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!--+
+    |  This is the Apache Cocoon command line configuration file.
+    |  Here you give the command line interface details of where
+    |  to find various aspects of your Cocoon installation.
+    |
+    |  If you wish, you can also use this file to specify the URIs
+    |  that you wish to generate.
+    |
+    |  The current configuration information in this file is for
+    |  building the Cocoon documentation. Therefore, all links here
+    |  are relative to the build context dir, which, in the build.xml
+    |  file, is set to ${build.context}
+    |
+    |  Options:
+    |    verbose:            increase amount of information presented
+    |                        to standard output (default: false)
+    |    follow-links:       whether linked pages should also be
+    |                        generated (default: true)
+    |    precompile-only:    precompile sitemaps and XSP pages, but
+    |                        do not generate any pages (default: false)
+    |    confirm-extensions: check the mime type for the generated page
+    |                        and adjust filename and links extensions
+    |                        to match the mime type
+    |                        (e.g. text/html->.html)
+    |
+    |  Note: Whilst using an xconf file to configure the Cocoon
+    |        Command Line gives access to more features, the use of
+    |        command line parameters is more stable, as there are
+    |        currently plans to improve the xconf format to allow
+    |        greater flexibility. If you require a stable and
+    |        consistent method for accessing the CLI, it is recommended
+    |        that you use the command line parameters to configure
+    |        the CLI. See documentation at:
+    |        http://cocoon.apache.org/2.1/userdocs/offline/
+    |        http://wiki.apache.org/cocoon/CommandLine
+    |
+    +-->
+
+<cocoon verbose="true"
+        follow-links="true"
+        precompile-only="false"
+        confirm-extensions="false">
+
+   <!--+
+       |  The context directory is usually the webapp directory
+       |  containing the sitemap.xmap file.
+       |
+       |  The config file is the cocoon.xconf file.
+       |
+       |  The work directory is used by Cocoon to store temporary
+       |  files and cache files.
+       |
+       |  The destination directory is where generated pages will
+       |  be written (assuming the 'simple' mapper is used, see
+       |  below)
+       +-->
+   <context-dir>.</context-dir>
+   <config-file>WEB-INF/cocoon.xconf</config-file>
+   <work-dir>../tmp/cocoon-work</work-dir>
+   <dest-dir>../site</dest-dir>
+
+   <!--+
+       |  A checksum file can be used to store checksums for pages
+       |  as they are generated. When the site is next generated,
+       |  files will not be written if their checksum has not changed.
+       |  This means that it will be easier to detect which files
+       |  need to be uploaded to a server, using the timestamp.
+       |
+       |  The default path is relative to the core webapp directory.
+       |  An absolute path can be used.
+       +-->
+   <!--   <checksums-uri>build/work/checksums</checksums-uri>-->
+
+   <!--+
+       | Broken link reporting options:
+       |   Report into a text file, one link per line:
+       |     <broken-links type="text" report="filename"/>
+       |   Report into an XML file:
+       |     <broken-links type="xml" report="filename"/>
+       |   Ignore broken links (default):
+       |     <broken-links type="none"/>
+       |
+       |   Two attributes to this node specify whether a page should
+       |   be generated when an error has occurred. 'generate' specifies
+       |   whether a page should be generated (default: true) and
+       |   extension specifies an extension that should be appended
+       |   to the generated page's filename (default: none)
+       |
+       |   Using this, a quick scan through the destination directory
+       |   will show broken links, by their filename extension.
+       +-->
+   <broken-links type="xml"
+                 file="../brokenlinks.xml"
+                 generate="false"
+                 extension=".error"
+                 show-referrers="true"/>
+
+   <!--+
+       |  Load classes at startup. This is necessary for generating
+       |  from sites that use SQL databases and JDBC.
+       |  The <load-class> element can be repeated if multiple classes
+       |  are needed.
+       +-->
+   <!--
+   <load-class>org.firebirdsql.jdbc.Driver</load-class>
+   -->
+
+   <!--+
+       |  Configures logging.
+       |  The 'log-kit' parameter specifies the location of the log kit
+       |  configuration file (usually called logkit.xconf).
+       |
+       |  Logger specifies the logging category (for all logging prior
+       |  to other Cocoon logging categories taking over)
+       |
+       |  Available log levels are:
+       |    DEBUG:        prints all level of log messages.
+       |    INFO:         prints all level of log messages except DEBUG
+       |                  ones.
+       |    WARN:         prints all level of log messages except DEBUG
+       |                  and INFO ones.
+       |    ERROR:        prints all level of log messages except DEBUG,
+       |                  INFO and WARN ones.
+       |    FATAL_ERROR:  prints only log messages of this level
+       +-->
+   <!-- <logging log-kit="WEB-INF/logkit.xconf" logger="cli" level="ERROR" /> -->
+
+   <!--+
+       |  Specifies the filename to be appended to URIs that
+       |  refer to a directory (i.e. end with a forward slash).
+       +-->
+   <default-filename>index.html</default-filename>
+
+   <!--+
+       |  Specifies a user agent string to the sitemap when
+       |  generating the site.
+       |
+       |  A generic term for a web browser is "user agent". Any
+       |  user agent, when connecting to a web server, will provide
+       |  a string to identify itself (e.g. as Internet Explorer or
+       |  Mozilla). It is possible to have Cocoon serve different
+       |  content depending upon the user agent string provided by
+       |  the browser. If your site does this, then you may want to
+       |  use this <user-agent> entry to provide a 'fake' user agent
+       |  to Cocoon, so that it generates the correct version of your
+       |  site.
+       |
+       |  For most sites, this can be ignored.
+       +-->
+   <!--
+   <user-agent>Cocoon Command Line Environment 2.1</user-agent>
+   -->
+
+   <!--+
+       |  Specifies an accept string to the sitemap when generating
+       |  the site.
+       |  User agents can specify to an HTTP server what types of content
+       |  (by mime-type) they are able to receive. E.g. a browser may be
+       |  able to handle jpegs, but not pngs. The HTTP accept header
+       |  allows the server to take the browser's capabilities into account,
+       |  and only send back content that it can handle.
+       |
+       |  For most sites, this can be ignored.
+       +-->
+
+   <accept>*/*</accept>
+
+   <!--+
+       | Specifies which URIs should be included or excluded, according
+       | to wildcard patterns.
+       |
+       | These includes/excludes are only relevant when you are following
+       | links. A link URI must match an include pattern (if one is given)
+       | and not match an exclude pattern, if it is to be followed by
+       | Cocoon. It can be useful, for example, where there are links in
+       | your site to pages that are not generated by Cocoon, such as
+       | references to api-documentation.
+       |
+       | By default, all URIs are included. If both include and exclude
+       | patterns are specified, a URI is first checked against the
+       | include patterns, and then against the exclude patterns.
+       |
+       | Multiple patterns can be given, using multiple include or exclude
+       | nodes.
+       |
+       | The order of the elements is not significant, as only the first
+       | successful match of each category is used.
+       |
+       | Currently, only the complete source URI can be matched (including
+       | any URI prefix). Future plans include destination URI matching
+       | and regexp matching. If you have requirements for these, contact
+       | dev@cocoon.apache.org.
+       +-->
+
+   <exclude pattern="**/"/>
+   <exclude pattern="api/**"/>
+   <exclude pattern="jdiff/**"/>
+   <exclude pattern="changes.html"/>
+   <exclude pattern="releasenotes.html"/>
+
+<!--
+  This is a workaround for FOR-284 "link rewriting broken when
+  linking to xml source views which contain site: links".
+  See the explanation there and in declare-broken-site-links.xsl
+-->
+   <exclude pattern="site:**"/>
+   <exclude pattern="ext:**"/>
+   <exclude pattern="lm:**"/>
+   <exclude pattern="**/site:**"/>
+   <exclude pattern="**/ext:**"/>
+   <exclude pattern="**/lm:**"/>
+
+   <!-- Exclude tokens used in URLs to ASF mirrors (interpreted by a CGI) -->
+   <exclude pattern="[preferred]/**"/>
+   <exclude pattern="[location]"/>
+
+   <!--   <include-links extension=".html"/>-->
+
+   <!--+
+       |  <uri> nodes specify the URIs that should be generated, and
+       |  where required, what should be done with the generated pages.
+       |  They describe the way the URI of the generated file is created
+       |  from the source page's URI. There are three ways that a generated
+       |  file URI can be created: append, replace and insert.
+       |
+       |  The "type" attribute specifies one of (append|replace|insert):
+       |
+       |  append:
+       |  Append the generated page's URI to the end of the source URI:
+       |
+       |   <uri type="append" src-prefix="documents/" src="index.html"
+       |   dest="build/dest/"/>
+       |
+       |  This means that
+       |   (1) the "documents/index.html" page is generated
+       |   (2) the file will be written to "build/dest/documents/index.html"
+       |
+       |  replace:
+       |  Completely ignore the generated page's URI - just
+       |  use the destination URI:
+       |
+       |   <uri type="replace" src-prefix="documents/" src="index.html"
+       |   dest="build/dest/docs.html"/>
+       |
+       |  This means that
+       |   (1) the "documents/index.html" page is generated
+       |   (2) the result is written to "build/dest/docs.html"
+       |   (3) this works only for "single" pages - and not when links
+       |       are followed
+       |
+       |  insert:
+       |  Insert generated page's URI into the destination
+       |  URI at the point marked with a * (example uses fictional
+       |  zip protocol)
+       |
+       |   <uri type="insert" src-prefix="documents/" src="index.html"
+       |   dest="zip://*.zip/page.html"/>
+       |
+       |  This means that
+       |   (1) the "documents/index.html" page is generated
+       |   (2) the source URI is inserted at the "*", so the result is
+       |       written to "zip://documents/index.html.zip/page.html"
+       |
+       |  In any of these scenarios, if the dest attribute is omitted,
+       |  the value provided globally using the <dest-dir> node will
+       |  be used instead.
+       +-->
+   <!--
+   <uri type="replace"
+        src-prefix="samples/"
+        src="hello-world/hello.html"
+        dest="build/dest/hello-world.html"/>
+   -->
+
+   <!--+
+       | <uri> nodes can be grouped together in a <uris> node. This
+       | enables a group of URIs to share properties. The following
+       | properties can be set for a group of URIs:
+       |   * follow-links:       should pages be crawled for links
+       |   * confirm-extensions: should file extensions be checked
+       |                         for the correct mime type
+       |   * src-prefix:         all source URIs should be
+       |                         pre-pended with this prefix before
+       |                         generation. The prefix is not
+       |                         included when calculating the
+       |                         destination URI
+       |   * dest:               the base destination URI to be
+       |                         shared by all pages in this group
+       |   * type:               the method to be used to calculate
+       |                         the destination URI. See above
+       |                         section on <uri> node for details.
+       |
+       | Each <uris> node can have a name attribute. When a name
+       | attribute has been specified, the -n switch on the command
+       | line can be used to tell Cocoon to only process the URIs
+       | within this URI group. When no -n switch is given, all
+       | <uris> nodes are processed. Thus, one xconf file can be
+       | used to manage multiple sites.
+       +-->
+   <!--
+   <uris name="mirrors" follow-links="false">
+     <uri type="append" src="mirrors.html"/>
+   </uris>
+   -->
+
+   <!--+
+       |  File containing URIs (plain text, one per line).
+       +-->
+   <!--
+   <uri-file>uris.txt</uri-file>
+   -->
+</cocoon>

+ 386 - 0
common/src/docs/cn/src/documentation/content/xdocs/cluster_setup.xml

@@ -0,0 +1,386 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  
+  <header>
+    <title>Hadoop集群搭建</title>
+  </header>
+  
+  <body>
+  
+    <section>
+      <title>目的</title>
+      
+      <p>本文描述了如何安装、配置和管理有实际意义的Hadoop集群,其规模可从几个节点的小集群到几千个节点的超大集群。</p>
+      
+      <p>如果你希望在单机上安装Hadoop玩玩,从<a href="quickstart.html">这里</a>能找到相关细节。</p>
+    </section>
+    
+    <section>
+      <title>先决条件</title>
+      
+      <ol>
+        <li>
+          确保在你集群中的每个节点上都安装了所有<a href="quickstart.html#PreReqs">必需</a>软件。
+        </li>
+        <li>
+          <a href="quickstart.html#下载">获取</a>Hadoop软件包。
+        </li>
+      </ol>
+    </section>
+    
+    <section>
+      <title>安装</title>
+      
+      <p>安装Hadoop集群通常要将安装软件解压到集群内的所有机器上。</p>
+      
+      <p>通常,集群里的一台机器被指定为 
+	 <code>NameNode</code>,另一台不同的机器被指定为<code>JobTracker</code>。这些机器是<em>masters</em>。余下的机器既作为<code>DataNode</code><em>也</em>作为<code>TaskTracker</code>。这些机器是<em>slaves</em>。</p>
+      
+      <p>我们用<code>HADOOP_PREFIX</code>指代安装的根路径。通常,集群里的所有机器的<code>HADOOP_PREFIX</code>路径相同。</p>
+    </section>
+    
+    <section>
+      <title>配置</title>
+      
+      <p>接下来的几节描述了如何配置Hadoop集群。</p>
+      
+      <section>
+        <title>配置文件</title>
+        
+        <p>对Hadoop的配置通过<code>conf/</code>目录下的两个重要配置文件完成:</p>
+        <ol>
+          <li>
+            <a href="ext:hadoop-default">hadoop-default.xml</a> - 只读的默认配置。
+          </li>
+          <li>
+            <em>hadoop-site.xml</em> - 集群特有的配置。
+          </li>
+        </ol>
+      
+        <p>要了解更多关于这些配置文件如何影响Hadoop框架的细节,请看<a href="ext:api/org/apache/hadoop/conf/configuration">这里</a>。</p>
+
+        <p>此外,通过设置<code>conf/hadoop-env.sh</code>中的变量为集群特有的值,你可以对<code>bin/</code>目录下的Hadoop脚本进行控制。</p>
+      </section>
+
+      <section>
+        <title>集群配置</title>
+        
+        <p>要配置Hadoop集群,你需要设置Hadoop守护进程的<em>运行环境</em>和Hadoop守护进程的<em>运行参数</em>。</p>
+        
+        <p>Hadoop守护进程指<code>NameNode</code>/<code>DataNode</code> 
+        和<code>JobTracker</code>/<code>TaskTracker</code>。</p>
+
+        <section>
+          <title>配置Hadoop守护进程的运行环境</title>
+
+          <p>管理员可在<code>conf/hadoop-env.sh</code>脚本内对Hadoop守护进程的运行环境做特别指定。</p>  
+          <p>至少,你需要设定<code>JAVA_HOME</code>,确保它在每个远端节点上都被正确设置。</p>
+          <p>管理员可以通过配置选项<code>HADOOP_*_OPTS</code>来分别配置各个守护进程。
+          下表是可以配置的选项。
+          </p>
+          <table>
+          <tr><th>守护进程</th><th>配置选项</th></tr>
+          <tr><td>NameNode</td><td>HADOOP_NAMENODE_OPTS</td></tr>
+          <tr><td>DataNode</td><td>HADOOP_DATANODE_OPTS</td></tr>
+          <tr><td>SecondaryNamenode</td>
+              <td>HADOOP_SECONDARYNAMENODE_OPTS</td></tr>
+          <tr><td>JobTracker</td><td>HADOOP_JOBTRACKER_OPTS</td></tr>
+          <tr><td>TaskTracker</td><td>HADOOP_TASKTRACKER_OPTS</td></tr>
+          </table>
+
+	  <p>例如,配置Namenode时,为了使其能够并行回收垃圾(parallelGC),
+          要把下面的代码加入到<code>hadoop-env.sh</code> :
+          <br/><code>
+          export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}"
+          </code><br/></p>
+          <p>其它可定制的常用参数还包括:</p>
+          <ul>
+            <li>
+              <code>HADOOP_LOG_DIR</code> - 守护进程日志文件的存放目录。如果不存在会被自动创建。
+            </li>
+            <li>
+              <code>HADOOP_HEAPSIZE</code> - 最大可用的堆大小,单位为MB。比如,<code>1000MB</code>。
+              这个参数用于设置hadoop守护进程的堆大小。缺省大小是<code>1000MB</code>。
+            </li>
+          </ul>
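+          <p>下面是一个<code>conf/hadoop-env.sh</code>片段的草稿,其中的路径和堆大小(单位MB)只是假设值,需按实际环境调整:
+          <br/><code>
+          # 以下取值仅为示例<br/>
+          export JAVA_HOME=/usr/java/default<br/>
+          export HADOOP_LOG_DIR=/var/log/hadoop<br/>
+          export HADOOP_HEAPSIZE=2000
+          </code><br/></p>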
+        </section>
+        
+        <section>
+          <title>配置Hadoop守护进程的运行参数</title>
+          
+          <p>这部分涉及Hadoop集群的重要参数,这些参数在<code>conf/hadoop-site.xml</code>中指定。</p>
+
+		  <table>
+  		    <tr>
+		      <th>参数</th>
+		      <th>取值</th> 
+		      <th>备注</th>
+		    </tr>
+  		    <tr>
+		      <td>fs.default.name</td>
+                       <td><code>NameNode</code>的URI。</td>
+                       <td><em>hdfs://主机名/</em></td>
+		    </tr>
+		    <tr>
+		      <td>mapred.job.tracker</td>
+		      <td><code>JobTracker</code>的主机(或者IP)和端口。</td>
+		      <td><em>主机:端口</em>。</td>
+		    </tr>
+		    <tr>
+		      <td>dfs.name.dir</td>
+		      <td>
+		        <code>NameNode</code>持久存储名字空间及事务日志的本地文件系统路径。</td>
+		      <td>当这个值是一个逗号分割的目录列表时,nametable数据将会被复制到所有目录中做冗余备份。
+		      </td>
+		    </tr>
+		    <tr>
+		      <td>dfs.data.dir</td>
+		      <td> 
+		        <code>DataNode</code>存放块数据的本地文件系统路径,逗号分割的列表。
+		      </td>
+		      <td>
+		        当这个值是逗号分割的目录列表时,数据将被存储在所有目录下,通常分布在不同设备上。
+		      </td>
+		    </tr>
+		    <tr>
+		      <td>mapred.system.dir</td>
+		      <td>Map/Reduce框架存储系统文件的HDFS路径。比如<code>/hadoop/mapred/system/</code>。
+		      </td>
+		      <td>这个路径是默认文件系统(HDFS)下的路径, 须从服务器和客户端上均可访问。
+		      </td>
+		    </tr>
+		    <tr>
+		      <td>mapred.local.dir</td>
+		      <td>本地文件系统下逗号分割的路径列表,Map/Reduce临时数据存放的地方。
+		      </td>
+		      <td>多路径有助于利用磁盘i/o。</td>
+		    </tr>
+		    <tr>
+		      <td>mapred.tasktracker.{map|reduce}.tasks.maximum</td>
+		      <td>某一<code>TaskTracker</code>上可运行的最大Map/Reduce任务数,这些任务将同时各自运行。
+		      </td>
+		      <td>
+		        默认为2(2个map和2个reduce),可依据硬件情况更改。
+		      </td>
+		    </tr>
+		    <tr>
+		      <td>dfs.hosts/dfs.hosts.exclude</td>
+		      <td>许可/拒绝DataNode列表。</td>
+		      <td>
+		        如有必要,用这个文件控制许可的datanode列表。
+		      </td>
+		    </tr>
+		    <tr>
+		      <td>mapred.hosts/mapred.hosts.exclude</td>
+		      <td>许可/拒绝TaskTracker列表。</td>
+		      <td>
+		        如有必要,用这个文件控制许可的TaskTracker列表。
+		      </td>
+  		    </tr>
+		  </table>
+
+          <p>通常,上述参数被标记为 
+          <a href="ext:api/org/apache/hadoop/conf/configuration/final_parameters">
+          final</a> 以确保它们不被用户应用更改。
+          </p>
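+          <p>下面给出一个最简的<code>conf/hadoop-site.xml</code>草稿(其中的主机名、端口和本地路径只是假设值,需要替换成实际环境中的值):</p>
+<source>
+&lt;!-- 示例配置:主机名、端口与路径均为假设值 --&gt;
+&lt;configuration&gt;
+  &lt;property&gt;
+    &lt;name&gt;fs.default.name&lt;/name&gt;
+    &lt;value&gt;hdfs://nn.example.com:9000/&lt;/value&gt;
+    &lt;final&gt;true&lt;/final&gt;
+  &lt;/property&gt;
+  &lt;property&gt;
+    &lt;name&gt;mapred.job.tracker&lt;/name&gt;
+    &lt;value&gt;jt.example.com:9001&lt;/value&gt;
+    &lt;final&gt;true&lt;/final&gt;
+  &lt;/property&gt;
+  &lt;property&gt;
+    &lt;name&gt;dfs.name.dir&lt;/name&gt;
+    &lt;value&gt;/data/1/dfs/name,/data/2/dfs/name&lt;/value&gt;
+  &lt;/property&gt;
+  &lt;property&gt;
+    &lt;name&gt;dfs.data.dir&lt;/name&gt;
+    &lt;value&gt;/data/1/dfs/data,/data/2/dfs/data&lt;/value&gt;
+  &lt;/property&gt;
+&lt;/configuration&gt;
+</source>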
+
+          <section>
+            <title>现实世界的集群配置</title>
+            
+            <p>这节罗列在大规模集群上运行<em>sort</em>基准测试(benchmark)时使用到的一些非缺省配置。</p>
+            
+            <ul>
+              <li>
+                <p>运行sort900的一些非缺省配置值,sort900即在900个节点的集群上对9TB的数据进行排序:</p>
+                <table>
+  		          <tr>
+		            <th>参数</th>
+		            <th>取值</th> 
+		            <th>备注</th>
+		          </tr>
+                  <tr>
+                    <td>dfs.block.size</td>
+                    <td>134217728</td>
+                    <td>针对大文件系统,HDFS的块大小取128MB。</td>
+                  </tr>
+                  <tr>
+                    <td>dfs.namenode.handler.count</td>
+                    <td>40</td>
+                    <td>
+                      启动更多的NameNode服务线程去处理来自大量DataNode的RPC请求。
+                    </td>
+                  </tr>
+                  <tr>
+                    <td>mapred.reduce.parallel.copies</td>
+                    <td>20</td>
+                    <td>
+			reduce启动更多的并行拷贝器以获取大量map的输出。
+                    </td>
+                  </tr>
+                  <tr>
+                    <td>mapred.child.java.opts</td>
+                    <td>-Xmx512M</td>
+                    <td>
+			为map/reduce子虚拟机使用更大的堆。 
+                    </td>
+                  </tr>
+                  <tr>
+                    <td>fs.inmemory.size.mb</td>
+                    <td>200</td>
+                    <td>
+                      为reduce阶段合并map输出所需的内存文件系统分配更多的内存。
+                    </td>
+                  </tr>
+                  <tr>
+                    <td>io.sort.factor</td>
+                    <td>100</td>
+                    <td>文件排序时更多的流将同时被归并。</td>
+                  </tr>
+                  <tr>
+                    <td>io.sort.mb</td>
+                    <td>200</td>
+                    <td>提高排序时的内存上限。</td>
+                  </tr>
+                  <tr>
+                    <td>io.file.buffer.size</td>
+                    <td>131072</td>
+                    <td>SequenceFile中用到的读/写缓存大小。</td>
+                  </tr>
+                </table>
+              </li>
+              <li>
+                <p>运行sort1400和sort2000时需要更新的配置,即在1400个节点上对14TB的数据进行排序和在2000个节点上对20TB的数据进行排序:</p>
+                <table>
+  		          <tr>
+		            <th>参数</th>
+		            <th>取值</th> 
+		            <th>备注</th>
+		          </tr>
+                  <tr>
+                    <td>mapred.job.tracker.handler.count</td>
+                    <td>60</td>
+                    <td>
+                      启用更多的JobTracker服务线程去处理来自大量TaskTracker的RPC请求。
+                    </td>
+                  </tr>
+                  <tr>
+                    <td>mapred.reduce.parallel.copies</td>
+                    <td>50</td>
+                    <td></td>
+                  </tr>
+                  <tr>
+                    <td>tasktracker.http.threads</td>
+                    <td>50</td>
+                    <td>
+                      为TaskTracker的Http服务启用更多的工作线程。reduce通过Http服务获取map的中间输出。
+                    </td>
+                  </tr>
+                  <tr>
+                    <td>mapred.child.java.opts</td>
+                    <td>-Xmx1024M</td>
+                    <td>使用更大的堆用于maps/reduces的子虚拟机</td>
+                  </tr>
+                </table>
+              </li>
+            </ul>
+          </section>
+          
+        </section>
+
+        <section>
+          <title>Slaves</title>
+          
+          <p>通常,你选择集群中的一台机器作为<code>NameNode</code>,另外一台不同的机器作为<code>JobTracker</code>。余下的机器既作为<code>DataNode</code>又作为<code>TaskTracker</code>,这些机器被称为<em>slaves</em>。</p>
+          
+          <p>在<code>conf/slaves</code>文件中列出所有slave的主机名或者IP地址,一行一个。</p>
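+          <p>例如,一个<code>conf/slaves</code>文件可能是这样的(主机名仅为假设):</p>
+<source>
+# 以下主机名仅为示例
+slave01.example.com
+slave02.example.com
+slave03.example.com
+</source>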
+        </section>
+        
+        <section>
+          <title>日志</title>
+          
+          <p>Hadoop通过<a href="http://commons.apache.org/logging/">Apache Commons Logging</a>框架使用<a href="http://logging.apache.org/log4j/">Apache log4j</a>来记录日志。编辑<code>conf/log4j.properties</code>文件可以改变Hadoop守护进程的日志配置(日志格式等)。</p>
+          
+          <section>
+            <title>历史日志</title>
+            
+	    <p>作业的历史文件集中存放在<code>hadoop.job.history.location</code>,这个也可以是在分布式文件系统下的路径,其默认值为<code>${HADOOP_LOG_DIR}/history</code>。jobtracker的web UI上有历史日志的web UI链接。</p>
+            
+            <p>历史文件在用户指定的目录<code>hadoop.job.history.user.location</code>也会记录一份,这个配置的缺省值为作业的输出目录。这些文件被存放在指定路径下的“_logs/history/”目录中。因此,默认情况下日志文件会在“mapred.output.dir/_logs/history/”下。如果将<code>hadoop.job.history.user.location</code>指定为值<code>none</code>,系统将不再记录此日志。</p>
+            
+            <p>用户可使用以下命令在指定路径下查看历史日志汇总<br/>
+            <code>$ bin/hadoop job -history output-dir</code><br/> 
+            这条命令会显示作业的细节信息,失败和终止的任务细节。 <br/>
+            关于作业的更多细节,比如成功的任务,以及对每个任务的所做的尝试次数等可以用下面的命令查看<br/>
+            <code>$ bin/hadoop job -history all output-dir</code><br/></p> 
+          </section>
+        </section>
+      </section>
+
+      <p>一旦全部必要的配置完成,将这些文件分发到所有机器的<code>HADOOP_CONF_DIR</code>路径下,通常是<code>${HADOOP_PREFIX}/conf</code>。</p>
+    </section>
+    
+    <section>
+      <title>Hadoop的机架感知</title>
+      <p>HDFS和Map/Reduce的组件是能够感知机架的。</p>
+      <p><code>NameNode</code>和<code>JobTracker</code>通过调用管理员配置模块中的API<a href="ext:api/org/apache/hadoop/net/dnstoswitchmapping/resolve">resolve</a>来获取集群里每个slave的<code>机架id</code>。该API将slave的DNS名称(或者IP地址)转换成机架id。使用哪个模块是通过配置项<code>topology.node.switch.mapping.impl</code>来指定的。模块的默认实现会调用<code>topology.script.file.name</code>配置项所指定的一个脚本/命令。如果<code>topology.script.file.name</code>未被设置,对于所有传入的IP地址,模块会返回<code>/default-rack</code>作为机架id。在Map/Reduce部分还有一个额外的配置项<code>mapred.cache.task.levels</code>,该参数决定cache的级数(在网络拓扑中)。例如,如果默认值是2,会建立两级的cache:一级针对主机(主机 -> 任务的映射),另一级针对机架(机架 -> 任务的映射)。
+      </p>
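+      <p>例如,若要启用自定义的拓扑脚本(下面的脚本路径仅为假设值),可以在<code>conf/hadoop-site.xml</code>中加入:</p>
+<source>
+&lt;!-- 示例:脚本路径为假设值 --&gt;
+&lt;property&gt;
+  &lt;name&gt;topology.script.file.name&lt;/name&gt;
+  &lt;value&gt;/etc/hadoop/topology.sh&lt;/value&gt;
+&lt;/property&gt;
+&lt;property&gt;
+  &lt;name&gt;mapred.cache.task.levels&lt;/name&gt;
+  &lt;value&gt;2&lt;/value&gt;
+&lt;/property&gt;
+</source>
+      <p>该脚本以一组DNS名称(或IP地址)作为参数,并按相同顺序逐行输出对应的机架id(例如<code>/rack1</code>)。</p>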
+    </section>
+    
+    <section>
+      <title>启动Hadoop</title>
+      
+      <p>启动Hadoop集群需要启动HDFS集群和Map/Reduce集群。</p>
+
+      <p>
+        格式化一个新的分布式文件系统:<br/>
+        <code>$ bin/hadoop namenode -format</code>
+      </p>
+      
+      <p>
+	在分配的<code>NameNode</code>上,运行下面的命令启动HDFS:<br/>
+        <code>$ bin/start-dfs.sh</code>
+      </p>
+      <p><code>bin/start-dfs.sh</code>脚本会参照<code>NameNode</code>上<code>${HADOOP_CONF_DIR}/slaves</code>文件的内容,在所有列出的slave上启动<code>DataNode</code>守护进程。</p>
+      <p>
+	在分配的<code>JobTracker</code>上,运行下面的命令启动Map/Reduce:<br/>
+        <code>$ bin/start-mapred.sh</code>
+      </p>
+      <p><code>bin/start-mapred.sh</code>脚本会参照<code>JobTracker</code>上<code>${HADOOP_CONF_DIR}/slaves</code>文件的内容,在所有列出的slave上启动<code>TaskTracker</code>守护进程。</p>
+    </section>
+    
+    <section>
+      <title>停止Hadoop</title>
+      
+      <p>
+	在分配的<code>NameNode</code>上,执行下面的命令停止HDFS:<br/>
+        <code>$ bin/stop-dfs.sh</code>
+      </p>
+      <p><code>bin/stop-dfs.sh</code>脚本会参照<code>NameNode</code>上<code>${HADOOP_CONF_DIR}/slaves</code>文件的内容,在所有列出的slave上停止<code>DataNode</code>守护进程。</p>
+      <p>
+	在分配的<code>JobTracker</code>上,运行下面的命令停止Map/Reduce:<br/>
+        <code>$ bin/stop-mapred.sh</code><br/>
+      </p>
+      <p><code>bin/stop-mapred.sh</code>脚本会参照<code>JobTracker</code>上<code>${HADOOP_CONF_DIR}/slaves</code>文件的内容,在所有列出的slave上停止<code>TaskTracker</code>守护进程。</p>
+    </section>
+  </body>
+  
+</document>

+ 596 - 0
common/src/docs/cn/src/documentation/content/xdocs/commands_manual.xml

@@ -0,0 +1,596 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+	<header>
+		<title>命令手册</title>
+	</header>
+	
+	<body>
+		<section>
+			<title>概述</title>
+			<p>
+				所有的hadoop命令均通过bin/hadoop脚本调用。不带参数运行hadoop脚本会打印所有命令的描述。
+			</p>
+			<p>
+				<code>用法:hadoop [--config confdir] [COMMAND] [GENERIC_OPTIONS] [COMMAND_OPTIONS]</code>
+			</p>
+			<p>
+				Hadoop有一个选项解析框架用于解析一般的选项和运行类。
+			</p>
+			<table>
+			          <tr><th> 命令选项 </th><th> 描述 </th></tr>
+			
+			           <tr>
+			          	<td><code>--config confdir</code></td>
+			            <td>覆盖缺省配置目录。缺省是${HADOOP_PREFIX}/conf。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>GENERIC_OPTIONS</code></td>
+			            <td>多个命令都支持的通用选项。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>COMMAND</code><br/><code>COMMAND_OPTIONS</code></td>
+			            <td>各种各样的命令和它们的选项会在下面提到。这些命令被分为
+			             <a href="commands_manual.html#用户命令">用户命令</a>和
+			             <a href="commands_manual.html#管理命令">管理命令</a>两组。</td>
+			           </tr>
+			     </table>
+			 <section>
+				<title>常规选项</title>
+				<p>
+				  下面的选项被
+				  <a href="commands_manual.html#dfsadmin">dfsadmin</a>, 
+				  <a href="commands_manual.html#fs">fs</a>, <a href="commands_manual.html#fsck">fsck</a>和 
+				  <a href="commands_manual.html#job">job</a>支持。 
+				  应用程序要实现
+				  <a href="ext:api/org/apache/hadoop/util/tool">Tool</a>来支持
+				  <a href="ext:api/org/apache/hadoop/util/genericoptionsparser">
+				  常规选项</a>。
+				</p>
+			     <table>
+			          <tr><th> GENERIC_OPTION </th><th> 描述 </th></tr>
+			
+			           <tr>
+			          	<td><code>-conf &lt;configuration file&gt;</code></td>
+			            <td>指定应用程序的配置文件。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-D &lt;property=value&gt;</code></td>
+			            <td>为指定property指定值value。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-fs &lt;local|namenode:port&gt;</code></td>
+			            <td>指定namenode。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-jt &lt;local|jobtracker:port&gt;</code></td>
+			            <td>指定job tracker。只适用于<a href="commands_manual.html#job">job</a>。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-files &lt;逗号分隔的文件列表&gt;</code></td>
+			            <td>指定要拷贝到map reduce集群的文件的逗号分隔的列表。
+			            只适用于<a href="commands_manual.html#job">job</a>。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-libjars &lt;逗号分隔的jar列表&gt;</code></td>
+			            <td>指定要包含到classpath中的jar文件的逗号分隔的列表。
+			            只适用于<a href="commands_manual.html#job">job</a>。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-archives &lt;逗号分隔的archive列表&gt;</code></td>
+			            <td>指定要被解压到计算节点上的档案文件的逗号分割的列表。
+			            只适用于<a href="commands_manual.html#job">job</a>。</td>
+			           </tr>
+				</table>
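+				<p>
+					例如(下面的namenode和jobtracker地址仅为示例),可以用<code>-fs</code>和<code>-jt</code>在命令行上临时指定namenode和jobtracker:<br/>
+					<code>hadoop fs -fs hdfs://nn.example.com:9000 -ls /user</code><br/>
+					<code>hadoop job -jt jt.example.com:9001 -list</code>
+				</p>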
+			</section>	   
+		</section>
+		
+		<section>
+			<title> 用户命令 </title>
+			<p>hadoop集群用户的常用命令。</p>
+			<section>
+				<title> archive </title>
+				<p>
+					创建一个hadoop档案文件。参考 <a href="hadoop_archives.html">Hadoop Archives</a>.
+				</p>
+				<p>
+					<code>用法:hadoop archive -archiveName NAME &lt;src&gt;* &lt;dest&gt;</code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+					   <tr>
+			          	<td><code>-archiveName NAME</code></td>
+			            <td>要创建的档案的名字。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>src</code></td>
+			            <td>文件系统的路径名,与通常一样可以使用正则表达式。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>dest</code></td>
+			            <td>保存档案文件的目标目录。</td>
+			           </tr>
+			     </table>
+			</section>
+			
+			<section>
+				<title> distcp </title>
+				<p>
+					递归地拷贝文件或目录。参考<a href="distcp.html">DistCp指南</a>以获取更多信息。
+				</p>
+				<p>
+					<code>用法:hadoop distcp &lt;srcurl&gt; &lt;desturl&gt;</code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+			
+			           <tr>
+			          	<td><code>srcurl</code></td>
+			            <td>源Url</td>
+			           </tr>
+			           <tr>
+			          	<td><code>desturl</code></td>
+			            <td>目标Url</td>
+			           </tr>
+			     </table>
+			</section>
+			       
+			<section>
+				<title> fs </title>
+				<p>
+					<code>用法:hadoop fs [</code><a href="commands_manual.html#常规选项">GENERIC_OPTIONS</a><code>] 
+					[COMMAND_OPTIONS]</code>
+				</p>
+				<p>
+					运行一个常规的文件系统客户端。
+				</p>
+				<p>
+					各种命令选项可以参考<a href="hdfs_shell.html">HDFS Shell指南</a>。
+				</p>   
+			</section>
+			
+			<section>
+				<title> fsck </title>
+				<p>
+					运行HDFS文件系统检查工具。参考<a href="hdfs_user_guide.html#fsck">Fsck</a>了解更多。
+				</p> 
+				<p><code>用法:hadoop fsck [</code><a href="commands_manual.html#常规选项">GENERIC_OPTIONS</a><code>] 
+				&lt;path&gt; [-move | -delete | -openforwrite] [-files [-blocks 
+				[-locations | -racks]]]</code></p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述 </th></tr>
+			          <tr>
+			            <td><code>&lt;path&gt;</code></td>
+			            <td>检查的起始目录。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-move</code></td>
+			            <td>移动受损文件到/lost+found</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-delete</code></td>
+			            <td>删除受损文件。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-openforwrite</code></td>
+			            <td>打印出写打开的文件。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-files</code></td>
+			            <td>打印出正被检查的文件。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-blocks</code></td>
+			            <td>打印出块信息报告。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-locations</code></td>
+			            <td>打印出每个块的位置信息。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-racks</code></td>
+			            <td>打印出data-node的网络拓扑结构。</td>
+			           </tr>
+					</table>
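+				<p>
+					例如,检查某个目录并打印文件、块及其位置信息(路径仅为示例):<br/>
+					<code>hadoop fsck /user/hadoop -files -blocks -locations</code>
+				</p>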
+			</section>
+			
+			<section>
+				<title> jar </title>
+				<p>
+					运行jar文件。用户可以把他们的Map Reduce代码捆绑到jar文件中,使用这个命令执行。
+				</p> 
+				<p>
+					<code>用法:hadoop jar &lt;jar&gt; [mainClass] args...</code>
+				</p>
+				<p>
+					streaming作业是通过这个命令执行的。参考<a href="streaming.html#其他例子">Streaming examples</a>中的例子。
+				</p>
+				<p>
+					Word count例子也是通过jar命令运行的。参考<a href="mapred_tutorial.html#用法">Wordcount example</a>。
+				</p>
+			</section>
+			
+			<section>
+				<title> job </title>
+				<p>
+					用于和Map Reduce作业交互的命令。
+				</p>
+				<p>
+					<code>用法:hadoop job [</code><a href="commands_manual.html#常规选项">GENERIC_OPTIONS</a><code>] 
+					[-submit &lt;job-file&gt;] | [-status &lt;job-id&gt;] | 
+					[-counter &lt;job-id&gt; &lt;group-name&gt; &lt;counter-name&gt;] | [-kill &lt;job-id&gt;] | 
+					[-events &lt;job-id&gt; &lt;from-event-#&gt; &lt;#-of-events&gt;] | [-history [all] &lt;jobOutputDir&gt;] |
+					[-list [all]] | [-kill-task &lt;task-id&gt;] | [-fail-task &lt;task-id&gt;]</code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+			
+			           <tr>
+			          	<td><code>-submit &lt;job-file&gt;</code></td>
+			            <td>提交作业</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-status &lt;job-id&gt;</code></td>
+			            <td>打印map和reduce完成百分比和所有计数器。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-counter &lt;job-id&gt; &lt;group-name&gt; &lt;counter-name&gt;</code></td>
+			            <td>打印计数器的值。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-kill &lt;job-id&gt;</code></td>
+			            <td>杀死指定作业。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-events &lt;job-id&gt; &lt;from-event-#&gt; &lt;#-of-events&gt;</code></td>
+			            <td>打印给定范围内jobtracker接收到的事件细节。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-history [all] &lt;jobOutputDir&gt;</code></td>
+			            <td>-history &lt;jobOutputDir&gt; 打印作业的细节、失败及被杀死原因的细节。更多的关于一个作业的细节比如成功的任务,做过的任务尝试等信息可以通过指定[all]选项查看。
+			            </td>
+			           </tr>
+			           <tr>
+			          	<td><code>-list [all]</code></td>
+			            <td>-list all显示所有作业。-list只显示尚未完成的作业。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-kill-task &lt;task-id&gt;</code></td>
+			            <td>杀死任务。被杀死的任务不会被计入失败尝试次数。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-fail-task &lt;task-id&gt;</code></td>
+			            <td>使任务失败。被置为失败的任务会被计入失败尝试次数。</td>
+			           </tr>
+					</table>
+			</section>
+			
+			<section>
+				<title> pipes </title>
+				<p>
+					运行pipes作业。
+				</p>
+				<p>
+					<code>用法:hadoop pipes [-conf &lt;path&gt;] [-jobconf &lt;key=value&gt;, &lt;key=value&gt;, ...] 
+					[-input &lt;path&gt;] [-output &lt;path&gt;] [-jar &lt;jar file&gt;] [-inputformat &lt;class&gt;] 
+					[-map &lt;class&gt;] [-partitioner &lt;class&gt;] [-reduce &lt;class&gt;] [-writer &lt;class&gt;] 
+					[-program &lt;executable&gt;] [-reduces &lt;num&gt;] </code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+			
+			          <tr>
+			          	<td><code>-conf &lt;path&gt;</code></td>
+			            <td>作业的配置</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-jobconf &lt;key=value&gt;, &lt;key=value&gt;, ...</code></td>
+			            <td>增加/覆盖作业的配置项</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-input &lt;path&gt;</code></td>
+			            <td>输入目录</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-output &lt;path&gt;</code></td>
+			            <td>输出目录</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-jar &lt;jar file&gt;</code></td>
+			            <td>Jar文件名</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-inputformat &lt;class&gt;</code></td>
+			            <td>InputFormat类</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-map &lt;class&gt;</code></td>
+			            <td>Java Map类</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-partitioner &lt;class&gt;</code></td>
+			            <td>Java Partitioner</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-reduce &lt;class&gt;</code></td>
+			            <td>Java Reduce类</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-writer &lt;class&gt;</code></td>
+			            <td>Java RecordWriter</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-program &lt;executable&gt;</code></td>
+			            <td>可执行程序的URI</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-reduces &lt;num&gt;</code></td>
+			            <td>reduce个数</td>
+			           </tr>
+					</table>
+			</section>
+			
+			<section>
+				<title> version </title>
+				<p>
+					打印版本信息。
+				</p> 
+				<p>
+					<code>用法:hadoop version</code>
+				</p>
+			</section>
+			
+			<section>
+				<title> CLASSNAME </title>
+				<p>
+					 hadoop脚本可用于调用任何类。
+				</p>
+				<p>
+					<code>用法:hadoop CLASSNAME</code>
+				</p>
+				<p>
+					 运行名字为CLASSNAME的类。
+				</p>
+			</section>
+			
+		</section>
+		
+		<section>
+			<title>管理命令</title>
+			<p>hadoop集群管理员常用的命令。</p>
+			<section>
+				<title> balancer </title>
+				<p>
+					运行集群平衡工具。管理员可以简单的按Ctrl-C来停止平衡过程。参考<a href="hdfs_user_guide.html#Rebalancer">Rebalancer</a>了解更多。
+				</p>
+				<p>
+					<code>用法:hadoop balancer [-threshold &lt;threshold&gt;]</code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+			
+			           <tr>
+			          	<td><code>-threshold &lt;threshold&gt;</code></td>
+			            <td>磁盘容量的百分比。这会覆盖缺省的阈值。</td>
+			           </tr>
+			     </table>
+			</section>
+			
+			<section>
+				<title> daemonlog </title>
+				<p>
+					 获取或设置每个守护进程的日志级别。
+				</p> 
+				<p>
+					<code>用法:hadoop daemonlog  -getlevel &lt;host:port&gt; &lt;name&gt;</code><br/>
+					<code>用法:hadoop daemonlog  -setlevel &lt;host:port&gt; &lt;name&gt; &lt;level&gt;</code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+			
+			           <tr>
+			          	<td><code>-getlevel &lt;host:port&gt; &lt;name&gt;</code></td>
+			            <td>打印运行在&lt;host:port&gt;的守护进程的日志级别。这个命令内部会连接http://&lt;host:port&gt;/logLevel?log=&lt;name&gt;</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-setlevel &lt;host:port&gt; &lt;name&gt; &lt;level&gt;</code></td>
+			            <td>设置运行在&lt;host:port&gt;的守护进程的日志级别。这个命令内部会连接http://&lt;host:port&gt;/logLevel?log=&lt;name&gt;</td>
+			           </tr>
+			     </table>
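+				<p>
+					例如(主机、端口和日志名仅为示例),把NameNode的日志级别临时调为DEBUG:<br/>
+					<code>hadoop daemonlog -setlevel nn.example.com:50070 org.apache.hadoop.hdfs.server.namenode.NameNode DEBUG</code>
+				</p>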
+			</section>
+			
+			<section>
+				<title> datanode</title>
+				<p>
+					运行一个HDFS的datanode。
+				</p> 
+				<p>
+					<code>用法:hadoop datanode [-rollback]</code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+			
+			           <tr>
+			          	<td><code>-rollback</code></td>
+			            <td>将datanode回滚到前一个版本。这需要在停止datanode,分发老的hadoop版本之后使用。
+			            </td>
+			           </tr>
+			     </table>
+			</section>
+			
+			<section>
+				<title> dfsadmin </title>
+				<p>
+					运行一个HDFS的dfsadmin客户端。
+				</p> 
+				<p>
+					<code>用法:hadoop dfsadmin  [</code><a href="commands_manual.html#常规选项">GENERIC_OPTIONS</a><code>] [-report] [-safemode enter | leave | get | wait] [-refreshNodes]
+					 [-finalizeUpgrade] [-upgradeProgress status | details | force] [-metasave filename] 
+					 [-setQuota &lt;quota&gt; &lt;dirname&gt;...&lt;dirname&gt;] [-clrQuota &lt;dirname&gt;...&lt;dirname&gt;] 
+					 [-help [cmd]]</code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+			
+			           <tr>
+			          	<td><code>-report</code></td>
+			            <td>报告文件系统的基本信息和统计信息。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-safemode enter | leave | get | wait</code></td>
+			            <td>安全模式维护命令。安全模式是Namenode的一个状态,这种状态下,Namenode <br/>
+					1.  不接受对名字空间的更改(只读)<br/> 
+					2.  不复制或删除块<br/>
+					Namenode会在启动时自动进入安全模式,当配置的块最小百分比数满足最小的副本数条件时,会自动离开安全模式。安全模式可以手动进入,但是这样的话也必须手动关闭安全模式。
+                </td>
+			           </tr>
+			           <tr>
+			          	<td><code>-refreshNodes</code></td>
+			            <td>重新读取hosts和exclude文件,更新允许连接到Namenode的Datanode集合,以及需要退役(decommission)或重新加入的Datanode集合。
+                </td>
+			           </tr>
+			           <tr>
+			          	<td><code>-finalizeUpgrade</code></td>
+			            <td>终结HDFS的升级操作。Datanode删除前一个版本的工作目录,之后Namenode也这样做。这个操作完结整个升级过程。
+                </td>
+			           </tr>
+			           <tr>
+			          	<td><code>-upgradeProgress status | details | force</code></td>
+			            <td>请求当前系统的升级状态,状态的细节,或者强制升级操作进行。
+                </td>
+			           </tr>
+			           <tr>
+			          	<td><code>-metasave filename</code></td>
+			            <td>保存Namenode的主要数据结构到hadoop.log.dir属性指定的目录下的&lt;filename&gt;文件。对于下面的每一项,&lt;filename&gt;中都会一行内容与之对应<br/>
+                        1. Namenode收到的Datanode的心跳信号<br/>
+                        2. 等待被复制的块<br/>
+                        3. 正在被复制的块<br/>
+                        4. 等待被删除的块</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-setQuota &lt;quota&gt; &lt;dirname&gt;...&lt;dirname&gt;</code></td>
+			            <td>为每个目录 &lt;dirname&gt;设定配额&lt;quota&gt;。目录配额是一个长整型整数,强制限定了目录树下的名字个数。<br/>
+                命令会尽可能地对每个目录生效,以下情况会报错:<br/>
+                1. N不是一个正整数,或者<br/>
+                2. 用户不是管理员,或者<br/>
+                3. 这个目录不存在或是文件,或者<br/>
+                4. 目录会马上超出新设定的配额。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-clrQuota &lt;dirname&gt;...&lt;dirname&gt;</code></td>
+			            <td>为每一个目录&lt;dirname&gt;清除配额设定。<br/>
+                命令会尽可能地对每个目录生效,以下情况会报错:<br/>
+                1. 这个目录不存在或是文件,或者<br/>
+                2. 用户不是管理员。<br/>
+                如果目录原来没有配额不会报错。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-help [cmd]</code></td>
+			            <td>显示给定命令的帮助信息,如果没有给定命令,则显示所有命令的帮助信息。</td>
+			           </tr>
+			     </table>
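+				<p>
+					例如,查看集群的基本状态,或为某个目录设置名字配额(目录与配额值仅为示例):<br/>
+					<code>hadoop dfsadmin -report</code><br/>
+					<code>hadoop dfsadmin -setQuota 10000 /user/hadoop/dir1</code>
+				</p>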
+			</section>
+			
+			<section>
+				<title> jobtracker </title>
+				<p>
+					运行MapReduce job Tracker节点。
+				</p> 
+				<p>
+					<code>用法:hadoop jobtracker</code>
+				</p>
+			</section>
+			
+			<section>
+				<title> namenode </title>
+				<p>
+					运行namenode。有关升级,回滚,升级终结的更多信息请参考<a href="hdfs_user_guide.html#升级和回滚">升级和回滚</a>。
+				</p>
+				<p>
+					<code>用法:hadoop namenode [-format] | [-upgrade] | [-rollback] | [-finalize] | [-importCheckpoint]</code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+			
+			           <tr>
+			          	<td><code>-format</code></td>
+			            <td>格式化namenode。它启动namenode,格式化namenode,之后关闭namenode。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-upgrade</code></td>
+			            <td>分发新版本的hadoop后,namenode应以upgrade选项启动。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-rollback</code></td>
+			            <td>将namenode回滚到前一版本。这个选项要在停止集群,分发老的hadoop版本后使用。
+			            </td>
+			           </tr>
+			           <tr>
+			          	<td><code>-finalize</code></td>
+			            <td>finalize会删除文件系统的前一状态。最近的升级会被持久化,rollback选项将不再可用。升级终结操作完成之后,它会停掉namenode。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-importCheckpoint</code></td>
+			            <td>从检查点目录装载镜像并保存到当前检查点目录,检查点目录由dfs.namenode.checkpoint.dir指定。
+			            </td>
+			           </tr>
+			     </table>
+			</section>
+			
+			<section>
+				<title> secondarynamenode </title>
+				<p>
+					运行HDFS的secondary namenode。参考<a href="hdfs_user_guide.html#Secondary+NameNode">Secondary Namenode</a>了解更多。 
+				</p>
+				<p>
+					<code>用法:hadoop secondarynamenode [-checkpoint [force]] | [-geteditsize]</code>
+				</p>
+				<table>
+			          <tr><th> 命令选项 </th><th> 描述</th></tr>
+			
+			           <tr>
+			          	<td><code>-checkpoint [force]</code></td>
+			            <td>如果EditLog的大小 >= dfs.namenode.checkpoint.size,启动Secondary namenode的检查点过程。
+			            如果使用了-force,将不考虑EditLog的大小。</td>
+			           </tr>
+			           <tr>
+			          	<td><code>-geteditsize</code></td>
+			            <td>打印EditLog大小。</td>
+			           </tr>
+			     </table>
+			</section>
+			
+			<section>
+				<title> tasktracker </title>
+				<p>
+					运行MapReduce的task Tracker节点。
+				</p> 
+				<p>
+					<code>用法:hadoop tasktracker</code>
+				</p>
+			</section>
+			
+		</section>
+		
+		
+		      
+
+	</body>
+</document>      

+ 294 - 0
common/src/docs/cn/src/documentation/content/xdocs/distcp.xml

@@ -0,0 +1,294 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+
+  <header>
+    <title>DistCp</title>
+  </header>
+
+  <body>
+
+    <section>
+      <title>概述</title>
+
+      <p>DistCp(分布式拷贝)是用于大规模集群内部和集群之间拷贝的工具。
+	  它使用Map/Reduce实现文件分发,错误处理和恢复,以及报告生成。
+      它把文件和目录的列表作为map任务的输入,每个任务会完成源列表中部分文件的拷贝。
+      由于使用了Map/Reduce方法,这个工具在语义和执行上都会有特殊的地方。
+      这篇文档会为常用DistCp操作提供指南并阐述它的工作模型。
+      </p>
+
+    </section>
+
+    <section>
+      <title>使用方法</title>
+
+      <section>
+        <title>基本使用方法</title>
+        <p>DistCp最常用在集群之间的拷贝:</p>
+        <p><code>bash$ hadoop distcp hdfs://nn1:8020/foo/bar \</code><br/>
+           <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 hdfs://nn2:8020/bar/foo</code></p>
+
+        <p>这条命令会把nn1集群的<code>/foo/bar</code>目录下的所有文件或目录名展开并存储到一个临时文件中,这些文件内容的拷贝工作被分配给多个map任务,
+        然后每个TaskTracker分别执行从nn1到nn2的拷贝操作。注意DistCp使用绝对路径进行操作。
+        </p>
+
+        <p>命令行中可以指定多个源目录:</p>
+        <p><code>bash$ hadoop distcp hdfs://nn1:8020/foo/a \</code><br/>
+           <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 hdfs://nn1:8020/foo/b \</code><br/>
+           <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 hdfs://nn2:8020/bar/foo</code></p>
+
+        <p>或者使用<code>-f</code>选项,从文件里获得多个源:<br/>
+        <code>bash$ hadoop distcp -f hdfs://nn1:8020/srclist \</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;hdfs://nn2:8020/bar/foo</code><br/></p>
+
+        <p>其中<code>srclist</code> 的内容是<br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b</code></p>
+
+        <p>当从多个源拷贝时,如果两个源冲突,DistCp会停止拷贝并提示出错信息,
+        如果在目的位置发生冲突,会根据<a href="#options">选项设置</a>解决。
+        默认情况会跳过已经存在的目标文件(比如不用源文件做替换操作)。每次操作结束时
+        都会报告跳过的文件数目,但是如果某些拷贝操作失败了,但在之后的尝试成功了,
+        那么报告的信息可能不够精确(请参考<a href="#etc">附录</a>)。
+	</p>
+
+        <p>每个TaskTracker必须都能够与源端和目的端文件系统进行访问和交互。
+        对于HDFS来说,源和目的端要运行相同版本的协议或者使用向下兼容的协议。
+        (请参考<a href="#cpver">不同版本间的拷贝</a> )。
+        </p>
+
+        <p>拷贝完成后,建议生成源端和目的端文件的列表,并交叉检查,来确认拷贝真正成功。
+        因为DistCp使用Map/Reduce和文件系统API进行操作,所以这三者或它们之间有任何问题
+        都会影响拷贝操作。一些Distcp命令的成功执行可以通过再次执行带-update参数的该命令来完成,
+        但用户在如此操作之前应该对该命令的语法很熟悉。
+        </p>
+
+        <p>值得注意的是,当另一个客户端同时在向源文件写入时,拷贝很有可能会失败。
+        尝试覆盖HDFS上正在被写入的文件的操作也会失败。
+        如果一个源文件在拷贝之前被移动或删除了,拷贝失败同时输出异常
+        FileNotFoundException。</p>
+
+      </section> <!-- Basic -->
+
+      <section id="options">
+        <title>选项</title>
+
+        <section>
+        <title>选项索引</title>
+        <table>
+          <tr><th> 标识  </th><th> 描述 </th><th> 备注 </th></tr>
+
+          <tr><td><code>-p[rbugp]</code></td>
+              <td>Preserve<br/>
+                  &nbsp;&nbsp;r: replication number<br/>
+                  &nbsp;&nbsp;b: block size<br/>
+                  &nbsp;&nbsp;u: user<br/>
+                  &nbsp;&nbsp;g: group<br/>
+                  &nbsp;&nbsp;p: permission<br/></td>
+              <td>修改时间不会被保留。并且当指定
+              <code>-update</code> 时,更新的状态<strong>不</strong>会
+              被同步,除非文件大小不同(比如文件被重新创建)。
+              </td></tr>
+          <tr><td><code>-i</code></td>
+              <td>忽略失败</td>
+              <td>就像在 <a href="#etc">附录</a>中提到的,这个选项会比默认情况提供关于拷贝的更精确的统计, 同时它还将保留失败拷贝操作的日志,这些日志信息可以用于调试。最后,如果一个map失败了,但并没完成所有分块任务的尝试,这不会导致整个作业的失败。
+              </td></tr>
+          <tr><td><code>-log &lt;logdir&gt;</code></td>
+              <td>记录日志到 &lt;logdir&gt;</td>
+              <td>DistCp为每个文件的每次尝试拷贝操作都记录日志,并把日志作为map的输出。
+              如果一个map失败了,当重新执行时这个日志不会被保留。
+              </td></tr>
+          <tr><td><code>-m &lt;num_maps&gt;</code></td>
+              <td>同时拷贝的最大数目</td>
+              <td>指定了拷贝数据时map的数目。请注意并不是map数越多吞吐量越大。
+              </td></tr>
+          <tr><td><code>-overwrite</code></td>
+              <td>覆盖目标</td>
+              <td>如果一个map失败并且没有使用<code>-i</code>选项,不仅仅那些拷贝失败的文件,这个分块任务中的所有文件都会被重新拷贝。
+			  就像<a href="#uo">下面</a>提到的,它会改变生成目标路径的语义,所以
+              用户要小心使用这个选项。
+              </td></tr>
+          <tr><td><code>-update</code></td>
+              <td>如果源和目标的大小不一样则进行覆盖</td>
+              <td>像之前提到的,这不是&quot;同步&quot;操作。
+              执行覆盖的唯一标准是源文件和目标文件大小是否相同;如果不同,则源文件替换目标文件。
+              像 <a href="#uo">下面</a>提到的,它也改变生成目标路径的语义,
+              用户使用要小心。
+              </td></tr>
+          <tr><td><code>-f &lt;urilist_uri&gt;</code></td>
+              <td>使用&lt;urilist_uri&gt; 作为源文件列表</td>
+              <td>这等价于把所有文件名列在命令行中。
+              <code>urilist_uri</code> 列表应该是完整合法的URI。
+              </td></tr>
+
+        </table>
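+        <p>下面是一个组合使用上述选项的示例(集群地址与路径沿用前文的nn1/nn2,日志目录仅为假设),以20个map执行拷贝、忽略失败并记录日志,同时保留副本数、权限等属性:</p>
+        <p><code>bash$ hadoop distcp -p -i -m 20 -log hdfs://nn2:8020/logs \</code><br/>
+           <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 hdfs://nn1:8020/foo/bar \</code><br/>
+           <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+                 hdfs://nn2:8020/bar/foo</code></p>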
+
+      </section>
+
+      <section id="uo">
+        <title>更新和覆盖</title>
+
+        <p>这里给出一些 <code>-update</code>和 <code>-overwrite</code>的例子。
+        考虑一个从<code>/foo/a</code> 和
+        <code>/foo/b</code> 到 <code>/bar/foo</code>的拷贝,源路径包括:
+        </p>
+
+        <p><code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/aa</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/ab</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/ba</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/ab</code></p>
+
+        <p>如果没设置<code>-update</code>或 <code>-overwrite</code>选项,
+        那么两个源都会映射到目标端的
+        <code>/bar/foo/ab</code>。
+        如果设置了这两个选项,每个源目录的内容都会和目标目录的
+         <strong>内容</strong> 做比较。DistCp碰到这类冲突的情况会终止操作并退出。</p>
+
+        <p>默认情况下,<code>/bar/foo/a</code> 和
+        <code>/bar/foo/b</code> 目录都会被创建,所以并不会有冲突。</p>
+
+        <p>现在考虑一个使用<code>-update</code>合法的操作:<br/>
+        <code>distcp -update hdfs://nn1:8020/foo/a \</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              hdfs://nn1:8020/foo/b \</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              hdfs://nn2:8020/bar</code></p>
+
+        <p>其中源路径/大小:</p>
+
+        <p><code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/aa 32</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/ab 32</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/ba 64</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/bb 32</code></p>
+
+        <p>和目的路径/大小:</p>
+
+        <p><code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/aa 32</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/ba 32</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/bb 64</code></p>
+
+        <p>会产生:</p>
+
+        <p><code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/aa 32</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/ab 32</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/ba 64</code><br/>
+        <code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/bb 32</code></p>
+
+        <p>只有nn2的<code>aa</code>文件没有被覆盖。如果指定了
+        <code>-overwrite</code>选项,所有文件都会被覆盖。
+        </p>
+
+      </section> <!-- Update and Overwrite -->
+
+      </section> <!-- Options -->
+
+    </section> <!-- Usage -->
+
+    <section id="etc">
+      <title>附录</title>
+
+      <section>
+        <title>Map数目</title>
+
+          <p>DistCp会尝试着均分需要拷贝的内容,这样每个map拷贝差不多相等大小的内容。
+	          但因为文件是最小的拷贝粒度,所以增加同时执行的拷贝数(即map数)不一定会提高实际的并行度或总吞吐量。
+          </p>
+
+          <p>如果没使用<code>-m</code>选项,DistCp会尝试在调度工作时指定map的数目
+          为 <code>min (total_bytes / bytes.per.map, 20 * num_task_trackers)</code>,
+		  其中<code>bytes.per.map</code>默认是256MB。</p>
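+          <p>举例来说(数字仅为假设):若要拷贝1TB数据、集群有100个TaskTracker,则
+          <code>total_bytes / bytes.per.map = 1TB / 256MB = 4096</code>,而
+          <code>20 * num_task_trackers = 2000</code>,两者取小,调度的map数约为2000;
+          如需进一步限制,可用<code>-m</code>指定更小的值。</p>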
+
+          <p>建议对于长时间运行或定期运行的作业,根据源和目标集群大小、拷贝数量大小以及带宽调整map的数目。
+          </p>
+
+      </section>
+
+      <section id="cpver">
+        <title>不同HDFS版本间的拷贝</title>
+
+        <p>对于不同Hadoop版本间的拷贝,用户应该使用HftpFileSystem。
+        这是一个只读文件系统,所以DistCp必须运行在目标端集群上(更确切的说是在能够写入目标集群的TaskTracker上)。
+        源的格式是
+        <code>hftp://&lt;dfs.http.address&gt;/&lt;path&gt;</code>
+        (默认情况<code>dfs.http.address</code>是
+        &lt;namenode&gt;:50070)。</p>
+
+      </section>
+
+      <section>
+        <title>Map/Reduce和副效应</title>
+
+        <p>像前面提到的,map拷贝输入文件失败时,会带来一些副效应。
+        </p>
+
+        <ul>
+
+          <li>除非使用了<code>-i</code>,任务产生的日志会被新的尝试替换掉。
+          </li>
+
+          <li>除非使用了<code>-overwrite</code>,如果一个文件已被之前的map成功拷贝,再次执行拷贝时该文件会被标记为
+          &quot;被忽略&quot;。</li>
+
+          <li>如果map失败了<code>mapred.map.max.attempts</code>次,剩下的map任务会被终止(除非使用了<code>-i</code>)。
+          </li>
+
+          <li>如果<code>mapred.speculative.execution</code>被设置为
+          <code>final</code>和<code>true</code>,则拷贝的结果是未定义的。</li>
+
+        </ul>
+
+      </section>
+
+      <!--
+      <section>
+        <title>Firewalls and SSL</title>
+
+        <p>To copy over HTTP, use the HftpFileSystem as described in the
+        preceding <a href="#cpver">section</a>, and ensure that the required
+        port(s) are open.</p>
+
+        <p>TODO</p>
+
+      </section>
+      -->
+
+    </section> <!-- Appendix -->
+
+  </body>
+
+</document>

+ 69 - 0
common/src/docs/cn/src/documentation/content/xdocs/hadoop_archives.xml

@@ -0,0 +1,69 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+        <header>
+        <title>Hadoop Archives</title>
+        </header>
+        <body>
+        <section>
+        <title> 什么是Hadoop archives? </title>
+        <p>
+        Hadoop archives是特殊的档案格式。一个Hadoop archive对应一个文件系统目录。
+        Hadoop archive的扩展名是*.har。Hadoop archive包含元数据(形式是_index和_masterindex)和数据(part-*)文件。_index文件包含了档案中的文件的文件名和位置信息。
+        </p>
+        </section>
+        <section>
+        <title> 如何创建archive? </title>
+        <p>
+        <code>用法: hadoop archive -archiveName name &lt;src&gt;* &lt;dest&gt;</code>
+        </p>
+        <p>
+        由-archiveName选项指定你要创建的archive的名字。比如foo.har。archive的名字的扩展名应该是*.har。输入是文件系统的路径名,路径名的格式和平时的表达方式一样。创建的archive会保存到目标目录下。注意创建archives是一个Map/Reduce job。你应该在map reduce集群上运行这个命令。下面是一个例子:
+        </p>
+        <p>
+        <code>hadoop archive -archiveName foo.har /user/hadoop/dir1 /user/hadoop/dir2 /user/zoo/</code>
+        </p><p>
+        在上面的例子中,
+        /user/hadoop/dir1 和 /user/hadoop/dir2 会被归档到这个文件系统目录下
+        -- /user/zoo/foo.har。当创建archive时,源文件不会被更改或删除。
+        </p>
+        </section>
+        <section>
+        <title> 如何查看archives中的文件? </title>
+        <p>
+        archive作为文件系统层暴露给外界。所以所有的fs shell命令都能在archive上运行,但是要使用不同的URI。
+        另外,archive是不可改变的。所以重命名,删除和创建都会返回错误。Hadoop Archives 的URI是
+        </p><p><code>har://scheme-hostname:port/archivepath/fileinarchive</code></p><p>
+        如果没提供scheme-hostname,它会使用默认的文件系统。这种情况下URI是这种形式
+        </p><p><code>
+        har:///archivepath/fileinarchive</code></p>
+        <p>
+        这是一个archive的例子。archive的输入是/dir。这个dir目录包含文件filea,fileb。
+        把/dir归档到/user/hadoop/foo.har的命令是
+        </p>
+        <p><code>hadoop archive -archiveName foo.har /dir /user/hadoop</code>
+        </p><p>
+        获得创建的archive中的文件列表,使用命令
+        </p>
+        <p><code>hadoop dfs -lsr har:///user/hadoop/foo.har</code></p>
+        <p>查看archive中的filea文件的命令-
+        </p><p><code>hadoop dfs -cat har:///user/hadoop/foo.har/dir/filea</code></p>
+        </section>
+	</body>
+</document>

+ 376 - 0
common/src/docs/cn/src/documentation/content/xdocs/hdfs_design.xml

@@ -0,0 +1,376 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+
+  <header>
+    <title> 
+      Hadoop分布式文件系统:架构和设计
+    </title>
+    <authors>
+      <person name="Dhruba Borthakur" email="dhruba@yahoo-inc.com"/>
+    </authors> 
+  </header>
+
+  <body>
+    <section>
+      <title> 引言 </title>
+      <p>
+	      Hadoop分布式文件系统(<acronym title="Hadoop分布式文件系统">HDFS</acronym>)被设计成适合运行在通用硬件(commodity hardware)上的分布式文件系统。它和现有的分布式文件系统有很多共同点。但同时,它和其他的分布式文件系统的区别也是很明显的。HDFS是一个高度容错性的系统,适合部署在廉价的机器上。HDFS能提供高吞吐量的数据访问,非常适合大规模数据集上的应用。HDFS放宽了一部分POSIX约束,来实现流式读取文件系统数据的目的。HDFS在最开始是作为Apache Nutch搜索引擎项目的基础架构而开发的。HDFS是Apache Hadoop Core项目的一部分。这个项目的地址是<a href="http://hadoop.apache.org/core/">http://hadoop.apache.org/core/</a>。
+      </p>
+    </section>
+
+    <section> 
+      <title> 前提和设计目标 </title>
+
+      <section> 
+        <title> 硬件错误 </title>
+        <p>
+	硬件错误是常态而不是异常。HDFS可能由成百上千的服务器所构成,每个服务器上存储着文件系统的部分数据。我们面对的现实是构成系统的组件数目是巨大的,而且任一组件都有可能失效,这意味着总是有一部分HDFS的组件是不工作的。因此错误检测和快速、自动的恢复是HDFS最核心的架构目标。
+       </p>
+     </section>
+      <section> 
+        <title> 流式数据访问 </title>
+        <p>
+运行在HDFS上的应用和普通的应用不同,需要流式访问它们的数据集。HDFS的设计中更多的考虑到了数据批处理,而不是用户交互处理。比之数据访问的低延迟问题,更关键的在于数据访问的高吞吐量。POSIX标准设置的很多硬性约束对HDFS应用系统不是必需的。为了提高数据的吞吐量,在一些关键方面对POSIX的语义做了一些修改。        
+        </p>
+      </section>
+
+      <section> 
+        <title> 大规模数据集 </title>
+	<p>
+        运行在HDFS上的应用具有很大的数据集。HDFS上的一个典型文件大小一般都在G字节至T字节。因此,HDFS被调节以支持大文件存储。它应该能提供整体上高的数据传输带宽,能在一个集群里扩展到数百个节点。一个单一的HDFS实例应该能支撑数以千万计的文件。
+        </p>
+      </section>
+
+ 
+      <section> 
+        <title> 简单的一致性模型 </title>
+<!--DCCOMMENT:diff begin-->
+        <p>
+        HDFS应用需要一个“一次写入多次读取”的文件访问模型。一个文件经过创建、写入和关闭之后就不需要改变。这一假设简化了数据一致性问题,并且使高吞吐量的数据访问成为可能。Map/Reduce应用或者网络爬虫应用都非常适合这个模型。目前还有计划在将来扩充这个模型,使之支持文件的附加写操作。 
+        </p>
+<!--DCCOMMENT:diff end
+note: "MapReduce" has been replaced by "Map/Reduce" in this doc
+
+@@ -67,7 +67,7 @@
+       <section>
+         <title> Simple Coherency Model </title>
+         <p>
+-        HDFS applications need a write-once-read-many access model for files. A file once created, written, and closed need not be changed. This assumption simplifies data coherency issues and enables high throughput data access. A MapReduce application or a web crawler application fits perfectly with this model. There is a plan to support appending-writes to files in the future.
++        HDFS applications need a write-once-read-many access model for files. A file once created, written, and closed need not be changed. This assumption simplifies data coherency issues and enables high throughput data access. A Map/Reduce application or a web crawler application fits perfectly with this model. There is a plan to support appending-writes to files in the future.
+         </p>
+       </section>
+
+-->
+      </section>
+
+ 
+      <section> 
+        <title> &#x201c;移动计算比移动数据更划算&#x201d; </title>
+        <p>
+        一个应用请求的计算,离它操作的数据越近就越高效,在数据达到海量级别的时候更是如此。因为这样就能降低网络阻塞的影响,提高系统数据的吞吐量。将计算移动到数据附近,比之将数据移动到应用所在显然更好。HDFS为应用提供了将它们自己移动到数据附近的接口。 
+        </p>
+      </section>
+
+
+      <section> 
+        <title> 异构软硬件平台间的可移植性 </title>
+        <p>
+        HDFS在设计的时候就考虑到平台的可移植性。这种特性方便了HDFS作为大规模数据应用平台的推广。
+        </p>
+      </section>
+    </section>
+
+ 
+    <section>
+      <title> Namenode 和 Datanode </title>
+<!--DCCOMMENT:diff begin-->
+      <p>
+      HDFS采用master/slave架构。一个HDFS集群是由一个Namenode和一定数目的Datanodes组成。Namenode是一个中心服务器,负责管理文件系统的名字空间(namespace)以及客户端对文件的访问。集群中的Datanode一般是一个节点一个,负责管理它所在节点上的存储。HDFS暴露了文件系统的名字空间,用户能够以文件的形式在上面存储数据。从内部看,一个文件其实被分成一个或多个数据块,这些块存储在一组Datanode上。Namenode执行文件系统的名字空间操作,比如打开、关闭、重命名文件或目录。它也负责确定数据块到具体Datanode节点的映射。Datanode负责处理文件系统客户端的读写请求。在Namenode的统一调度下进行数据块的创建、删除和复制。
+      </p>
+<!--DCCOMMENT:diff end
+note : tag "<em></em>" has been deleted.
+ <p>
+-      HDFS has a master/slave architecture. An HDFS cluster consists of a single <em>Namenode</em>, a master server that manages the file system namespace and regulates access to files by clients. In addition, there are a number of <em>Datanodes</em>, usually one per node in the cluster, which manage storage attached to the nodes that they run on. HDFS exposes a file system namespace and allows user data to be stored in files. Internally, a file is split into one or more blocks and these blocks are stored in a set of Datanodes. The Namenode executes file system namespace operations like opening, closing, and renaming files and directories. It also determines the mapping of blocks to Datanodes. The Datanodes are responsible for serving read and write requests from the file system&#x2019;s clients. The Datanodes also perform block creation, deletion, and replication upon instruction from the Namenode.
++      HDFS has a master/slave architecture. An HDFS cluster consists of a single NameNode, a master server that manages the file system namespace and regulates access to files by clients. In addition, there are a number of DataNodes, usually one per node in the cluster, which manage storage attached to the nodes that they run on. HDFS exposes a file system namespace and allows user data to be stored in files. Internally, a file is split into one or more blocks and these blocks are stored in a set of DataNodes. The NameNode executes file system namespace operations like opening, closing, and renaming files and directories. It also determines the mapping of blocks to DataNodes. The DataNodes are responsible for serving read and write requests from the file system&#x2019;s clients. The DataNodes also perform block creation, deletion, and replication upon instruction from the NameNode.
+
+-->
+      <figure alt="HDFS 架构" src="images/hdfsarchitecture.gif"/>
+      <p>
+      Namenode和Datanode被设计成可以在普通的商用机器上运行。这些机器一般运行着GNU/Linux操作系统(<acronym title="操作系统">OS</acronym>)。HDFS采用Java语言开发,因此任何支持Java的机器都可以部署Namenode或Datanode。由于采用了可移植性极强的Java语言,使得HDFS可以部署到多种类型的机器上。一个典型的部署场景是一台机器上只运行一个Namenode实例,而集群中的其它机器分别运行一个Datanode实例。这种架构并不排斥在一台机器上运行多个Datanode,只不过这样的情况比较少见。
+      </p>
+      <p>
+      集群中单一Namenode的结构大大简化了系统的架构。Namenode是所有HDFS元数据的仲裁者和管理者,这样,用户数据永远不会流过Namenode。
+      </p>
+    </section> 
+
+    <section>
+      <title> 文件系统的名字空间 (namespace) </title>
+      <p>
+      HDFS支持传统的层次型文件组织结构。用户或者应用程序可以创建目录,然后将文件保存在这些目录里。文件系统名字空间的层次结构和大多数现有的文件系统类似:用户可以创建、删除、移动或重命名文件。当前,HDFS不支持用户磁盘配额和访问权限控制,也不支持硬链接和软链接。但是HDFS架构并不妨碍实现这些特性。
+      </p>
+      <p>
+      Namenode负责维护文件系统的名字空间,任何对文件系统名字空间或属性的修改都将被Namenode记录下来。应用程序可以设置HDFS保存的文件的副本数目。文件副本的数目称为文件的副本系数,这个信息也是由Namenode保存的。
+      </p>
+    </section>
+
+    <section> 
+      <title> 数据复制 </title>
+      <p>
+      HDFS被设计成能够在一个大集群中跨机器可靠地存储超大文件。它将每个文件存储成一系列的数据块,除了最后一个,所有的数据块都是同样大小的。为了容错,文件的所有数据块都会有副本。每个文件的数据块大小和副本系数都是可配置的。应用程序可以指定某个文件的副本数目。副本系数可以在文件创建的时候指定,也可以在之后改变。HDFS中的文件都是一次性写入的,并且严格要求在任何时候只能有一个写入者。 
+      </p>
+      <p>
+      Namenode全权管理数据块的复制,它周期性地从集群中的每个Datanode接收心跳信号和块状态报告(Blockreport)。接收到心跳信号意味着该Datanode节点工作正常。块状态报告包含了一个该Datanode上所有数据块的列表。
+    </p>
+    <figure alt="HDFS Datanodes" src="images/hdfsdatanodes.gif"/>
+
+      <section>
+        <title> 副本存放: 最最开始的一步 </title>
+        <p>
+        副本的存放是HDFS可靠性和性能的关键。优化的副本存放策略是HDFS区分于其他大部分分布式文件系统的重要特性。这种特性需要做大量的调优,并需要经验的积累。HDFS采用一种称为机架感知(rack-aware)的策略来改进数据的可靠性、可用性和网络带宽的利用率。目前实现的副本存放策略只是在这个方向上的第一步。实现这个策略的短期目标是验证它在生产环境下的有效性,观察它的行为,为实现更先进的策略打下测试和研究的基础。 
+        </p>
+        <p>
+	大型HDFS实例一般运行在跨越多个机架的计算机组成的集群上,不同机架上的两台机器之间的通讯需要经过交换机。在大多数情况下,同一个机架内的两台机器间的带宽会比不同机架的两台机器间的带宽大。        
+        </p>
+	<p>
+        通过一个<a href="cluster_setup.html#Hadoop的机架感知">机架感知</a>的过程,Namenode可以确定每个Datanode所属的机架id。一个简单但没有优化的策略就是将副本存放在不同的机架上。这样可以有效防止当整个机架失效时数据的丢失,并且允许读数据的时候充分利用多个机架的带宽。这种策略设置可以将副本均匀分布在集群中,有利于当组件失效情况下的负载均衡。但是,因为这种策略的一个写操作需要传输数据块到多个机架,这增加了写的代价。 
+        </p>
+        <p>
+        在大多数情况下,副本系数是3,HDFS的存放策略是将一个副本存放在本地机架的节点上,一个副本放在同一机架的另一个节点上,最后一个副本放在不同机架的节点上。这种策略减少了机架间的数据传输,这就提高了写操作的效率。机架的错误远远比节点的错误少,所以这个策略不会影响到数据的可靠性和可用性。与此同时,因为数据块只放在两个(不是三个)不同的机架上,所以此策略减少了读取数据时需要的网络传输总带宽。在这种策略下,副本并不是均匀分布在不同的机架上。三分之一的副本在一个节点上,三分之二的副本在一个机架上,其他副本均匀分布在剩下的机架中,这一策略在不损害数据可靠性和读取性能的情况下改进了写的性能。
+        </p>
+        <p>
+        当前,这里介绍的默认副本存放策略正在开发的过程中。
+        </p>
+      </section>
+
+      <section> 
+        <title> 副本选择 </title>
+        <p>
+        为了降低整体的带宽消耗和读取延时,HDFS会尽量让读取程序读取离它最近的副本。如果在读取程序的同一个机架上有一个副本,那么就读取该副本。如果一个HDFS集群跨越多个数据中心,那么客户端也将首先读本地数据中心的副本。
+        </p>
+      </section>
+
+      <section> 
+        <title> 安全模式 </title>
+        <p>
+	Namenode启动后会进入一个称为安全模式的特殊状态。处于安全模式的Namenode是不会进行数据块的复制的。Namenode从所有的 Datanode接收心跳信号和块状态报告。块状态报告包括了某个Datanode所有的数据块列表。每个数据块都有一个指定的最小副本数。当Namenode检测确认某个数据块的副本数目达到这个最小值,那么该数据块就会被认为是副本安全(safely replicated)的;在一定百分比(这个参数可配置)的数据块被Namenode检测确认是安全之后(加上一个额外的30秒等待时间),Namenode将退出安全模式状态。接下来它会确定还有哪些数据块的副本没有达到指定数目,并将这些数据块复制到其他Datanode上。
+        </p>
+      </section>
+
+    </section>
+
+    <section>
+      <title> 文件系统元数据的持久化 </title>
+        <p>
+	Namenode上保存着HDFS的名字空间。对于任何对文件系统元数据产生修改的操作,Namenode都会使用一种称为EditLog的事务日志记录下来。例如,在HDFS中创建一个文件,Namenode就会在Editlog中插入一条记录来表示;同样地,修改文件的副本系数也将往Editlog插入一条记录。Namenode在本地操作系统的文件系统中存储这个Editlog。整个文件系统的名字空间,包括数据块到文件的映射、文件的属性等,都存储在一个称为FsImage的文件中,这个文件也是放在Namenode所在的本地文件系统上。
+        </p>
+        <p>
+        Namenode在内存中保存着整个文件系统的名字空间和文件数据块映射(Blockmap)的映像。这个关键的元数据结构设计得很紧凑,因而一个有4G内存的Namenode足够支撑大量的文件和目录。当Namenode启动时,它从硬盘中读取Editlog和FsImage,将所有Editlog中的事务作用在内存中的FsImage上,并将这个新版本的FsImage从内存中保存到本地磁盘上,然后删除旧的Editlog,因为这个旧的Editlog的事务都已经作用在FsImage上了。这个过程称为一个检查点(checkpoint)。在当前实现中,检查点只发生在Namenode启动时,在不久的将来将实现支持周期性的检查点。
+        </p>
+        <p>
+	Datanode将HDFS数据以文件的形式存储在本地的文件系统中,它并不知道有关HDFS文件的信息。它把每个HDFS数据块存储在本地文件系统的一个单独的文件中。Datanode并不在同一个目录创建所有的文件,实际上,它用试探的方法来确定每个目录的最佳文件数目,并且在适当的时候创建子目录。在同一个目录中创建所有的本地文件并不是最优的选择,这是因为本地文件系统可能无法高效地在单个目录中支持大量的文件。当一个Datanode启动时,它会扫描本地文件系统,产生一个这些本地文件对应的所有HDFS数据块的列表,然后作为报告发送到Namenode,这个报告就是块状态报告。         
+        </p>
+    </section>
+
+    <section> 
+      <title> 通讯协议 </title>
+      <p>
+      所有的HDFS通讯协议都是建立在TCP/IP协议之上。客户端通过一个可配置的<acronym title="Transmission Control Protocol">TCP</acronym>端口连接到Namenode,通过ClientProtocol协议与Namenode交互。而Datanode使用DatanodeProtocol协议与Namenode交互。一个远程过程调用(<acronym title="Remote Procedure Call">RPC</acronym>)模型被抽象出来封装ClientProtocol和Datanodeprotocol协议。在设计上,Namenode不会主动发起RPC,而是响应来自客户端或 Datanode 的RPC请求。 
+      </p>
+    </section> 
+
+    <section> 
+      <title> 健壮性 </title>
+      <p>
+	      HDFS的主要目标就是即使在出错的情况下也要保证数据存储的可靠性。常见的三种出错情况是:Namenode出错, Datanode出错和网络割裂(network partitions)。
+      </p>
+ 
+      <section>
+        <title> 磁盘数据错误,心跳检测和重新复制 </title>
+        <p>
+        每个Datanode节点周期性地向Namenode发送心跳信号。网络割裂可能导致一部分Datanode跟Namenode失去联系。Namenode通过心跳信号的缺失来检测这一情况,并将这些近期不再发送心跳信号的Datanode标记为宕机,不会再将新的<acronym title="Input/Output">IO</acronym>请求发给它们。任何存储在宕机Datanode上的数据将不再有效。Datanode的宕机可能会引起一些数据块的副本系数低于指定值,Namenode不断地检测这些需要复制的数据块,一旦发现就启动复制操作。在下列情况下,可能需要重新复制:某个Datanode节点失效,某个副本遭到损坏,Datanode上的硬盘错误,或者文件的副本系数增大。
+        </p>
+      </section>
+
+      <section>
+        <title> 集群均衡 </title>
+        <p>
+        HDFS的架构支持数据均衡策略。如果某个Datanode节点上的空闲空间低于特定的临界点,按照均衡策略系统就会自动地将数据从这个Datanode移动到其他空闲的Datanode。如果对某个文件的请求突然增加,那么也可能启动一个计划创建该文件新的副本,并且同时重新平衡集群中的其他数据。这些均衡策略目前还没有实现。
+        </p>
+      </section>
+
+      <section>
+        <title> 数据完整性 </title>
+        <p>
+        <!-- XXX "checksum checking" sounds funny -->
+        从某个Datanode获取的数据块有可能是损坏的,损坏可能是由Datanode的存储设备错误、网络错误或者软件bug造成的。HDFS客户端软件实现了对HDFS文件内容的校验和(checksum)检查。当客户端创建一个新的HDFS文件,会计算这个文件每个数据块的校验和,并将校验和作为一个单独的隐藏文件保存在同一个HDFS名字空间下。当客户端获取文件内容后,它会检验从Datanode获取的数据跟相应的校验和文件中的校验和是否匹配,如果不匹配,客户端可以选择从其他Datanode获取该数据块的副本。
+        </p>
+      </section>
+      <section>
+        <title> 元数据磁盘错误 </title>
+        <p>
+        FsImage和Editlog是HDFS的核心数据结构。如果这些文件损坏了,整个HDFS实例都将失效。因而,Namenode可以配置成支持维护多个FsImage和Editlog的副本。任何对FsImage或者Editlog的修改,都将同步到它们的副本上。这种多副本的同步操作可能会降低Namenode每秒处理的名字空间事务数量。然而这个代价是可以接受的,因为即使HDFS的应用是数据密集的,它们也并非元数据密集型的。当Namenode重启的时候,它会选取最近的完整的FsImage和Editlog来使用。
+        </p>
+        <p> 
+        Namenode是HDFS集群中的单点故障(single point of failure)所在。如果Namenode机器故障,是需要手工干预的。目前,自动重启或在另一台机器上做Namenode故障转移的功能还没实现。
+        </p>
+      </section>
+
+      <section>
+        <title> 快照 </title>
+        <p>
+        快照支持某一特定时刻的数据的复制备份。利用快照,可以让HDFS在数据损坏时恢复到过去一个已知正确的时间点。HDFS目前还不支持快照功能,但计划在将来的版本进行支持。
+        </p>
+      </section>
+    </section>
+    <section> 
+      <!-- XXX Better name -->
+      <title> 数据组织 </title>
+
+      <section>
+        <title> 数据块 </title>
+        <p>
+        HDFS被设计成支持大文件,适用HDFS的是那些需要处理大规模的数据集的应用。这些应用都是只写入数据一次,但却读取一次或多次,并且读取速度应能满足流式读取的需要。HDFS支持文件的“一次写入多次读取”语义。一个典型的数据块大小是64MB。因而,HDFS中的文件总是按照64MB被切分成不同的块,每个块尽可能地存储于不同的Datanode中。
+        </p>
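+        <p>
+        下面是一个最小的示例草图(并非权威实现),用来说明应用程序如何在创建文件时显式指定数据块大小和副本系数。示例假设使用 org.apache.hadoop.fs.FileSystem 的 Java API,其中的路径、缓冲区大小等数值仅为示意:
+        </p>
+<source>
+// 示例草图:创建文件时显式指定数据块大小与副本系数(路径与数值仅为示意)
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class BlockSizeExample {
+  public static void main(String[] args) throws Exception {
+    FileSystem fs = FileSystem.get(new Configuration()); // 读取默认配置
+    long blockSize = 64L * 1024 * 1024;                  // 64MB 数据块
+    FSDataOutputStream out = fs.create(
+        new Path("/user/hadoop/bigfile.dat"), // 示意路径
+        true,                                 // 覆盖已存在的文件
+        4096,                                 // 写缓冲大小
+        (short) 3,                            // 副本系数
+        blockSize);
+    out.close();
+    fs.close();
+  }
+}
+</source>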
+      </section>
+ 
+      <section>
+        <!-- XXX staging never described / referenced in its section -->
+        <title> Staging </title>
+        <p>
+        客户端创建文件的请求其实并没有立即发送给Namenode,事实上,在刚开始阶段HDFS客户端会先将文件数据缓存到本地的一个临时文件。应用程序的写操作被透明地重定向到这个临时文件。当这个临时文件累积的数据量超过一个数据块的大小,客户端才会联系Namenode。Namenode将文件名插入文件系统的层次结构中,并且分配一个数据块给它。然后返回Datanode的标识符和目标数据块给客户端。接着客户端将这块数据从本地临时文件上传到指定的Datanode上。当文件关闭时,在临时文件中剩余的没有上传的数据也会传输到指定的Datanode上。然后客户端告诉Namenode文件已经关闭。此时Namenode才将文件创建操作提交到日志里进行存储。如果Namenode在文件关闭前宕机了,则该文件将丢失。
+        </p>
+        <p>
+        上述方法是对在HDFS上运行的目标应用进行认真考虑后得到的结果。这些应用需要进行文件的流式写入。如果不采用客户端缓存,网络速度和网络堵塞会对吞吐量造成比较大的影响。这种方法并不是没有先例的,早期的文件系统,比如<acronym title="Andrew File System">AFS</acronym>,就用客户端缓存来提高性能。为了达到更高的数据上传效率,已经放松了POSIX标准的要求。
+        </p>
+      </section>
+
+      <section>
+        <title> 流水线复制 </title>
+        <p>
+        当客户端向HDFS文件写入数据的时候,一开始是写到本地临时文件中。假设该文件的副本系数设置为3,当本地临时文件累积到一个数据块的大小时,客户端会从Namenode获取一个Datanode列表用于存放副本。然后客户端开始向第一个Datanode传输数据,第一个Datanode一小部分一小部分(4 KB)地接收数据,将每一部分写入本地仓库,并同时传输该部分到列表中第二个Datanode节点。第二个Datanode也是这样,一小部分一小部分地接收数据,写入本地仓库,并同时传给第三个Datanode。最后,第三个Datanode接收数据并存储在本地。因此,Datanode能流水线式地从前一个节点接收数据,并在同时转发给下一个节点,数据以流水线的方式从前一个Datanode复制到下一个。
+        </p>
+      </section>
+
+    </section>
+
+    <section>
+      <!-- XXX "Accessibility" sounds funny - "Interfaces" ? -->
+      <title> 可访问性 </title>
+      <!-- XXX Make an API section ? (HTTP is "web service" API?) -->
+      <p>
+      HDFS给应用提供了多种访问方式。用户可以通过<a href="http://hadoop.apache.org/core/docs/current/api/">Java API</a>接口访问,也可以通过C语言的封装API访问,还可以通过浏览器的方式访问HDFS中的文件。通过<acronym title="Web-based Distributed Authoring and Versioning">WebDAV</acronym>协议访问的方式正在开发中。
+      </p>
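+      <p>
+      下面给出一个使用Java API读写HDFS文件的最小示例草图(并非权威实现,文件路径仅为示意,假设配置文件已指向目标集群):
+      </p>
+<source>
+// 示例草图:通过 FileSystem API 写入并读回一个 HDFS 文件
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class HdfsApiExample {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();          // 加载 core-site.xml 等默认配置
+    FileSystem fs = FileSystem.get(conf);              // 获取配置中指定的默认文件系统
+    Path file = new Path("/user/hadoop/example.txt");  // 示意路径
+
+    FSDataOutputStream out = fs.create(file);          // 创建并写入文件
+    out.writeUTF("hello hdfs");
+    out.close();
+
+    FSDataInputStream in = fs.open(file);              // 读回文件内容
+    System.out.println(in.readUTF());
+    in.close();
+    fs.close();
+  }
+}
+</source>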
+
+      <section>
+        <title> DFSShell </title>
+        <p>
+        HDFS以文件和目录的形式组织用户数据。它提供了一个命令行的接口(DFSShell)让用户与HDFS中的数据进行交互。命令的语法和用户熟悉的其他shell(例如 bash, csh)工具类似。下面是一些动作/命令的示例:
+        </p>
+        <table>
+          <tr>
+            <th> 动作 </th><th> 命令 </th>
+          </tr>
+          <tr>
+            <td> 创建一个名为 <code>/foodir</code> 的目录 </td> <td> <code>bin/hadoop dfs -mkdir /foodir</code> </td>
+          </tr>
+          <tr>
+            <td> 删除一个名为 <code>/foodir</code> 的目录 </td> <td> <code>bin/hadoop dfs -rmr /foodir</code> </td>
+          </tr>
+          <tr>
+            <td> 查看名为 <code>/foodir/myfile.txt</code> 的文件内容 </td> <td> <code>bin/hadoop dfs -cat /foodir/myfile.txt</code> </td>
+          </tr>
+        </table>
+        <p>
+        DFSShell 可以用在那些通过脚本语言和文件系统进行交互的应用程序上。
+        </p>
+      </section>
+
+      <section> 
+        <title> DFSAdmin </title>
+        <p>
+		DFSAdmin 命令用来管理HDFS集群。这些命令只有HDFS的管理员才能使用。下面是一些动作/命令的示例:
+        </p>
+        <table>
+          <tr>
+            <th> 动作 </th><th> 命令 </th>
+          </tr>
+          <tr>
+            <td> 将集群置于安全模式 </td> <td> <code>bin/hadoop dfsadmin -safemode enter</code> </td>
+          </tr>
+          <tr>
+            <td> 显示Datanode列表 </td> <td> <code>bin/hadoop dfsadmin -report</code> </td>
+          </tr>
+          <tr>
+            <td> 使Datanode节点 <code>datanodename</code>退役</td><td> <code>bin/hadoop dfsadmin -decommission datanodename</code> </td>
+          </tr>
+        </table>
+      </section>
+
+      <section> 
+        <title> 浏览器接口 </title>
+        <p>
+	一个典型的HDFS安装会在一个可配置的TCP端口开启一个Web服务器用于暴露HDFS的名字空间。用户可以用浏览器来浏览HDFS的名字空间和查看文件的内容。
+       </p>
+      </section>
+
+    </section> 
+
+    <section> 
+      <title> 存储空间回收 </title>
+
+      <section>
+        <title> 文件的删除和恢复 </title>
+	<p>
+       当用户或应用程序删除某个文件时,这个文件并没有立刻从HDFS中删除。实际上,HDFS会将这个文件重命名转移到<code>/trash</code>目录。只要文件还在<code>/trash</code>目录中,该文件就可以被迅速地恢复。文件在<code>/trash</code>中保存的时间是可配置的,当超过这个时间时,Namenode就会将该文件从名字空间中删除。删除文件会使得该文件相关的数据块被释放。注意,从用户删除文件到HDFS空闲空间的增加之间会有一定时间的延迟。</p>
+        <p>
+只要被删除的文件还在<code>/trash</code>目录中,用户就可以恢复这个文件。如果用户想恢复被删除的文件,他/她可以浏览<code>/trash</code>目录找回该文件。<code>/trash</code>目录仅仅保存被删除文件的最后副本。<code>/trash</code>目录与其他的目录没有什么区别,除了一点:在该目录上HDFS会应用一个特殊策略来自动删除文件。目前的默认策略是删除<code>/trash</code>中保留时间超过6小时的文件。将来,这个策略可以通过一个被良好定义的接口配置。
+        </p>
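+        <p>
+        作为参考,下面是一个恢复被删除文件的示例草图(并非权威实现):假设文件仍保留在<code>/trash</code>中,示例中的回收站内路径布局与文件名仅为示意,实际布局可能因版本与配置而异:
+        </p>
+<source>
+// 示例草图:用 FileSystem.rename 把仍在 /trash 中的文件移回原位置(路径仅为示意)
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class RestoreFromTrash {
+  public static void main(String[] args) throws Exception {
+    FileSystem fs = FileSystem.get(new Configuration());
+    Path inTrash = new Path("/trash/user/hadoop/report.txt"); // 假设的回收站内路径
+    Path original = new Path("/user/hadoop/report.txt");      // 原位置
+    if (fs.exists(inTrash)) {
+      boolean ok = fs.rename(inTrash, original);              // 移回原位置即完成恢复
+      System.out.println("restored: " + ok);
+    }
+    fs.close();
+  }
+}
+</source>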
+      </section>
+
+      <section>
+        <title> 减少副本系数 </title>
+        <p>
+        当一个文件的副本系数被减小后,Namenode会选择过剩的副本删除。下次心跳检测时会将该信息传递给Datanode。Datanode随即移除相应的数据块,集群中的空闲空间加大。同样,在调用<code>setReplication</code> API结束和集群中空闲空间增加之间会有一定的延迟。</p>
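+        <p>
+        下面是一个调用<code>setReplication</code>的最小示例草图(并非权威实现,路径与副本系数仅为示意):
+        </p>
+<source>
+// 示例草图:减小某个文件的副本系数;Namenode 随后会通知 Datanode 删除多余副本
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class LowerReplication {
+  public static void main(String[] args) throws Exception {
+    FileSystem fs = FileSystem.get(new Configuration());
+    boolean ok = fs.setReplication(new Path("/user/hadoop/file1"), (short) 2);
+    System.out.println("setReplication returned: " + ok);
+    fs.close();
+  }
+}
+</source>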
+      </section>
+    </section>
+
+
+    <section>
+      <title> 参考资料 </title>
+      <p>
+      HDFS Java API: 
+      <a href="http://hadoop.apache.org/core/docs/current/api/"> 
+        http://hadoop.apache.org/core/docs/current/api/
+      </a>
+      </p>
+      <p>
+      HDFS 源代码: 
+      <a href= "http://hadoop.apache.org/core/version_control.html"> 
+        http://hadoop.apache.org/core/version_control.html
+      </a>
+      </p>
+    </section> 
+
+  </body>
+</document>

+ 193 - 0
common/src/docs/cn/src/documentation/content/xdocs/hdfs_permissions_guide.xml

@@ -0,0 +1,193 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+
+  <header>
+    <title>
+      HDFS权限管理用户指南
+    </title>
+  </header>
+
+  <body>
+    <section> <title>概述</title>
+      <p>
+		Hadoop分布式文件系统实现了一个和POSIX系统类似的文件和目录的权限模型。每个文件和目录有一个<em>所有者(owner)</em>和一个<em>组(group)</em>。文件或目录对其所有者、同组的其他用户以及所有其他用户分别有着不同的权限。对文件而言,当读取这个文件时需要有<em>r</em>权限,当写入或者追加到文件时需要有<em>w</em>权限。对目录而言,当列出目录内容时需要具有<em>r</em>权限,当新建或删除子文件或子目录时需要有<em>w</em>权限,当访问目录的子节点时需要有<em>x</em>权限。不同于POSIX模型,HDFS权限模型中的文件没有<em>sticky</em>,<em>setuid</em>或<em>setgid</em>位,因为这里没有可执行文件的概念。为了简单起见,这里也没有目录的<em>sticky</em>,<em>setuid</em>或<em>setgid</em>位。总的来说,文件或目录的权限就是它的<em>模式(mode)</em>。HDFS采用了Unix表示和显示模式的习惯,包括使用八进制数来表示权限。当新建一个文件或目录,它的所有者即客户进程的用户,它的所属组是父目录的组(BSD的规定)。
+	</p>
+	<p>
+		每个访问HDFS的用户进程的标识分为两个部分,分别是<em>用户名</em>和<em>组名列表</em>。每次用户进程访问一个文件或目录<code>foo</code>,HDFS都要对其进行权限检查,
+	</p>
+	<ul>
+		<li>
+		   如果用户即<code>foo</code>的所有者,则检查所有者的访问权限;
+		</li>
+		<li>
+		   如果<code>foo</code>关联的组在组名列表中出现,则检查组用户的访问权限;
+		</li>
+		<li>
+		   否则检查<code>foo</code>其他用户的访问权限。
+		</li>
+	</ul>
+
+<p>
+		如果权限检查失败,则客户的操作会失败。
+</p>
+     </section>
+
+<section><title>用户身份</title>
+<p>
+在这个版本的Hadoop中,客户端用户身份是通过宿主操作系统给出。对类Unix系统来说,
+</p>
+<ul>
+<li>
+   用户名等于<code>`whoami`</code>;
+</li>
+<li>
+   组列表等于<code>`bash -c groups`</code>。
+</li>
+</ul>
+
+<p>
+将来会增加其他的方式来确定用户身份(比如Kerberos、LDAP等)。想依靠上文中提到的第一种方式来防止一个用户假冒另一个用户是不现实的。这种用户身份识别机制结合权限模型,允许一个协作团体以一种有组织的形式共享文件系统中的资源。
+</p>
+<p>
+不管怎样,用户身份机制对HDFS本身来说只是外部特性。HDFS并不提供创建用户身份、创建组或处理用户凭证等功能。
+</p>
+</section>
+
+<section> <title>理解系统的实现</title>
+<p>
+	每次文件或目录操作都传递完整的路径名给name node,每一个操作都会对此路径做权限检查。客户框架会隐式地将用户身份和与name node的连接关联起来,从而减少改变现有客户端API的需求。经常会有这种情况,当对一个文件的某一操作成功后,之后同样的操作却会失败,这是因为文件或路径上的某些目录已经不复存在了。比如,客户端首先开始读一个文件,它向name node发出一个请求以获取文件第一个数据块的位置。但接下去的获取其他数据块的第二个请求可能会失败。另一方面,删除一个文件并不会撤销客户端已经获得的对文件数据块的访问权限。而权限管理能使得客户端对一个文件的访问许可在两次请求之间被收回。重复一下,权限的改变并不会撤销当前客户端对文件数据块的访问许可。
+</p>
+<p>
+map-reduce框架通过传递字符串来指派用户身份,没有做其他特别的安全方面的考虑。文件或目录的所有者和组属性是以字符串的形式保存,而不是像传统的Unix方式转换为用户和组的数字ID。
+</p>
+<p>
+这个发行版本的权限管理特性并不需要改变data node的任何行为。Data node上的数据块上并没有任何<em>Hadoop</em>所有者或权限等关联属性。
+</p>
+</section>
+     
+<section> <title>文件系统API变更</title>
+<p>
+	如果权限检查失败,所有使用一个路径参数的方法都可能抛出<code>AccessControlException</code>异常。
+</p>
+<p>新增方法:</p>
+<ul>
+	<li>
+		<code>public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException;</code>
+	</li>
+	<li>
+		<code>public boolean mkdirs(Path f, FsPermission permission) throws IOException;</code>
+	</li>
+	<li>
+		<code>public void setPermission(Path p, FsPermission permission) throws IOException;</code>
+	</li>
+	<li>
+		<code>public void setOwner(Path p, String username, String groupname) throws IOException;</code>
+	</li>
+	<li>
+		<code>public FileStatus getFileStatus(Path f) throws IOException;</code> 也会返回路径关联的所有者、组和模式属性。
+	</li>
+
+</ul>
+<p>
+新建文件或目录的模式受配置参数<code>umask</code>的约束。当使用之前的 <code>create(path, &hellip;)</code> 方法(<em>没有指定</em>权限参数)时,新文件的模式是<code>666&thinsp;&amp;&thinsp;^umask</code>。当使用新的 <code>create(path, </code><em>permission</em><code>, &hellip;)</code> 方法(<em>指定了</em>权限参数<em>P</em>)时,新文件的模式是<code>P&thinsp;&amp;&thinsp;^umask&thinsp;&amp;&thinsp;666</code>。当使用先前的 <code>mkdirs(path)</code> 方法(<em>没有指定</em> 权限参数)新建一个目录时,新目录的模式是<code>777&thinsp;&amp;&thinsp;^umask</code>。当使用新的 <code>mkdirs(path, </code><em>permission</em> <code>)</code> 方法(<em>指定了</em>权限参数<em>P</em>)新建一个目录时,新目录的模式是<code>P&thinsp;&amp;&thinsp;^umask&thinsp;&amp;&thinsp;777</code>。
+</p>
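+<p>
+下面的示例草图(并非权威实现)演示了如何组合使用上述新增的API:先以指定权限创建目录,再修改属主,最后读回属性。其中的路径、用户名、组名和模式值仅为示意,最终生效的模式还要按上文的公式与umask做运算:
+</p>
+<source>
+// 示例草图:使用 FsPermission 相关 API 创建目录并设置属主与模式(取值仅为示意)
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.permission.FsPermission;
+
+public class PermissionExample {
+  public static void main(String[] args) throws Exception {
+    FileSystem fs = FileSystem.get(new Configuration());
+    Path dir = new Path("/user/hadoop/shared");            // 示意路径
+
+    fs.mkdirs(dir, new FsPermission((short) 0750));        // 请求模式 750,实际模式还受 umask 影响
+    fs.setOwner(dir, "hadoop", "supergroup");              // 修改属主通常需要超级用户身份
+    fs.setPermission(dir, new FsPermission((short) 0755)); // 之后也可以单独修改模式
+
+    FileStatus st = fs.getFileStatus(dir);                 // 读回所有者、组和模式
+    System.out.println(st.getOwner() + ":" + st.getGroup() + " " + st.getPermission());
+  }
+}
+</source>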
+</section>
+
+     
+<section> <title>Shell命令变更</title>
+<p>新增操作:</p>
+<dl>
+	<dt><code>chmod [-R]</code> <em>mode file &hellip;</em></dt>
+	<dd>
+		只有文件的所有者或者超级用户才有权限改变文件模式。
+	</dd>
+	<dt><code>chgrp [-R]</code> <em>group file &hellip;</em></dt>
+	<dd>
+		使用<code>chgrp</code>命令的用户必须属于特定的组且是文件的所有者,或者用户是超级用户。
+	</dd>
+	<dt><code>chown [-R]</code> <em>[owner][:[group]] file &hellip;</em></dt>
+	<dd>
+		文件的所有者只能被超级用户更改。
+	</dd>
+	<dt><code>ls </code> <em>file &hellip;</em></dt><dd></dd>
+	<dt><code>lsr </code> <em>file &hellip;</em></dt>
+	<dd>
+		输出格式做了调整以显示所有者、组和模式。
+	</dd>
+</dl></section>
+
+     
+<section> <title>超级用户</title>
+<p>
+超级用户即运行name node进程的用户。宽泛地讲,如果你启动了name node,你就是超级用户。超级用户可以执行任何操作,因为超级用户能够通过所有的权限检查。系统中没有永久记号记录谁<em>过去</em>是超级用户;当name node开始运行时,进程自动判断谁<em>现在</em>是超级用户。HDFS的超级用户不一定非得是name node主机上的超级用户,也不需要所有集群的超级用户都是同一个。同样地,在个人工作站上运行HDFS的实验者,不需任何配置就已方便地成为了他的部署实例的超级用户。
+	</p>
+	<p>
+	另外,管理员可以用配置参数指定一组特定的用户,如果做了设定,这个组的成员也会是超级用户。
+</p>
+</section>
+
+<section> <title>Web服务器</title>
+<p>
+Web服务器的身份是一个可配置参数。Name node并没有<em>真实</em>用户的概念,但是Web服务器表现地就像它具有管理员选定的用户的身份(用户名和组)一样。除非这个选定的身份是超级用户,否则会有名字空间中的一部分对Web服务器来说不可见。
+</p>
+</section>
+
+<section> <title>在线升级</title>
+<p>
+如果集群在0.15版本的数据集(<code>fsimage</code>)上启动,所有的文件和目录都有所有者<em>O</em>,组<em>G</em>,和模式<em>M</em>,这里 <em>O</em> 和 <em>G</em> 分别是超级用户的用户标识和组名,<em>M</em>是一个配置参数。</p>
+</section>
+
+<section> <title>配置参数</title>
+<dl>
+	<dt><code>dfs.permissions = true </code></dt>
+	<dd>
+		如果是 <code>true</code>,则打开前文所述的权限系统。如果是 <code>false</code>,权限<em>检查</em> 就是关闭的,但是其他的行为没有改变。这个配置参数的改变并不改变文件或目录的模式、所有者和组等信息。
+		<p>
+		</p>
+		不管权限模式是开还是关,<code>chmod</code>、<code>chgrp</code> 和 <code>chown</code> <em>总是</em>会检查权限。这些命令只有在权限检查的背景下才有意义,所以不会有兼容性问题。这样就能让管理员在打开常规的权限检查之前,可靠地设置文件的所有者和权限。
+	</dd>
+	<dt><code>dfs.web.ugi = webuser,webgroup</code></dt>
+	<dd>
+	Web服务器使用的用户名。如果将这个参数设置为超级用户的名称,则所有Web客户就可以看到所有的信息。如果将这个参数设置为一个不使用的用户,则Web客户就只能访问到“other”权限可访问的资源了。额外的组可以加在后面,形成一个用逗号分隔的列表。
+	</dd>
+	<dt><code>dfs.permissions.supergroup = supergroup</code></dt>
+	<dd>
+	超级用户的组名。
+	</dd>
+	<dt><code>dfs.upgrade.permission = 777</code></dt>
+	<dd>
+	升级时的初始模式。文件<em>永不会</em>被设置<em>x</em>权限。在配置文件中,可以使用十进制数<em>511<sub>10</sub></em>。
+	</dd>
+	<dt><code>dfs.umask = 022</code></dt>
+	<dd>
+		<code>umask</code>参数在创建文件和目录时使用。在配置文件中,可以使用十进制数<em>18<sub>10</sub></em>。
+	</dd>
+</dl>
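+<p>
+下面是一个读取上述配置参数的最小示例草图(并非权威实现):键名取自本节,默认值仅为示意,实际取值以集群的配置文件为准:
+</p>
+<source>
+// 示例草图:通过 Configuration API 读取权限相关的配置参数
+import org.apache.hadoop.conf.Configuration;
+
+public class ReadPermissionConf {
+  public static void main(String[] args) {
+    Configuration conf = new Configuration();  // 加载默认配置文件
+    boolean checks = conf.getBoolean("dfs.permissions", true);
+    String webUgi = conf.get("dfs.web.ugi", "webuser,webgroup");
+    String superGroup = conf.get("dfs.permissions.supergroup", "supergroup");
+    System.out.println(checks + " " + webUgi + " " + superGroup);
+  }
+}
+</source>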
+</section>
+
+     
+  </body>
+</document>
+ 	
+

+ 72 - 0
common/src/docs/cn/src/documentation/content/xdocs/hdfs_quota_admin_guide.xml

@@ -0,0 +1,72 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+
+  <header>
+    <title>
+      名字空间配额管理指南
+    </title>
+  </header>
+
+  <body>
+      <p>
+      Hadoop分布式文件系统(HDFS)允许管理员为每个目录设置配额。
+      新建立的目录没有配额。
+      最大的配额是<code>Long.Max_Value</code>。配额为1可以强制目录保持为空。
+      </p>
+
+      <p>
+      目录配额是对目录树上该目录下的名字数量做硬性限制。如果创建文件或目录时超过了配额,该操作会失败。重命名不会改变该目录的配额;如果重命名操作会导致违反配额限制,该操作将会失败。如果尝试设置一个配额而现有文件数量已经超出了这个新配额,则设置失败。
+      </p>
+
+      <p>
+      配额和fsimage保持一致。当启动时,如果fsimage违反了某个配额限制(也许fsimage被偷偷改变了),则启动失败并生成错误报告。设置或删除一个配额会创建相应的日志记录。
+      </p> 
+
+      <p>
+      下面的新命令或新选项是用于支持配额的。
+      前两个是管理员命令。
+      </p>
+
+      <ul>
+      <li>
+      <code>dfsadmin -setquota &lt;N> &lt;directory>...&lt;directory></code> 
+      <br /> 
+      把每个目录配额设为<code>N</code>。这个命令会在每个目录上尝试,
+      如果<code>N</code>不是一个正的长整型数,目录不存在或是文件名,
+      或者目录超过配额,则会产生错误报告。
+      </li>
+  
+      <li>
+      <code>dfsadmin -clrquota &lt;directory>...&lt;directory></code><br /> 
+      为每个目录删除配额。这个命令会在每个目录上尝试,如果目录不存在或者是文件,则会产生错误报告。如果目录原来没有设置配额不会报错。
+      </li>
+  
+      <li>
+      <code>fs -count -q &lt;directory>...&lt;directory></code><br />
+      使用<code>-q</code>选项,会报告每个目录设置的配额,以及剩余配额。
+      如果目录没有设置配额,会报告<code>none</code>和<code>inf</code>。
+      </li>
+      </ul>
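+      <p>
+      除了上面的命令,应用程序也可以通过Java API查看某个目录的配额使用情况。下面是一个最小示例草图(并非权威实现,目录路径仅为示意):
+      </p>
+<source>
+// 示例草图:用 getContentSummary 查看目录的名字配额及已用数量(路径仅为示意)
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class QuotaExample {
+  public static void main(String[] args) throws Exception {
+    FileSystem fs = FileSystem.get(new Configuration());
+    ContentSummary cs = fs.getContentSummary(new Path("/user/hadoop/project"));
+    System.out.println("quota = " + cs.getQuota());   // 未设置配额时通常返回 -1
+    long namesUsed = cs.getFileCount() + cs.getDirectoryCount();
+    System.out.println("names used = " + namesUsed);
+    fs.close();
+  }
+}
+</source>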
+   </body>
+</document>

+ 477 - 0
common/src/docs/cn/src/documentation/content/xdocs/hdfs_shell.xml

@@ -0,0 +1,477 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+	<header>
+		<title>Hadoop Shell命令</title>
+	</header>
+	<body>
+<!--DCCOMMENT:diff begin-->
+		<section>
+			<title> FS Shell </title>
+			<p>
+      调用文件系统(FS)Shell命令应使用
+      <code>bin/hadoop fs &lt;args&gt;</code>的形式。
+      所有的FS shell命令使用URI路径作为参数。URI格式是<em>scheme://authority/path</em>。对HDFS文件系统,scheme是<em>hdfs</em>,对本地文件系统,scheme是<em>file</em>。其中scheme和authority参数都是可选的,如果未加指定,就会使用配置中指定的默认scheme。一个HDFS文件或目录比如<em>/parent/child</em>可以表示成<em>hdfs://namenode:namenodeport/parent/child</em>,或者更简单的<em>/parent/child</em>(假设你配置文件中的默认值是<em>namenode:namenodeport</em>)。大多数FS Shell命令的行为和对应的Unix Shell命令类似,不同之处会在下面介绍各命令使用详情时指出。出错信息会输出到<em>stderr</em>,其他信息输出到<em>stdout</em>。
+  </p>
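+			<p>
+			作为补充,下面的Java示例草图(并非权威实现)用程序的方式说明了同样的scheme规则:不带URI时使用配置中的默认文件系统,带上<em>hdfs</em>或<em>file</em> scheme时分别访问HDFS和本地文件系统。示例中的主机名与端口仅为示意:
+			</p>
+<source>
+// 示例草图:scheme 决定访问哪个文件系统(主机名与端口仅为示意)
+import java.net.URI;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+
+public class SchemeExample {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    FileSystem defaultFs = FileSystem.get(conf);                                 // 配置中的默认文件系统
+    FileSystem hdfs = FileSystem.get(URI.create("hdfs://namenode:9000/"), conf); // 显式访问 HDFS
+    FileSystem local = FileSystem.get(URI.create("file:///"), conf);             // 本地文件系统
+    System.out.println(defaultFs.getUri() + " " + hdfs.getUri() + " " + local.getUri());
+  }
+}
+</source>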
+		<section>
+			<title> cat </title>
+			<p>
+				<code>使用方法:hadoop fs -cat URI [URI &#x2026;]</code>
+			</p>
+<!--DCCOMMENT:diff end
+@@ -21,17 +21,28 @@
+        </header>
+        <body>
+                <section>
+-                       <title> DFShell </title>
++                       <title> FS Shell </title>
+                        <p>
+-      The HDFS shell is invoked by
+-      <code>bin/hadoop dfs &lt;args&gt;</code>.
+-      All the HDFS shell commands take path URIs as arguments. The URI format is <em>scheme://autority/path</em>. For HDFS the scheme is <em>hdfs</em>, and for the local filesystem the scheme is <em>file</em>. The scheme and authority are optional. If not specified, the default scheme specified in the configuration is used. An HDFS file or directory such as <em>/parent/child</em> can be specified as <em>hdfs://namenode:namenodeport/parent/child</em> or simply as <em>/parent/child</em> (given that your configuration is set to point to <em>namenode:namenodeport</em>). Most of the commands in HDFS shell behave like corresponding Unix commands. Differences are described with each of the commands. Error information is sent to <em>stderr</em> and the output is sent to <em>stdout</em>.
++      The FileSystem (FS) shell is invoked by
++      <code>bin/hadoop fs &lt;args&gt;</code>.
++      All the FS shell commands take path URIs as arguments. The URI
++      format is <em>scheme://autority/path</em>. For HDFS the scheme
++      is <em>hdfs</em>, and for the local filesystem the scheme
++      is <em>file</em>. The scheme and authority are optional. If not
++      specified, the default scheme specified in the configuration is
++      used. An HDFS file or directory such as <em>/parent/child</em>
++      can be specified as <em>hdfs://namenodehost/parent/child</em> or
++      simply as <em>/parent/child</em> (given that your configuration
++      is set to point to <em>hdfs://namenodehost</em>). Most of the
++      commands in FS shell behave like corresponding Unix
++      commands. Differences are described with each of the
++      commands. Error information is sent to <em>stderr</em> and the
++      output is sent to <em>stdout</em>.
+   </p>
+-               </section>
+                <section>
+                        <title> cat </title>
+                        <p>
+-                               <code>Usage: hadoop dfs -cat URI [URI &#x2026;]</code>
++                               <code>Usage: hadoop fs -cat URI [URI &#x2026;]</code>
+                        </p>
+                        <p>
+
+-->
+			<p>
+		   将路径指定文件的内容输出到<em>stdout</em>。
+		   </p>
+<!--DCCOMMENT:diff begin-->
+			<p>示例:</p>
+			<ul>
+				<li>
+					<code> hadoop fs -cat hdfs://host1:port1/file1 hdfs://host2:port2/file2 
+		   </code>
+				</li>
+				<li>
+					<code>hadoop fs -cat file:///file3 /user/hadoop/file4 </code>
+				</li>
+			</ul>
+			<p>返回值:<br/>
+<!--DCCOMMENT:diff end
+note:"hadoop dfs" has been replaced by "hadoop fs" in this doc.
+
+@@ -39,11 +50,11 @@
+                        <p>Example:</p>
+                        <ul>
+                                <li>
+-                                       <code> hadoop dfs -cat hdfs://host1:port1/file1 hdfs://host2:port2/file2
++                                       <code> hadoop fs -cat hdfs://nn1.example.com/file1 hdfs://nn2.example.com/file2
+                   </code>
+                                </li>
+                                <li>
+-                                       <code>hadoop dfs -cat file:///file3 /user/hadoop/file4 </code>
++                                       <code>hadoop fs -cat file:///file3 /user/hadoop/file4 </code>
+                                </li>
+                        </ul>
+                        <p>Exit Code:<br/>
+-->
+		   <code> 成功返回0,失败返回-1。</code></p>
+		</section>
+		<section>
+			<title> chgrp </title>
+			<p>
+				<code>使用方法:hadoop fs -chgrp [-R] GROUP URI [URI &#x2026;]</code>
+<!--DCCOMMENT:
+            Change group association of files. With <code>-R</code>, make the change recursively through the directory structure. The user must be the owner of files, or else a super-user. Additional information is in the <a href="hdfs_permissions_guide.html">Permissions User Guide</a>.
+-->
+			</p>
+			<p>
+	    改变文件所属的组。使用<code>-R</code>将使改变在目录结构下递归进行。命令的使用者必须是文件的所有者或者超级用户。更多的信息请参见<a href="hdfs_permissions_guide.html">HDFS权限用户指南</a>。
+	    </p>
+		</section>
+		<section>
+			<title> chmod </title>
+			<p>
+				<code>使用方法:hadoop fs -chmod [-R] &lt;MODE[,MODE]... | OCTALMODE&gt; URI [URI &#x2026;]</code>
+			</p>
+			<p>
+	    改变文件的权限。使用<code>-R</code>将使改变在目录结构下递归进行。命令的使用者必须是文件的所有者或者超级用户。更多的信息请参见<a href="hdfs_permissions_guide.html">HDFS权限用户指南</a>。
+	    </p>
+		</section>
+		<section>
+			<title> chown </title>
+			<p>
+				<code>使用方法:hadoop fs -chown [-R] [OWNER][:[GROUP]] URI [URI ]</code>
+			</p>
+			<p>
+	    改变文件的拥有者。使用<code>-R</code>将使改变在目录结构下递归进行。命令的使用者必须是超级用户。更多的信息请参见<a href="hdfs_permissions_guide.html">HDFS权限用户指南</a>。
+	    </p>
+		</section>
+		<section>
+			<title>copyFromLocal</title>
+			<p>
+				<code>使用方法:hadoop fs -copyFromLocal &lt;localsrc&gt; URI</code>
+			</p>
+			<p>除了限定源路径是一个本地文件外,和<a href="#putlink"><strong>put</strong></a>命令相似。</p>
+		</section>
+		<section>
+			<title> copyToLocal</title>
+			<p>
+				<code>使用方法:hadoop fs -copyToLocal [-ignorecrc] [-crc] URI &lt;localdst&gt;</code>
+			</p>
+			<p>除了限定目标路径是一个本地文件外,和<a href="#getlink"><strong>get</strong></a>命令类似。</p>
+		</section>
+		<section>
+			<title> cp </title>
+			<p>
+				<code>使用方法:hadoop fs -cp URI [URI &#x2026;] &lt;dest&gt;</code>
+			</p>
+			<p>
+	    将文件从源路径复制到目标路径。这个命令允许有多个源路径,此时目标路径必须是一个目录。
+	    <br/>
+	    示例:</p>
+			<ul>
+				<li>
+					<code> hadoop fs -cp /user/hadoop/file1 /user/hadoop/file2</code>
+				</li>
+				<li>
+					<code> hadoop fs -cp /user/hadoop/file1 /user/hadoop/file2 /user/hadoop/dir </code>
+				</li>
+			</ul>
+			<p>返回值:</p>
+			<p>
+				<code> 成功返回0,失败返回-1。</code>
+			</p>
+		</section>
+		<section>
+			<title>du</title>
+			<p>
+				<code>使用方法:hadoop fs -du URI [URI &#x2026;]</code>
+			</p>
+			<p>
+	     显示目录中所有文件的大小,或者当只指定一个文件时,显示此文件的大小。<br/>
+	     示例:<br/><code>hadoop fs -du /user/hadoop/dir1 /user/hadoop/file1 hdfs://host:port/user/hadoop/dir1</code><br/>
+	     返回值:<br/><code> 成功返回0,失败返回-1。</code><br/></p>
+		</section>
+		<section>
+			<title> dus </title>
+			<p>
+				<code>使用方法:hadoop fs -dus &lt;args&gt;</code>
+			</p>
+			<p>
+	   显示文件的大小。
+	   </p>
+		</section>
+		<section>
+			<title> expunge </title>
+			<p>
+				<code>使用方法:hadoop fs -expunge</code>
+			</p>
+			<p>清空回收站。请参考<a href="hdfs_design.html">HDFS设计</a>文档以获取更多关于回收站特性的信息。
+	   </p>
+		</section>
+		<section>
+			<title id="getlink"> get </title>
+			<p>
+				<code>使用方法:hadoop fs -get [-ignorecrc] [-crc] &lt;src&gt; &lt;localdst&gt;</code>
+				<br/>
+			</p>
+			<p>
+	   复制文件到本地文件系统。可用<code>-ignorecrc</code>选项复制CRC校验失败的文件。使用<code>-crc</code>选项复制文件以及CRC信息。
+	  		</p>
+			<p>示例:</p>
+			<ul>
+				<li>
+					<code> hadoop fs -get /user/hadoop/file localfile </code>
+				</li>
+				<li>
+					<code> hadoop fs -get hdfs://host:port/user/hadoop/file localfile</code>
+				</li>
+			</ul>
+			<p>返回值:</p>
+			<p>
+				<code> 成功返回0,失败返回-1。</code>
+			</p>
+		</section>
+		<section>
+			<title> getmerge </title>
+			<p>
+				<code>使用方法:hadoop fs -getmerge &lt;src&gt; &lt;localdst&gt; [addnl]</code>
+			</p>
+			<p>
+	  接受一个源目录和一个目标文件作为输入,并且将源目录中所有的文件连接成本地目标文件。<code>addnl</code>是可选的,用于指定在每个文件结尾添加一个换行符。 
+	  </p>
+		</section>
+		<section>
+			<title> ls </title>
+			<p>
+				<code>使用方法:hadoop fs -ls &lt;args&gt;</code>
+			</p>
+			<p>如果是文件,则按照如下格式返回文件信息:<br/><code>文件名 &lt;副本数&gt; 文件大小 修改日期 修改时间 权限 用户ID 组ID</code><br/>
+	         如果是目录,则返回它直接子文件的一个列表,就像在Unix中一样。目录返回列表的信息如下:<br/><code>目录名 &lt;dir&gt; 修改日期 修改时间 权限 用户ID 组ID</code><br/>
+	         示例:<br/><code>hadoop fs -ls /user/hadoop/file1 /user/hadoop/file2 hdfs://host:port/user/hadoop/dir1 /nonexistentfile</code><br/>
+	         返回值:<br/><code> 成功返回0,失败返回-1。</code><br/></p>
+		</section>
+		<section>
+			<title>lsr</title>
+			<p><code>使用方法:hadoop fs -lsr &lt;args&gt;</code><br/>
+	      <code>ls</code>命令的递归版本。类似于Unix中的<code>ls -R</code>。
+	      </p>
+		</section>
+		<section>
+			<title> mkdir </title>
+			<p>
+				<code>使用方法:hadoop fs -mkdir &lt;paths&gt;</code>
+				<br/>
+			</p>
+			<p>接受路径指定的uri作为参数,创建这些目录。其行为类似于Unix的mkdir -p,它会创建路径中的各级父目录。</p>
+			<p>示例:</p>
+			<ul>
+				<li>
+					<code>hadoop fs -mkdir /user/hadoop/dir1 /user/hadoop/dir2 </code>
+				</li>
+				<li>
+					<code>hadoop fs -mkdir hdfs://host1:port1/user/hadoop/dir hdfs://host2:port2/user/hadoop/dir
+	  </code>
+				</li>
+			</ul>
+			<p>返回值:</p>
+			<p>
+				<code>成功返回0,失败返回-1。</code>
+			</p>
+		</section>
+		<section>
+			<title> movefromLocal </title>
+			<p>
+				<code>使用方法:dfs -moveFromLocal &lt;src&gt; &lt;dst&gt;</code>
+			</p>
+			<p>输出一个“not implemented”信息。
+	   </p>
+		</section>
+		<section>
+			<title> mv </title>
+			<p>
+				<code>使用方法:hadoop fs -mv URI [URI &#x2026;] &lt;dest&gt;</code>
+			</p>
+			<p>
+	    将文件从源路径移动到目标路径。这个命令允许有多个源路径,此时目标路径必须是一个目录。不允许在不同的文件系统间移动文件。
+	    <br/>
+	    示例:
+	    </p>
+			<ul>
+				<li>
+					<code> hadoop fs -mv /user/hadoop/file1 /user/hadoop/file2</code>
+				</li>
+				<li>
+					<code> hadoop fs -mv hdfs://host:port/file1 hdfs://host:port/file2 hdfs://host:port/file3 hdfs://host:port/dir1</code>
+				</li>
+			</ul>
+			<p>返回值:</p>
+			<p>
+				<code> 成功返回0,失败返回-1。</code>
+			</p>
+		</section>
+		<section>
+			<title id="putlink"> put </title>
+			<p>
+				<code>使用方法:hadoop fs -put &lt;localsrc&gt; ... &lt;dst&gt;</code>
+			</p>
+			<p>从本地文件系统中复制单个或多个源路径到目标文件系统。也支持从标准输入中读取输入写入目标文件系统。<br/>
+	   </p>
+			<ul>
+				<li>
+					<code> hadoop fs -put localfile /user/hadoop/hadoopfile</code>
+				</li>
+				<li>
+					<code> hadoop fs -put localfile1 localfile2 /user/hadoop/hadoopdir</code>
+				</li>
+				<li>
+					<code> hadoop fs -put localfile hdfs://host:port/hadoop/hadoopfile</code>
+				</li>
+				<li><code>hadoop fs -put - hdfs://host:port/hadoop/hadoopfile</code><br/>从标准输入中读取输入。</li>
+			</ul>
+			<p>返回值:</p>
+			<p>
+				<code> 成功返回0,失败返回-1。</code>
+			</p>
+		</section>
+		<section>
+			<title> rm </title>
+			<p>
+				<code>使用方法:hadoop fs -rm URI [URI &#x2026;] </code>
+			</p>
+			<p>
+	   删除指定的文件。只删除非空目录和文件。请参考rmr命令了解递归删除。<br/>
+	   示例:
+	   </p>
+			<ul>
+				<li>
+					<code> hadoop fs -rm hdfs://host:port/file /user/hadoop/emptydir </code>
+				</li>
+			</ul>
+			<p>返回值:</p>
+			<p>
+				<code> 成功返回0,失败返回-1。</code>
+			</p>
+		</section>
+		<section>
+			<title> rmr </title>
+			<p>
+				<code>使用方法:hadoop fs -rmr URI [URI &#x2026;]</code>
+			</p>
+			<p>delete的递归版本。<br/>
+	   示例:
+	   </p>
+			<ul>
+				<li>
+					<code> hadoop fs -rmr /user/hadoop/dir </code>
+				</li>
+				<li>
+					<code> hadoop fs -rmr hdfs://host:port/user/hadoop/dir </code>
+				</li>
+			</ul>
+			<p>返回值:</p>
+			<p>
+				<code> 成功返回0,失败返回-1。</code>
+			</p>
+		</section>
+		<section>
+			<title> setrep </title>
+			<p>
+				<code>使用方法:hadoop fs -setrep [-R] &lt;rep&gt; &lt;path&gt;</code>
+			</p>
+			<p>
+	   改变一个文件的副本系数。-R选项用于递归改变目录下所有文件的副本系数。
+	  </p>
+			<p>示例:</p>
+			<ul>
+				<li>
+					<code> hadoop fs -setrep -w 3 -R /user/hadoop/dir1 </code>
+				</li>
+			</ul>
+			<p>返回值:</p>
+			<p>
+				<code>成功返回0,失败返回-1。</code>
+			</p>
+		</section>
+		<section>
+			<title> stat </title>
+			<p>
+				<code>使用方法:hadoop fs -stat URI [URI &#x2026;]</code>
+			</p>
+			<p>
+	   返回指定路径的统计信息。
+	   </p>
+			<p>示例:</p>
+			<ul>
+				<li>
+					<code> hadoop fs -stat path </code>
+				</li>
+			</ul>
+			<p>返回值:<br/>
+	   <code> 成功返回0,失败返回-1。</code></p>
+		</section>
+		<section>
+			<title> tail </title>
+			<p>
+				<code>使用方法:hadoop fs -tail [-f] URI </code>
+			</p>
+			<p>
+	   将文件尾部1K字节的内容输出到stdout。支持-f选项,行为和Unix中一致。
+	   </p>
+			<p>示例:</p>
+			<ul>
+				<li>
+					<code> hadoop fs -tail pathname </code>
+				</li>
+			</ul>
+			<p>返回值:<br/>
+	   <code> 成功返回0,失败返回-1。</code></p>
+		</section>
+		<section>
+			<title> test </title>
+			<p>
+				<code>使用方法:hadoop fs -test -[ezd] URI</code>
+			</p>
+			<p>
+	   选项:<br/>
+	   -e 检查文件是否存在。如果存在则返回0。<br/>
+	   -z 检查文件是否是0字节。如果是则返回0。 <br/>
+	   -d 如果路径是个目录,则返回1,否则返回0。<br/></p>
+			<p>示例:</p>
+			<ul>
+				<li>
+					<code> hadoop fs -test -e filename </code>
+				</li>
+			</ul>
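+			<p>
+			下面的示例草图(并非权威实现)展示了与<code>-e</code>、<code>-z</code>、<code>-d</code>对应的Java API检查方式,路径仅为示意:
+			</p>
+<source>
+// 示例草图:用 FileSystem API 完成与 test 命令类似的检查(路径仅为示意)
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public class TestExample {
+  public static void main(String[] args) throws Exception {
+    FileSystem fs = FileSystem.get(new Configuration());
+    Path p = new Path("/user/hadoop/file1");
+    System.out.println("exists: " + fs.exists(p));              // 对应 -e
+    if (fs.exists(p)) {
+      FileStatus st = fs.getFileStatus(p);
+      System.out.println("zero length: " + (st.getLen() == 0)); // 对应 -z
+      System.out.println("is directory: " + st.isDir());        // 对应 -d
+    }
+    fs.close();
+  }
+}
+</source>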
+		</section>
+		<section>
+			<title> text </title>
+			<p>
+				<code>使用方法:hadoop fs -text &lt;src&gt;</code>
+				<br/>
+			</p>
+			<p>
+	   将源文件输出为文本格式。允许的格式是zip和TextRecordInputStream。
+	  </p>
+		</section>
+		<section>
+			<title> touchz </title>
+			<p>
+				<code>使用方法:hadoop fs -touchz URI [URI &#x2026;]</code>
+				<br/>
+			</p>
+			<p>
+	   创建一个0字节的空文件。
+	   </p>
+			<p>示例:</p>
+			<ul>
+				<li>
+					<code> hadoop fs -touchz pathname </code>
+				</li>
+			</ul>
+			<p>返回值:<br/>
+	   <code> 成功返回0,失败返回-1。</code></p>
+<!--DCCOMMENT:diff begin-->
+		</section>
+         </section>
+	</body>
+</document>
+<!--DCCOMMENT:diff end
+                        <p>Exit Code:<br/>
+           <code> Returns 0 on success and -1 on error.</code></p>
+                </section>
++        </section>
+        </body>
+ </document>
+
+-->
+

+ 513 - 0
common/src/docs/cn/src/documentation/content/xdocs/hdfs_user_guide.xml

@@ -0,0 +1,513 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+
+<!--DCCOMMENT:diff begin-->
+  <header>
+    <title>
+      Hadoop分布式文件系统使用指南
+    </title>
+  </header>
+
+  <body>
+    <section> <title>目的</title>
+      <p>
+	本文档的目标是为Hadoop分布式文件系统(HDFS)的用户提供一个学习的起点,这里的HDFS既可以作为<a href="http://hadoop.apache.org/">Hadoop</a>集群的一部分,也可以作为一个独立的分布式文件系统。虽然HDFS被设计为在很多环境下都能正常工作,但是了解HDFS的工作原理,对在特定集群上改进配置、提升运行性能以及进行错误诊断都有极大的帮助。
+      </p>
+    </section>
+<!--DCCOMMENT:diff end
+@@ -23,18 +23,18 @@
+
+   <header>
+     <title>
+-      Hadoop DFS User Guide
++      HDFS User Guide
+     </title>
+   </header>
+
+   <body>
+     <section> <title>Purpose</title>
+       <p>
+- This document aims to be the starting point for users working with
++ This document is a starting point for users working with
+  Hadoop Distributed File System (HDFS) either as a part of a
+  <a href="http://hadoop.apache.org/">Hadoop</a>
+  cluster or as a stand-alone general purpose distributed file system.
+- While HDFS is designed to "just-work" in many environments, a working
++ While HDFS is designed to "just work" in many environments, a working
+  knowledge of HDFS helps greatly with configuration improvements and
+  diagnostics on a specific cluster.
+       </p>
+
+-->
+<!--DCCOMMENT:begin-->
+
+    <section> <title> 概述 </title>
+      <p>
+HDFS是Hadoop应用用到的一个最主要的分布式存储系统。一个HDFS集群主要由一个NameNode和很多个Datanode组成:Namenode管理文件系统的元数据,而Datanode存储了实际的数据。HDFS的体系结构在<a href="hdfs_design.html">这里</a>有详细的描述。本文档主要关注用户以及管理员怎样和HDFS进行交互。<a href="hdfs_design.html">HDFS架构设计</a>中的<a href="images/hdfsarchitecture.gif">图解</a>描述了Namenode、Datanode和客户端之间的基本的交互操作。基本上,客户端联系Namenode以获取文件的元数据或进行文件修改,而真正的文件I/O操作是直接和Datanode进行交互的。
+      </p>
+      <p>
+      下面列出了一些多数用户都比较感兴趣的重要特性。
+      </p> 
+    <ul>
+    <li>
+<!--DCCOMMENT:end
+note:all tag "<em>" has been deleted in this doc.
+
+@@ -43,21 +43,20 @@
+     <section> <title> Overview </title>
+       <p>
+  HDFS is the primary distributed storage used by Hadoop applications. A
+- HDFS cluster primarily consists of a <em>NameNode</em> that manages the
+- filesystem metadata and Datanodes that store the actual data. The
++ HDFS cluster primarily consists of a NameNode that manages the
++ file system metadata and DataNodes that store the actual data. The
+  architecture of HDFS is described in detail
+  <a href="hdfs_design.html">here</a>. This user guide primarily deals with
+  interaction of users and administrators with HDFS clusters.
+  The <a href="images/hdfsarchitecture.gif">diagram</a> from
+  <a href="hdfs_design.html">HDFS architecture</a> depicts
+- basic interactions among Namenode, Datanodes, and the clients. Eseentially,
+- clients contact Namenode for file metadata or file modifications and perform
+- actual file I/O directly with the datanodes.
++ basic interactions among NameNode, the DataNodes, and the clients.
++ Clients contact NameNode for file metadata or file modifications and perform
++ actual file I/O directly with the DataNodes.
+       </p>
+       <p>
+  The following are some of the salient features that could be of
+- interest to many users. The terms in <em>italics</em>
+- are described in later sections.
++ interest to many users.
+       </p>
+     <ul>
+     <li>
+-->
+    Hadoop(包括HDFS)非常适合在商用硬件(commodity hardware)上做分布式存储和计算,因为它不仅具有容错性和可扩展性,而且非常易于扩展。<a href="mapred_tutorial.html">Map-Reduce</a>框架以其在大型分布式系统应用上的简单性和可用性而著称,这个框架已经被集成进Hadoop中。
+    </li>
+    <li>
+    	HDFS的可配置性极高,同时,它的默认配置能够满足很多的安装环境。多数情况下,这些参数只在非常大规模的集群环境下才需要调整。
+    </li>
+<!--DCCOMMENT:diff begin-->
+    <li>
+    	用Java语言开发,支持所有的主流平台。
+    </li>
+    <li>
+    	支持类Shell命令,可直接和HDFS进行交互。
+    </li>
+    <li>
+    	NameNode和DataNode有内置的Web服务器,方便用户检查集群的当前状态。
+    </li>
+<!--DCCOMMENT:diff end
+@@ -74,13 +73,13 @@
+        needs to be tuned only for very large clusters.
+     </li>
+     <li>
+-       It is written in Java and is supported on all major platforms.
++       Hadoop is written in Java and is supported on all major platforms.
+     </li>
+     <li>
+-       Supports <em>shell like commands</em> to interact with HDFS directly.
++       Hadoop supports shell-like commands to interact with HDFS directly.
+     </li>
+     <li>
+-       Namenode and Datanodes have built in web servers that makes it
++       The NameNode and Datanodes have built in web servers that makes it
+        easy to check current status of the cluster.
+     </li>
+     <li>
+-->
+    <li>
+	新特性和改进会定期加入HDFS的实现中。下面列出的是HDFS中常用特性的一部分:
+      <ul>
+    	<li>
+    		文件权限和授权。
+    	</li>
+    	<li>
+    		机架感知(Rack awareness):在调度任务和分配存储空间时考虑节点的物理位置。
+    	</li>
+    	<li>
+    		安全模式:一种维护需要的管理模式。
+    	</li>
+    	<li>
+    		fsck:一个诊断文件系统健康状况的工具,能够发现丢失的文件或数据块。
+    	</li>
+    	<li>
+    		Rebalancer:当datanode之间数据不均衡时,平衡集群上的数据负载。
+    	</li>
+    	<li>
+    		升级和回滚:在软件更新后有异常发生的情形下,能够回滚到HDFS升级之前的状态。
+    	</li>
+    	<li>
+		Secondary Namenode:对文件系统名字空间执行周期性的检查点,将Namenode上HDFS改动日志文件的大小控制在某个特定的限度下。
+    	</li>
+      </ul>
+    </li>
+    </ul>
+    
+    </section> <section> <title> 先决条件 </title>
+    <p>
+    下面的文档描述了如何安装和搭建Hadoop集群:
+    </p>
+ 	<ul>
+ 	<li>
+ 		<a href="quickstart.html">Hadoop快速入门</a>
+ 		针对初次使用者。
+ 	</li>
+ 	<li>
+		<a href="cluster_setup.html">Hadoop集群搭建</a>
+ 		针对大规模分布式集群的搭建。
+ 	</li>
+    </ul>
+    <p>
+    文档余下部分假设用户已经安装并运行了至少包含一个Datanode节点的HDFS。就本文目的来说,Namenode和Datanode可以运行在同一个物理主机上。
+    </p>
+     
+    </section> <section> <title> Web接口 </title>
+<!--DCCOMMENT:diff begin-->
+    <p>
+ 	NameNode和DataNode各自启动了一个内置的Web服务器,显示了集群当前的基本状态和信息。在默认配置下NameNode的首页地址是<code>http://namenode-name:50070/</code>。这个页面列出了集群里的所有DataNode和集群的基本状态。这个Web接口也可以用来浏览整个文件系统(使用NameNode首页上的"Browse the file system"链接)。
+ </p> 
+<!--DCCOMMENT:diff end
+  </section> <section> <title> Web Interface </title>
+  <p>
+-       Namenode and Datanode each run an internal web server in order to
++       NameNode and DataNode each run an internal web server in order to
+        display basic information about the current status of the cluster.
+-       With the default configuration, namenode front page is at
+-       <code>http://namenode:50070/</code> .
+-       It lists the datanodes in the cluster and basic stats of the
++       With the default configuration, the NameNode front page is at
++       <code>http://namenode-name:50070/</code>.
++       It lists the DataNodes in the cluster and basic statistics of the
+        cluster. The web interface can also be used to browse the file
+-       system (using "Browse the file system" link on the Namenode front
++       system (using "Browse the file system" link on the NameNode front
+        page).
+  </p>
+
+
+-->
+<!--DCCOMMENT:diff begin-->
+    </section> <section> <title>Shell命令</title>
+ 	<p>Hadoop包括一系列的类shell的命令,可直接和HDFS以及其他Hadoop支持的文件系统进行交互。<code>bin/hadoop fs -help</code> 命令列出所有Hadoop Shell支持的命令。而 <code>bin/hadoop fs -help command-name</code> 命令能显示关于某个命令的详细信息。这些命令支持大多数普通文件系统的操作,比如复制文件、改变文件权限等。它还支持一些HDFS特有的操作,比如改变文件副本数目。
+     </p>
+<!--DCCOMMENT:diff end
+    </section> <section> <title>Shell Commands</title>
+        <p>
+-      Hadoop includes various "shell-like" commands that directly
++      Hadoop includes various shell-like commands that directly
+       interact with HDFS and other file systems that Hadoop supports.
+       The command
+       <code>bin/hadoop fs -help</code>
+       lists the commands supported by Hadoop
+-      shell. Further,
+-      <code>bin/hadoop fs -help command</code>
+-      displays more detailed help on a command. The commands support
+-      most of the normal filesystem operations like copying files,
++      shell. Furthermore, the command
++      <code>bin/hadoop fs -help command-name</code>
++      displays more detailed help for a command. These commands support
++      most of the normal files ystem operations like copying files,
+       changing file permissions, etc. It also supports a few HDFS
+       specific operations like changing replication of files.
+      </p>
+
+-->
+   <section> <title> DFSAdmin命令 </title>
+   <p>
+   	<code>'bin/hadoop dfsadmin'</code> 命令支持一些和HDFS管理相关的操作。<code>bin/hadoop dfsadmin -help</code> 命令能列出所有当前支持的命令。比如:
+   </p>  
+   	<ul>
+   	<li>
+<!--DCCOMMENT:diff begin-->
+   	    <code>-report</code>:报告HDFS的基本统计信息。有些信息也可以在NameNode Web服务首页看到。
+<!--DCCOMMENT:diff end
+note: "Namenode" is replaced by "NameNode" in this doc
+
+        <li>
+            <code>-report</code>
+-           : reports basic stats of HDFS. Some of this information is
+-           also available on the Namenode front page.
++           : reports basic statistics of HDFS. Some of this information is
++           also available on the NameNode front page.
+        </li>
+-->
+   	</li>
+   	<li>
+   	    <code>-safemode</code>:虽然通常并不需要,但是管理员的确可以手动让NameNode进入或离开安全模式。
+   	</li>
+   	<li>
+   	    <code>-finalizeUpgrade</code>:删除上一次升级时制作的集群备份。
+   	</li>
+   	</ul>
+   </section>
+   
+   </section> <section> <title> Secondary NameNode </title>
+   <p>NameNode将对文件系统的改动追加保存到本地文件系统上的一个日志文件(<code>edits</code>)。当一个NameNode启动时,它首先从一个映像文件(<code>fsimage</code>)中读取HDFS的状态,接着应用日志文件中的edits操作。然后它将新的HDFS状态写入(<code>fsimage</code>)中,并使用一个空的edits文件开始正常操作。因为NameNode只有在启动阶段才合并<code>fsimage</code>和<code>edits</code>,所以久而久之日志文件可能会变得非常庞大,特别是对大型的集群。日志文件太大的另一个副作用是下一次NameNode启动会花很长时间。
+   </p>
+   <p>
+     Secondary NameNode定期合并fsimage和edits日志,将edits日志文件大小控制在一个限度下。因为内存需求和NameNode在一个数量级上,所以通常secondary NameNode和NameNode运行在不同的机器上。Secondary NameNode通过<code>bin/start-dfs.sh</code>在<code>conf/masters</code>中指定的节点上启动。
+   </p>
+
+<!--DCCOMMENT:diff begin-->
+<p>
+Secondary NameNode的检查点进程启动,是由两个配置参数控制的:
+</p>
+   <ul>
+      <li>
+        <code>dfs.namenode.checkpoint.period</code>,指定连续两次检查点的最大时间间隔,
+        默认值是1小时。
+      </li>
+      <li>
+        <code>dfs.namenode.checkpoint.size</code>定义了edits日志文件的最大值,一旦超过这个值会导致强制执行检查点(即使没到检查点的最大时间间隔)。默认值是64MB。
+      </li>
+   </ul>
+   <p>
+     Secondary NameNode保存最新检查点的目录与NameNode的目录结构相同。
+     所以NameNode可以在需要的时候读取Secondary NameNode上的检查点镜像。
+   </p>
+  <p>
+     如果NameNode上除了最新的检查点以外,所有的其他的历史镜像和edits文件都丢失了,
+     NameNode可以引入这个最新的检查点。以下操作可以实现这个功能:
+   </p>
+   <ul>
+      <li>
+        在配置参数<code>dfs.name.dir</code>指定的位置建立一个空文件夹;
+      </li>
+      <li>
+        把检查点目录的位置赋值给配置参数<code>dfs.namenode.checkpoint.dir</code>;
+      </li>
+      <li>
+        启动NameNode,并加上<code>-importCheckpoint</code>。 
+      </li>
+   </ul>
+   <p>
+     NameNode会从<code>dfs.namenode.checkpoint.dir</code>目录读取检查点,
+     并把它保存在<code>dfs.name.dir</code>目录下。
+     如果<code>dfs.name.dir</code>目录下有合法的镜像文件,NameNode会启动失败。
+     NameNode会检查<code>dfs.namenode.checkpoint.dir</code>目录下镜像文件的一致性,但是不会去改动它。
+   </p>
+   <p>
+     命令的使用方法请参考<a href="commands_manual.html#secondarynamenode"><code>secondarynamenode</code> 命令</a>.
+   </p>
+
+ 
+<!--DCCOMMENT:diff end
++   <p>
++     The start of the checkpoint process on the secondary NameNode is
++     controlled by two configuration parameters.
++   </p>
++   <ul>
++      <li>
++        <code>dfs.namenode.checkpoint.period</code>, set to 1 hour by default, specifies
++        the maximum delay between two consecutive checkpoints, and
++      </li>
++      <li>
++        <code>dfs.namenode.checkpoint.size</code>, set to 64MB by default, defines the
++        size of the edits log file that forces an urgent checkpoint even if
++        the maximum checkpoint delay is not reached.
++      </li>
++   </ul>
++   <p>
++     The secondary NameNode stores the latest checkpoint in a
++     directory which is structured the same way as the primary NameNode's
++     directory. So that the check pointed image is always ready to be
++     read by the primary NameNode if necessary.
++   </p>
++   <p>
++     The latest checkpoint can be imported to the primary NameNode if
++     all other copies of the image and the edits files are lost.
++     In order to do that one should:
++   </p>
++   <ul>
++      <li>
++        Create an empty directory specified in the
++        <code>dfs.name.dir</code> configuration variable;
++      </li>
++      <li>
++        Specify the location of the checkpoint directory in the
++        configuration variable <code>dfs.namenode.checkpoint.dir</code>;
++      </li>
++      <li>
++        and start the NameNode with <code>-importCheckpoint</code> option.
++      </li>
++   </ul>
++   <p>
++     The NameNode will upload the checkpoint from the
++     <code>dfs.namenode.checkpoint.dir</code> directory and then save it to the NameNode
++     directory(s) set in <code>dfs.name.dir</code>.
++     The NameNode will fail if a legal image is contained in
++     <code>dfs.name.dir</code>.
++     The NameNode verifies that the image in <code>dfs.namenode.checkpoint.dir</code> is
++     consistent, but does not modify it in any way.
++   </p>
++   <p>
++     For command usage, see <a href="commands_manual.html#secondarynamenode"><code>secondarynamenode</code> command</a>.
++   </p>
+
+    </section> <section> <title> Rebalancer </title>
+-->
+   
+   </section> <section> <title> Rebalancer </title>
+<!--DCCOMMENT:diff begin-->
+    <p>
+      HDFS的数据也许并不是非常均匀的分布在各个DataNode中。一个常见的原因是在现有的集群上经常会增添新的DataNode节点。当新增一个数据块(一个文件的数据被保存在一系列的块中)时,NameNode在选择DataNode接收这个数据块之前,会考虑到很多因素。其中的一些考虑的是:
+    </p>
+<!--DCCOMMENT:diff end
+note : "datanode" is replaced by "DataNode" in this doc.
+
+    HDFS data might not always be be placed uniformly across the
+-      datanode. One common reason is addition of new datanodes to an
+-      existing cluster. While placing new <em>blocks</em> (data for a file is
+-      stored as a series of blocks), Namenode considers various
+-      parameters before choosing the datanodes to receive these blocks.
+-      Some of the considerations are :
++      DataNode. One common reason is addition of new DataNodes to an
++      existing cluster. While placing new blocks (data for a file is
++      stored as a series of blocks), NameNode considers various
++      parameters before choosing the DataNodes to receive these blocks.
++      Some of the considerations are:
+     </p>
+-->
+      <ul>
+      <li>
+	将数据块的一个副本放在正在写这个数据块的节点上。
+      </li>
+      <li>
+        尽量将数据块的不同副本分布在不同的机架上,这样集群可在完全失去某一机架的情况下还能存活。
+      </li>
+      <li>
+        一个副本通常被放置在和写文件的节点同一机架的某个节点上,这样可以减少跨越机架的网络I/O。
+      </li>
+      <li>
+        尽量均匀地将HDFS数据分布在集群的DataNode中。
+      </li>
+      </ul>
+    <p>
+由于上述多种考虑需要取舍,数据可能并不会均匀分布在DataNode中。HDFS为管理员提供了一个工具,用于分析数据块分布和重新平衡DataNode上的数据分布。<a href="http://issues.apache.org/jira/browse/HADOOP-1652">HADOOP-1652</a>的附件中的一个<a href="http://issues.apache.org/jira/secure/attachment/12368261/RebalanceDesign6.pdf">PDF</a>是一个简要的rebalancer管理员指南。
+    </p>
+<!--DCCOMMENT:diff begin-->
+    <p>
+     使用方法请参考<a href="commands_manual.html#balancer">balancer 命令</a>.
+   </p>
+<!--DCCOMMENT:diff end
+      <a href="http://issues.apache.org/jira/browse/HADOOP-1652">HADOOP-1652</a>.
+     </p>
++    <p>
++     For command usage, see <a href="commands_manual.html#balancer">balancer command</a>.
++   </p>
+
+    </section> <section> <title> Rack Awareness </title>
+
+-->
+   </section> <section> <title> 机架感知(Rack awareness) </title>
+    <p>
+      通常,大型Hadoop集群是以机架的形式来组织的,同一个机架上不同节点间的网络状况比不同机架之间的更为理想。另外,NameNode设法将数据块副本保存在不同的机架上以提高容错性。Hadoop允许集群的管理员通过配置<code>dfs.network.script</code>参数来确定节点所处的机架。当这个脚本配置完毕,每个节点都会运行这个脚本来获取它的机架ID。默认的安装假定所有的节点属于同一个机架。这个特性及其配置参数在<a href="http://issues.apache.org/jira/browse/HADOOP-692">HADOOP-692</a>所附的<a href="http://issues.apache.org/jira/secure/attachment/12345251/Rack_aware_HDFS_proposal.pdf">PDF</a>上有更详细的描述。
+    </p>
+
+   </section> <section> <title> 安全模式 </title>
+    <p>
+     NameNode启动时会从fsimage和edits日志文件中装载文件系统的状态信息,接着它等待各个DataNode向它报告它们各自的数据块状态,这样,NameNode就不会过早地开始复制数据块,即使在副本充足的情况下。这个阶段,NameNode处于安全模式下。NameNode的安全模式本质上是HDFS集群的一种只读模式,此时集群不允许任何对文件系统或者数据块修改的操作。通常NameNode会在开始阶段自动地退出安全模式。如果需要,你也可以通过<code>'bin/hadoop dfsadmin -safemode'</code>命令显式地将HDFS置于安全模式。NameNode首页会显示当前是否处于安全模式。关于安全模式的更多介绍和配置信息请参考JavaDoc:<a href="http://hadoop.apache.org/core/docs/current/api/org/apache/hadoop/dfs/NameNode.html#setSafeMode(org.apache.hadoop.dfs.FSConstants.SafeModeAction)"><code>setSafeMode()</code></a>。
+    </p>
+
+   </section> <section> <title> fsck </title>
+     <p>    
+      HDFS支持<code>fsck</code>命令来检查系统中的各种不一致状况。这个命令被设计来报告各种文件存在的问题,比如文件缺少数据块或者副本数目不够。不同于在本地文件系统上传统的fsck工具,这个命令并不会修正它检测到的错误。一般来说,NameNode会自动修正大多数可恢复的错误。HDFS的fsck不是一个Hadoop shell命令。它通过'<code>bin/hadoop fsck</code>'执行。
+<!--DCCOMMENT:diff begin-->
+命令的使用方法请参考<a href="commands_manual.html#fsck"><code>fsck</code>命令</a>
+<code>fsck</code>可用来检查整个文件系统,也可以只检查部分文件。
+<!--DCCOMMENT:diff end
+ Hadoop shell command. It can be run as '<code>bin/hadoop fsck</code>'.
+-      Fsck can be run on the whole filesystem or on a subset of files.
++      For command usage, see <a href="commands_manual.html#fsck"><code>fsck</code> command</a>.
++      <code>fsck</code> can be run on the whole file system or on a subset of files.
+      </p>
+
+-->
+     </p>
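+     <p>
+     例如,下面的命令(仅为示例,具体选项请以所用版本的帮助输出为准)会检查整个文件系统并列出各文件的数据块及其位置:<code>bin/hadoop fsck / -files -blocks -locations</code>。
+     </p>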
+     
+   </section> <section> <title> 升级和回滚 </title>
+     <p>当在一个已有集群上升级Hadoop时,像其他的软件升级一样,可能会有新的bug或一些会影响到现有应用的非兼容性变更出现。在任何有实际意义的HDFS系统上,丢失数据是不被允许的,更不用说从头重新搭建并启动HDFS了。HDFS允许管理员退回到之前的Hadoop版本,并将集群的状态回滚到升级之前。更多关于HDFS升级的细节在<a href="http://wiki.apache.org/hadoop/Hadoop%20Upgrade">升级wiki</a>上可以找到。HDFS在同一时间只能保留一个这样的备份。在升级之前,管理员需要用<code>bin/hadoop dfsadmin -finalizeUpgrade</code>(升级终结操作)命令删除已存在的备份文件。下面简单介绍一下一般的升级过程:
+     </p>
+      <ul>
+      <li>升级 Hadoop 软件之前,请检查是否已经存在一个备份,如果存在,可执行升级终结操作删除这个备份。通过<code>dfsadmin -upgradeProgress status</code>命令能够知道是否需要对一个集群执行升级终结操作。</li>
+      <li>停止集群并部署新版本的Hadoop。</li>
+      <li>使用<code>-upgrade</code>选项运行新的版本(<code>bin/start-dfs.sh -upgrade</code>)。
+      </li>
+      <li>在大多数情况下,集群都能够正常运行。一旦我们认为新的HDFS运行正常(也许经过几天的操作之后),就可以对之执行升级终结操作。注意,在对一个集群执行升级终结操作之前,删除那些升级前就已经存在的文件并不会真正地释放DataNodes上的磁盘空间。</li>
+      <li>如果需要退回到老版本,
+	<ul>
+          <li>停止集群并且部署老版本的Hadoop。</li>
+          <li>用回滚选项启动集群(<code>bin/start-dfs.sh -rollback</code>)。</li>
+        </ul>
+      </li>
+      </ul>
+    
+   </section> <section> <title> 文件权限和安全性 </title>
+     <p>           
+      这里的文件权限和其他常见平台如Linux的文件权限类似。目前,安全性仅限于简单的文件权限。启动NameNode的用户被视为HDFS的超级用户。HDFS以后的版本将会支持网络验证协议(比如Kerberos)来对用户身份进行验证和对数据进行加密传输。具体的细节请参考<a href="hdfs_permissions_guide.html">权限使用管理指南</a>。
+     </p>
+     
+   </section> <section> <title> 可扩展性 </title>
+     <p>
+      现在,Hadoop已经运行在上千个节点的集群上。<a href="http://wiki.apache.org/hadoop/PoweredBy">Powered By Hadoop</a>页面列出了一些已将Hadoop部署在他们的大型集群上的组织。HDFS集群只有一个NameNode节点。目前,NameNode上可用内存大小是一个主要的扩展限制。在超大型的集群中,增大HDFS存储文件的平均大小能够增大集群的规模,而不需要增加NameNode的内存。默认配置也许并不适合超大规模的集群。<a href="http://wiki.apache.org/hadoop/FAQ">Hadoop FAQ</a>页面列举了针对大型Hadoop集群的配置改进。</p>
+     
+   </section> <section> <title> 相关文档 </title>          
+      <p>
+      这个用户手册给用户提供了一个学习和使用HDFS文件系统的起点。本文档会不断地进行改进,同时,用户也可以参考更多的Hadoop和HDFS文档。下面的列表是用户继续学习的起点:
+      </p>
+      <ul>
+      <li>
+        <a href="http://hadoop.apache.org/">Hadoop官方主页</a>:所有Hadoop相关的起始页。
+      </li>
+      <li>
+        <a href="http://wiki.apache.org/hadoop/FrontPage">Hadoop Wiki</a>:Hadoop Wiki文档首页。这个指南是Hadoop代码树中的一部分,与此不同,Hadoop Wiki是由Hadoop社区定期编辑的。
+      </li>
+      <li>Hadoop Wiki上的<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>。
+      </li>
+      <li>Hadoop <a href="http://hadoop.apache.org/core/docs/current/api/">JavaDoc API</a>。</li>
+      <li>Hadoop用户邮件列表:<a href="mailto:core-user@hadoop.apache.org">core-user[at]hadoop.apache.org</a>。</li>
+      <li>查看<code>conf/hadoop-default.xml</code>文件。这里包括了大多数配置参数的简要描述。</li>
+      <li>
+        <a href="commands_manual.html">命令手册</a>:命令使用说明。
+      </li>
+<!--DCCOMMENT:diff begin-->
+<!--DCCOMMENT:diff end
+@@ -411,6 +469,10 @@
+          It includes brief
+          description of most of the configuration variables available.
+       </li>
++      <li>
++        <a href="commands_manual.html">Commands Manual</a>
++        : commands usage.
++      </li>
+       </ul>
+      </section>
+
+-->
+      </ul>
+     </section>
+     
+  </body>
+</document>

+ 46 - 0
common/src/docs/cn/src/documentation/content/xdocs/hod.xml

@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+
+  <header>
+    <title> 
+      Hadoop On Demand
+    </title>
+  </header>
+
+  <body>
+  <section>
+	<title>简介</title>
+      <p>Hadoop On Demand(HOD)是一个能在大型物理集群上供应虚拟hadoop集群的系统。它使用Torque资源管理器分配节点。它可以在分配的节点上启动Hadoop Map/Reduce和HDFS的守护进程。它会自动为Hadoop守护进程和客户端生成合适的配置文件(hadoop-site.xml)。HOD还可以将Hadoop分发到它分配出来的虚拟Hadoop集群的节点上。简而言之,HOD使管理员和用户轻松地快速搭建和使用hadoop。它也是Hadoop开发人员和测试人员非常有用的一个工具,他们可以使用HOD共享一个物理集群来测试各自的Hadoop版本。</p>
+      </section>
+      <section>
+        <title>文档</title>
+      <p>阅读下面的文档,你会在使用HOD方面了解更多:</p>
+      <ul>
+        <li><a href="hod_admin_guide.html">HOD管理指南</a> : 此指南概述了HOD的体系结构,Torque资源管理器及其他各种支持工具,也会告诉你如何安装,配置和运行HOD。</li>
+        <li><a href="hod_config_guide.html">HOD配置指南</a> : 此指南讨论HOD的配置段,会告诉你如何使用那些最重要和最常用的配置项。</li>
+        <li><a href="hod_user_guide.html">HOD用户指南</a> : 此指南会告诉你如何开始使用HOD,它的各种功能特性,命令行选项,也会给你一些故障解决方面的详细帮助。</li>
+      </ul>
+    </section>
+  </body>
+</document>

+ 237 - 0
common/src/docs/cn/src/documentation/content/xdocs/hod_admin_guide.xml

@@ -0,0 +1,237 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+ 
+
+<document>
+
+  <header>
+    <title> 
+      Hadoop On Demand
+    </title>
+  </header>
+
+  <body>
+<section>
+<title>概述</title>
+<p>Hadoop On Demand (HOD)是一个能在一个共享集群上供应和管理相互独立的Hadoop Map/Reduce和Hadoop分布式文件系统(HDFS)实例的系统。它能让管理员和用户轻松地快速搭建和使用hadoop。HOD对Hadoop的开发人员和测试人员也非常有用,他们可以通过HOD共享一个物理集群来测试各自不同的Hadoop版本。</p>
+<p>HOD依赖资源管理器(RM)来分配节点,这些节点被用来在之上运行hadoop实例。目前,HOD采用的是<a href="ext:hod/torque">Torque资源管理器</a>。
+</p>
+<p>
+基本的HOD系统架构包含下列组件:</p>
+<ul>
+  <li>一个资源管理器(可能同时附带一个调度程序)</li>
+  <li>各种HOD的组件 </li>
+  <li>Hadoop Map/Reduce和HDFS守护进程</li>
+</ul>
+
+<p>
+通过与以上组件交互,HOD在给定的集群上供应和维护Hadoop Map/Reduce实例,或者HDFS实例。集群中的节点可看作由两组节点构成:</p>
+<ul>
+  <li>提交节点(Submit nodes):用户通过HOD客户端在这些节点上申请集群,之后通过Hadoop客户端提交Hadoop作业。</li>
+  <li>计算节点(Compute nodes):利用资源管理器,HOD组件在这些节点上运行以供应Hadoop守护进程。之后,Hadoop作业在这些节点上运行。</li>
+</ul>
+
+<p>
+下面是对申请集群及在之上运行作业所需操作步骤的简要描述。
+</p>
+<ul>
+  <li>用户在提交节点上用HOD客户端分配所需数目节点的集群,在上面供应Hadoop。</li>
+  <li>HOD客户端利用资源管理器接口(在Torque中是qsub)提交一个被称为RingMaster的HOD进程作为一个资源管理器作业,申请理想数目的节点。这个作业被提交到资源管理器的中央服务器上(在Torque中叫pbs_server)。</li>
+  <li>在计算节点上,资源管理器的从(slave)守护程序(Torque中的pbs_moms)接受并处理中央服务器(Torque中的pbs_server)分配的作业。RingMaster进程在其中一个计算节点(Torque中的mother superior)上开始运行。</li>
+  <li>之后,Ringmaster通过资源管理器的另外一个接口(在Torque中是pbsdsh)在所有分配到的计算节点上运行第二个HOD组件HodRing,即分布式任务。</li>
+  <li>HodRing初始化之后会与RingMaster通信获取Hadoop指令,并遵照执行。一旦Hadoop的命令开始启动,它们会向RingMaster登记,提供关于守护进程的信息。</li>
+  <li>Hadoop实例所需的配置文件全部由HOD自己生成,有一些来自于用户在配置文件设置的选项。</li>
+  <li>HOD客户端保持和RingMaster的通信,找出JobTracker和HDFS守护进程的位置所在。</li>
+</ul>
+<p>之后的文档会讲述如何在一个物理集群的节点上安装HOD。</p>
+</section>
+
+<section>
+<title>先决条件</title>
+<p>要使用HOD,你的系统应包含下列的硬件和软件</p>
+<p>操作系统: HOD目前在RHEL4上测试通过。<br/>
+节点:HOD至少需要3个由资源管理器配置的节点。<br/></p>
+
+<p>软件</p>
+<p>在使用HOD之前,以下组件必须被安装到所有节点上:</p>
+<ul>
+ <li>Torque:资源管理器</li>
+ <li><a href="ext:hod/python">Python</a>:HOD要求Python 2.5.1</li>
+</ul>
+
+<p>下列组件是可选的,你可以安装以获取HOD更好的功能:</p>
+<ul>
+ <li><a href="ext:hod/twisted-python">Twisted Python</a>:这个可以用来提升HOD的可扩展性。如果检测到这个模块已安装,HOD就用它,否则就使用默认的模块。</li>
+
+ <li><a href="ext:site">Hadoop</a>:HOD能自动将Hadoop分发到集群的所有节点上。不过,如果Hadoop在所有节点上已经可用,HOD也可以使用已经安装好的Hadoop。HOD目前支持Hadoop 0.15和其后续版本。</li>
+</ul>
+
+<p>注释: HOD的配置需要以上这些组件的安装位置在集群所有节点上保持一致。如果在提交节点上的安装位置也相同,配置起来会更简单。</p>
+</section>
+
+<section>
+<title>资源管理器</title>
+<p>目前,HOD使用Torque资源管理器来分配节点和提交作业。Torque是一个开源的资源管理器,来自于<a href="ext:hod/cluster-resources">Cluster Resources</a>,是一个社区基于PBS项目努力的结晶。它提供对批处理作业和分散的计算节点(Compute nodes)的控制。你可以自由地从<a href="ext:hod/torque-download">此处</a>下载Torque。</p>
+<p>所有torque相关的文档可以在<a href="ext:hod/torque-docs">这儿</a>的TORQUE Resource Manager一节找到。在<a href="ext:hod/torque-wiki">这里</a>可以看到wiki文档。如果想订阅TORQUE的邮件列表或查看问题存档,访问<a href="ext:hod/torque-mailing-list">这里</a>。</p>
+
+<p>使用带Torque的HOD:</p>
+<ul>
+<li>安装Torque组件:在一个节点上(head node)安装pbs_server,所有计算节点上安装pbs_mom,所有计算节点和提交节点上安装PBS客户端。至少做最基本的配置,使Torque系统跑起来,也就是,使pbs_server能知道该和哪些机器通话。查看<a  href="ext:hod/torque-basic-config">这里</a>可以了解基本配置。要了解高级配置,请查看<a href="ext:hod/torque-advanced-config">这里</a>。</li>
+ <li>在pbs_server上创建一个作业提交队列。队列的名字和HOD的配置参数resource-manager.queue相同。Hod客户端利用此队列提交RingMaster进程作为Torque作业。</li>
+ <li>在集群的所有节点上指定一个cluster name作为property。这可以用qmgr命令做到。比如:<code>qmgr -c "set node node properties=cluster-name"</code>。集群名字和HOD的配置参数hod.cluster是相同的。</li>
+ <li>确保作业可以提交到节点上去。这可以通过使用qsub命令做到。比如:<code>echo "sleep 30" | qsub -l nodes=3</code></li>
+</ul>
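+<p>下面给出一组示例命令的草稿,大致对应上述步骤。其中队列名hod_queue、集群名my-cluster和节点名compute-node-1都只是假设的示例值,实际取值应与hodrc中的resource-manager.queue和hod.cluster保持一致:</p>
+<table>
+<tr><td><code>$ qmgr -c "create queue hod_queue queue_type=execution"</code><br/>
+<code>$ qmgr -c "set queue hod_queue enabled=true started=true"</code><br/>
+<code>$ qmgr -c "set node compute-node-1 properties=my-cluster"</code><br/>
+<code>$ echo "sleep 30" | qsub -l nodes=3 -q hod_queue</code></td></tr>
+</table>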
+</section>
+
+<section>
+<title>安装HOD</title>
+
+<p>现在资源管理器已经安装好了,我们接着下载并安装HOD。</p>
+<ul>
+ <li>如果你想从Hadoop tar包中获取HOD,它在'contrib'下的'hod'的根目录下。</li>
+ <li>如果你从源码编译,可以在Hadoop根目录下运行ant tar,生成Hadoop tar包,然后按照上面的方法从中获取HOD。</li>
+ <li>把这个目录下的所有文件分发到集群的所有节点上。注意文件拷贝的位置应在所有节点上保持一致。</li>
+ <li>注意,编译hadoop时会创建HOD,同时会正确地设置所有HOD必须的脚本文件的权限。</li>
+</ul>
+</section>
+
+<section>
+<title>配置HOD</title>
+
+<p>安装HOD后你就可以配置它。为了运行HOD需要做的最小配置会在下面讲述,更多高级的配置会在HOD配置指南里面讲解。</p>
+<section>
+  <title>最小配置</title>
+<p>为运行HOD,以下的最小配置是必须要做的:</p>
+<ul>
+ <li>在你想要运行hod的节点上,编辑&lt;install dir&gt;/conf目录下的hodrc文件。这个文件包含了运行hod所必需的最少量的设置。</li>
+ <li>
+<p>为这个配置文件中的定义的变量指定适合你环境的值。注意,有些变量在文件中出现了不止一次。</p>
+
+  <ul>
+   <li>${JAVA_HOME}:Hadoop的Java的安装位置。Hadoop支持Sun JDK 1.5.x及以上版本。</li>
+   <li>${CLUSTER_NAME}:集群名称,由'node property'指定,在资源管理器配置中曾提到过。</li>
+   <li>${HADOOP_PREFIX}:Hadoop在计算节点和提交节点上的安装位置。</li>
+   <li>${RM_QUEUE}:在资源管理器配置中设置的作业提交队列。</li>
+   <li>${RM_HOME}:资源管理器在计算节点和提交节点的安装位置。</li>
+    </ul>
+</li>
+
+<li>
+<p>以下环境变量可能需要设置,取决于你的系统环境。在你运行HOD客户端的地方这些变量必须被定义,也必须在HOD配置文件中通过设定resource_manager.env-vars的值指定。多个变量可指定为用逗号分隔的key=value对组成的列表。</p>
+<ul>
+   <li>HOD_PYTHON_HOME:如果python安装在计算节点或提交节点的非默认位置,那么这个值必须设定为python的可执行文件的实际位置。</li>
+</ul>
+</li>
+</ul>
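+<p>下面是一个定位并替换这些变量的操作草稿。这里假设示例hodrc中以${JAVA_HOME}等形式标出了占位符,且HOD安装在假设的路径/opt/hod下:</p>
+<table>
+<tr><td><code>$ export HOD_CONF_DIR=/opt/hod/conf</code><br/>
+<code>$ grep -n 'JAVA_HOME\|CLUSTER_NAME\|HADOOP_PREFIX\|RM_QUEUE\|RM_HOME' $HOD_CONF_DIR/hodrc</code>(找出需要替换的位置)<br/>
+<code>$ vi $HOD_CONF_DIR/hodrc</code>(将各占位符替换为适合你环境的实际值)</td></tr>
+</table>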
+</section>
+
+<section>
+    <title>高级配置</title>
+    <p>你可以检查和修改其它配置选项来满足你的特定需要。关于HOD配置的更多信息,请参考<a href="hod_config_guide.html">配置指南</a>。</p>
+  </section>
+</section>
+  <section>
+    <title>运行HOD</title>
+    <p>当HOD配置好后,你就可以运行它了。更多信息请参考<a href="hod_user_guide.html">HOD用户指南</a>。</p>
+  </section>
+
+  <section>
+    <title>支持工具和实用程序</title>
+    <p>此节描述一些可用于管理HOD部署的支持工具和应用程序。</p>
+    <section>
+      <title>logcondense.py - 管理日志文件</title>
+      <p>在<a href="hod_user_guide.html#收集和查看Hadoop日志">HOD用户指南</a>有提到,HOD可配置成将Hadoop日志上传到一个配置好的静态HDFS上。随着时间增加,日志数量会不断增长。logcondense.py可以帮助管理员清理上传到HDFS的日志文件。</p>
+      <section>
+        <title>运行logcondense.py</title>
+        <p>logcondense.py在hod_install_location/support文件夹下。你可以使用python去运行它,比如<em>python logcondense.py</em>,或者授以执行权限,直接运行<em>logcondense.py</em>。如果启用了权限,logcondense.py需要由具有足够权限、能够删除HDFS上日志上传目录下文件的用户来运行。比如,在<a href="hod_config_guide.html#3.7+hodring的配置项">配置指南</a>中提及过,用户可以配置将日志放在HDFS上的其主目录下。在这种情况下,你需要具有超级用户权限,才能运行logcondense.py删除所有用户主目录下的日志文件。</p>
+      </section>
+      <section>
+        <title>logcondense.py的命令行选项</title>
+        <p>logcondense.py支持以下命令行选项</p>
+          <table>
+            <tr>
+              <td>短选项</td>
+              <td>长选项</td>
+              <td>含义</td>
+              <td>例子</td>
+            </tr>
+            <tr>
+              <td>-p</td>
+              <td>--package</td>
+              <td>hadoop脚本的全路径。Hadoop的版本必须和运行HDFS的版本一致。</td>
+              <td>/usr/bin/hadoop</td>
+            </tr>
+            <tr>
+              <td>-d</td>
+              <td>--days</td>
+              <td>删除超过指定天数的日志文件</td>
+              <td>7</td>
+            </tr>
+            <tr>
+              <td>-c</td>
+              <td>--config</td>
+              <td>Hadoop配置目录的路径,hadoop-site.xml存在于此目录中。hadoop-site.xml中须指明待删除日志存放的HDFS的NameNode。</td>
+              <td>/home/foo/hadoop/conf</td>
+            </tr>
+            <tr>
+              <td>-l</td>
+              <td>--logs</td>
+              <td>一个HDFS路径,须和log-destination-uri指定的是同一个HDFS路径,不带hdfs:// URI串,这点在<a href="hod_config_guide.html#3.7+hodring的配置项">配置指南</a>中提到过。</td>
+              <td>/user</td>
+            </tr>
+            <tr>
+              <td>-n</td>
+              <td>--dynamicdfs</td>
+            <td>如果为true,logcondense.py除要删除Map/Reduce日志之外还需删除HDFS日志。否则,它只删除Map/Reduce日志,这也是不指定这个选项时的默认行为。这个选项对下面的情况非常有用:一个动态的HDFS由HOD供应,一个静态的HDFS用来收集日志文件 - 也许这是测试集群中一个非常普遍的使用场景。</td>
+              <td>false</td>
+            </tr>
+          </table>
+        <p>比如,假如要删除所有7天之前的日志文件,hadoop-site.xml存放在~/hadoop-conf下,hadoop安装于~/hadoop-0.17.0,你可以这样:</p>
+        <p><em>python logcondense.py -p ~/hadoop-0.17.0/bin/hadoop -d 7 -c ~/hadoop-conf -l /user</em></p>
+      </section>
+    </section>
+    <section>
+      <title>checklimits.sh - 监视资源限制</title>
+      <p>checklimits.sh是一个针对Torque/Maui环境的HOD工具(<a href="ext:hod/maui">Maui集群调度器</a> 是一个用于集群和超级计算机的开源作业调度器,来自Cluster Resources)。当新提交的作业违反或超过用户在Maui调度器里设置的限制时,checklimits.sh脚本更新torque的comment字段。它使用qstat在torque的job-list中做一次遍历确定作业是在队列中还是已完成,运行Maui工具checkjob检查每一个作业是否违反用户限制设定,之后运行torque的qalter工具更新作业的'comment'属性。当前,它把那些违反限制的作业的comment的值更新为<em>User-limits exceeded. Requested:([0-9]*) Used:([0-9]*) MaxLimit:([0-9]*)</em>。之后,HOD根据这个注释内容做出相应处理。
+      </p>
+     
+      <section>
+        <title>运行checklimits.sh</title>
+        <p>checklimits.sh可以在hod_install_location/support目录下找到。在被赋予执行权限后,这个shell脚本可以直接通过<em>sh checklimits.sh </em>或者<em>./checklimits.sh</em>运行。这个工具运行的机器上应有Torque和Maui的二进制运行文件,并且这些文件要在这个shell脚本进程的路径中。为了更新不同用户作业的comment值,这个工具必须以torque的管理员权限运行。这个工具必须按照一定时间间隔重复运行,以保证作业的comment属性得到持续更新,比如可以通过cron。请注意,这个脚本中用到的资源管理器和调度器命令运行代价可能会比较大,所以最好不要在没有sleeping的紧凑循环中运行。
+        </p>
+      </section>
+    </section>
+    
+    <section>
+      <title>verify-account - 用于核实用户提交作业所使用的帐号的脚本</title>
+      <p>生产系统一般使用帐号系统来对使用共享资源的用户收费。HOD支持一个叫<em>resource_manager.pbs-account</em>的参数,用户可以通过这个参数来指定提交作业时使用的帐号。核实这个帐户在帐号管理系统中的有效性是有必要的。脚本<em>hod-install-dir/bin/verify-account</em>提供了一种机制让用户插入自定义脚本来实现这个核实过程。</p>
+    <section>
+      <title>在HOD中集成verify-account</title>
+      <p>在分配集群之前,HOD运行<em>verify-account</em>脚本,将<em>resource_manager.pbs-account</em>的值作为参数传递给用户自定义脚本来完成用户的确认。系统还可以通过这种方式来取代它本身的帐号系统。若该用户脚本中的返回值非0,就会导致HOD分配集群失败。并且在发生错误时,HOD还会将脚本中产生的错误信息打印出来。通过这种方式,任何描述性的错误信息都可以从用户脚本中返回给用户。
+      </p>
+      <p>在HOD中自带的默认脚本是不做任何的用户核实,并返回0。</p>
+      <p>如果HOD没有找到上面提到的verify-account脚本,HOD就会认为该用户核实的功能被关闭,然后继续自己以后的分配工作。</p>
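+      <p>下面是一个假设的verify-account脚本草稿。它根据一个本地维护的帐号列表文件做核实,文件路径/etc/hod/valid-accounts只是示例,实际的核实逻辑应替换为你所使用的帐号管理系统:</p>
+      <table>
+        <tr><td><code>#!/bin/sh</code><br/>
+        <code>ACCOUNT="$1"</code>(HOD传入的resource_manager.pbs-account的值)<br/>
+        <code>if grep -qx "$ACCOUNT" /etc/hod/valid-accounts; then</code><br/>
+        <code>&nbsp;&nbsp;exit 0</code>(帐号有效,允许继续分配)<br/>
+        <code>else</code><br/>
+        <code>&nbsp;&nbsp;echo "Account $ACCOUNT is not recognized"</code>(该信息会被HOD打印给用户)<br/>
+        <code>&nbsp;&nbsp;exit 1</code>(非0返回值会使本次集群分配失败)<br/>
+        <code>fi</code></td></tr>
+      </table>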
+    </section>
+    </section>
+    
+  </section>
+</body>
+</document>

+ 158 - 0
common/src/docs/cn/src/documentation/content/xdocs/hod_config_guide.xml

@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+
+  <header>
+    <title> 
+      Hadoop On Demand:配置指南
+    </title>
+  </header>
+
+  <body>
+    <section>
+      <title>1. 简介</title>
+      <p>
+      这个文档讲述了一些最重要和常用的Hadoop On Demand(HOD)的配置项。
+      这些配置项可通过两种方式指定:INI风格的配置文件,通过--section.option[=value]格式指定的HOD shell的命令行选项。如果两个地方都指定了同一个选项,命令行中的值覆盖配置文件中的值。
+	</p>
+      <p>
+	你可以通过以下命令获得所有配置项的简要描述:
+      </p>
+      <table><tr><td><code>$ hod --verbose-help</code></td></tr></table>
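+      <p>
+      例如,下面的命令在分配集群时用命令行覆盖hodrc中的两个配置项(其中的集群目录、节点数、调试级别和队列名都只是假设的示例值):
+      </p>
+      <table><tr><td><code>$ hod allocate -d ~/hod-clusters/test -n 5 --hod.debug=4 --resource_manager.queue=hod_queue</code></td></tr></table>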
+    
+    </section>
+    <section>
+      <title>2. 段</title>
+      <p>HOD配置文件分成以下几个配置段:</p>
+      <ul>
+        <li>  hod:                 HOD客户端的配置项</li>
+        <li>  resource_manager:    指定要使用的资源管理器的配置项,以及使用该资源管理器时需要的一些其他参数。</li>
+        <li>  ringmaster:          RingMaster进程的配置项</li>
+        <li>  hodring:             HodRing进程的配置项</li>
+        <li>  gridservice-mapred:  Map/Reduce守护进程的配置项</li>
+        <li>  gridservice-hdfs:    HDFS守护进程的配置项</li>
+      </ul>
+    </section>
+     <section>
+      <title>3. HOD配置项</title>
+      <p>
+      接下来的一节会先描述大部分HOD配置段中通用的一些配置项,再描述各配置段特有的配置项。
+      </p>
+ 
+      <section> 
+        <title>3.1 一般的配置项</title>
+        
+        <p>某些配置项会在HOD配置中的多个段定义。在一个段中定义的配置项,会被该段所适用的所有进程使用。这些配置项意义相同,但在不同的段中可以有不同的取值。</p>
+        
+        <ul>
+          <li>temp-dir: HOD进程使用的临时目录。请确保运行hod的用户有权限在这个指定的目录下创建子目录。如果想在每次分配的时候都使用不同的临时目录,可以使用环境变量,资源管理器会让这些环境变量对HOD进程可用。例如,在Torque设置的时候,使--ringmaster.temp-dir=/tmp/hod-temp-dir.$PBS_JOBID会让ringmaster在每一次申请时使用不同的临时目录;Torque会在ringmaster启动前展开这个环境变量。
+          </li>
+          <li>debug:数值类型,取值范围是1-4。4会产生最多的log信息。</li>
+          <li>log-dir:日志文件的存放目录。缺省值是&lt;install-location&gt;/logs/。temp-dir变量的限制和注意事项在这里同样适用。
+          </li>
+          <li>xrs-port-range:端口范围,会在这之中挑选一个可用端口用于运行XML-RPC服务。</li>
+          <li>http-port-range:端口范围,会在这之中挑选一个可用端口用于运行HTTP服务。</li>
+          <li>java-home:给Hadoop使用的Java的位置。</li>
+          <li>syslog-address:syslog守护进程要绑定的地址。格式为host:port。如果配置了这个选项,HOD日志信息会被记录到这个位置的syslog。</li>
+        </ul>
+      </section>
+      
+      <section>
+        <title>3.2 hod的配置项</title>
+        <ul>
+          <li>cluster:集群的描述性名称。对于Torque,这个值被指定为集群中所有节点的'Node property'。HOD使用这个值计算可用节点的个数。</li>
+          <li>client-params:逗号分割的hadoop配置参数列表,其中的每一项都是一个key-value对。在提交节点上会据此产生一个hadoop-site.xml,用于运行Map/Reduce作业。</li>
+          
+          <li>job-feasibility-attr: 正则表达式,用于指定是否和如何检查作业的可行性 - 资源管理器限制或调度限制。目前是通过torque作业的'comment'属性实现的,缺省情况下没有开启这个功能。设置了这个配置项后,HOD会使用它来确定哪些种类的限制是启用的,以及请求超出限制或者累积超出限制时是回收机群还是留在排队状态。torque comment属性可以被某个外部机制周期性地更新。比如,comment属性被hod/support目录下的<a href="hod_admin_guide.html#checklimits.sh+-+资源限制监视器">checklimits.sh</a>更新,这样设置job-feasibility-attr的值等于TORQUE_USER_LIMITS_COMMENT_FIELD, "User-limits exceeded. Requested:([0-9]*) Used:([0-9]*) MaxLimit:([0-9]*)"会使HOD产生相应的行为。</li>
+         </ul>
+      </section>
+      
+      <section>
+        <title>3.3 resouce_manager的配置项</title>
+      
+        <ul>
+          <li>queue:资源管理器中配置的队列名,作业会被提交到这里。</li>
+          
+          <li>batch-home:安装目录,其下的'bin'中有资源管理器的可执行文件。</li>
+          <li>env-vars:逗号分隔的key-value对的列表,形式是key=value,它会被传递给运行在计算节点的作业。例如,如果python没有安装在常规位置,用户可以通过设置环境变量'HOD_PYTHON_HOME'指定python可执行文件的路径。之后,在计算节点运行的HOD的进程就可以使用这个变量了。</li>
+        </ul>
+      </section>
+      
+      <section>
+        <title>3.4 ringmaster的配置项</title>
+        <ul>
+          <li>work-dirs:这是一个由逗号分隔的路径列表,这些路径将作为HOD产生和传递给Hadoop,用于存放DFS和Map/Reduce数据的目录的根目录。例如,这是DFS数据块存放的路径。一般情况下,有多少块磁盘就指定多少路径,以确保所有的磁盘都被利用到。temp-dir变量的限制和注意事项在这儿同样适用。</li>
+          <li>max-master-failures:hadoop主守护进程启动前可以失败的次数,超出这个次数后,HOD会让这次集群分配失败。在HOD集群中,有时候由于某些问题,比如机器没安装java,没有安装Hadoop,或者Hadoop版本错误等,会存在一个或几个“坏”节点。当这个配置项被设为正整数时,只有当hadoop master(JobTracker或者NameNode)在上述的坏节点上,由于上面提到的种种原因启动失败的次数超过设定的值时,RingMaster才会把错误返回给客户端。如果尝试启动的次数没有超过设定值,当下一个HodRing请求运行一个命令时,同一个hadoop master会指定给这个HodRing。这样,即使集群中存在一些坏的节点,HOD也会尽全力使这次分配成功。
+                       </li>
+ 
+        </ul>
+      </section>
+      <section>
+        <title>3.5 gridservice-hdfs的配置项</title>
+        <ul>
+          <li>external:如果被置为false,HOD必须在通过allocate命令分配的节点上自己创建HDFS集群。注意,在这种情况下,如果集群被回收,HDFS集群会停止,所有数据会丢失。如果被置为true,它会尝试连接外部的已配置的HDFS系统。通常,因为在作业运行之前作业的输入需要被放置在HDFS上,并且作业的输出需要持久保留,在生产环境中一个内部的HDFS集群意义不大。</li>
+          
+          <li>host:外部配置好的NameNode的主机名。</li>
+          
+          <li>fs_port:NameNode RPC服务绑定的端口。</li>
+          
+          <li>info_port:NameNode web UI服务绑定的端口。</li>
+          
+          <li>pkgs:安装目录,其下有bin/hadoop可执行文件。可用来使用集群上预先安装的Hadoop版本。</li>
+          
+          <li>server-params:一个逗号分割的hadoop配置参数列表,每一项为key-value对形式。这些将用于产生被NameNode和DataNode使用到的hadoop-site.xml文件。</li>
+          
+          <li>final-server-params:除会被标记为final外和上面相同。</li>
+        </ul>
+      </section>
+      
+      <section>
+        <title>3.6 gridservice-mapred的配置项</title>
+        
+        <ul>
+          <li>external:如果被置为false,HOD必须在通过allocate命令分配的节点上自己创建Map/Reduce集群。如果被置为true,它会尝试连接外部的已配置的Map/Reduce系统。</li>
+          <li>host:外部配置好的JobTracker的主机名。</li>
+          
+          <li>tracker_port:JobTracker RPC服务绑定的端口。</li>
+          
+          <li>info_port:JobTracker web UI服务绑定的端口。</li>
+          
+          <li>pkgs:安装目录,其下有bin/hadoop可执行文件。</li>
+          
+          <li>server-params:一个逗号分割的hadoop配置参数列表,每一项为key-value对形式。这些将用于产生被JobTracker和TaskTracker使用到的hadoop-site.xml文件。</li>
+          <li>final-server-params:除会被标记为final外和上面相同。</li>
+        </ul>
+      </section>
+
+      <section>
+        <title>3.7 hodring的配置项</title>
+
+        <ul>
+          <li>mapred-system-dir-root:DFS上的目录,HOD会在这个目录下创建子目录并把全路径作为参数'mapred.system.dir'的值传递给Hadoop守护进程。全路径的格式为value-of-this-option/userid/mapredsystem/cluster-id。注意,如果HDFS启用了权限,这里指定的路径下应允许所有用户创建子目录。设置此配置项的值为/user会使HOD使用用户的home目录来产生mapred.system.dir的值。</li>
+          <li>log-destination-uri:一个URL,能反映一个外部的静态的DFS或者集群节点上的本地文件系统上的路径。当集群被回收时,HOD会把Hadoop日志上传到这个路径。要指定DFS路径,使用'hdfs://path'格式。要指定一个集群节点上的本地文件系统路径,使用'file://path'格式。当HOD回收集群时,作为HOD的清除过程的一部分,hadoop日志会被删除。要做到持久存储这些日志,你可以使用这个配置项。路径的格式会是values-of-this-option/userid/hod-logs/cluster-id。注意,应该保证所有的用户能在这里指定的目录下创建子目录。把这个值设为hdfs://user会使这些日志被转移到用户在DFS上的home目录下。</li>
+          <li>pkgs:安装目录,其下有bin/hadoop可执行文件。如果给log-destination-uri指定了一个HDFS URL,HOD上传日志时会用到这个配置项。注意,当用户使用了和外部静态HDFS不同版本的tarball时,这个配置项会派上用场。</li>
+                                      
+        </ul>
+      </section>
+    </section>
+  </body>
+</document>

+ 559 - 0
common/src/docs/cn/src/documentation/content/xdocs/hod_user_guide.xml

@@ -0,0 +1,559 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+  <header>
+    <title>
+      Hadoop On Demand用户指南
+    </title>
+  </header>
+
+<body>
+  <section>
+    <title>简介</title><anchor id="Introduction"></anchor>
+  <p>Hadoop On Demand (HOD)是一个能在大规模物理集群上供应虚拟Hadoop集群的系统。它使用Torque资源管理器进行节点分配。在所分配的节点上,它能启动Hadoop Map/Reduce以及HDFS守护进程。它能自动为Hadoop守护进程及客户端生成合适的配置文件(Hadoop-site.xml)。HOD还能够将Hadoop分发到它分配的虚拟集群节点上。总之,HOD方便管理者和用户快速安装与使用Hadoop。它也是需要在同一物理集群上测试各自版本的Hadoop开发者和测试者的实用工具。</p>
+  <p>HOD支持Hadoop 0.15及其后续版本。</p>
+  <p>后面的文档包括一个快速入门指南能让你快速上手HOD,一个所有HOD特性的详细手册,命令行选项,一些已知问题和故障排除的信息。</p>
+  </section>
+  <section>
+          <title>HOD使用入门</title><anchor id="Getting_Started_Using_HOD_0_4"></anchor>
+
+<p>在这部分,我们将会逐步骤地介绍使用HOD涉及到的最基本的操作。在开始遵循这些步骤之前,我们假定HOD及其依赖的软硬件均已被正确安装和配置。这步通常由集群的系统管理员负责。</p>
+  <p>HOD的用户界面是一个命令行工具,叫做<code>hod</code>。它被一个通常由系统管理员为用户设置好的配置文件所驱动。用户在使用<code>hod</code>的时候可以覆盖这个配置,文档的后面会有介绍。使用<code>hod</code>时有如下两种方式可以指定配置文件:</p>
+  <ul>
+    <li>在命令行中指定,使用 -c 选项。例如<code>hod &lt;operation&gt; &lt;required-args&gt; -c path-to-the-configuration-file [ohter-options]</code></li>
+    <li>在运行<code>hod</code>的地方设置环境变量<em>HOD_CONF_DIR</em>。这个变量应指向一个本地目录,其中有名为<em>hodrc</em>的文件。这与Hadoop中的<em>HADOOP_CONF_DIR</em>与<em>hadoop-site.xml</em>文件是类似的。如果命令行中未指定配置文件,<code>hod</code>会查找<em>HOD_CONF_DIR</em>环境变量指定目录下的<em>hodrc</em>文件。</li>
+    </ul>
+  <p>下面的例子中,我们将不会明确指出这个配置选项,假定其已正确指定。</p>
+  <section><title>一个典型HOD会话</title><anchor id="HOD_Session"></anchor>
+  <p>一个典型HOD会话至少包括三个步骤:分配,执行Hadoop作业,回收。为此,执行如下步骤。</p>
+  <p><strong>创建一个集群目录</strong></p><anchor id="Create_a_Cluster_Directory"></anchor>
+  <p><em>集群目录</em>是本地文件系统上的一个目录,<code>hod</code>会为它分配的集群产生对应的Hadoop配置<em>hadoop-site.xml</em>,放在这个目录下。这个目录可以按下文所述方式传递给<code>hod</code>操作。如果这个目录不存在,HOD会自动创建这个目录。一旦分配好了集群,用户可通过Hadoop --config选项指定集群目录,在之上运行Hadoop作业。</p>
+
+  <p><strong><em>allocate</em>操作</strong></p><anchor id="Operation_allocate"></anchor>
+  <p><em>allocate</em>操作用来分配一组节点并在之上安装和提供Hadoop。它的语法如下。注意它要求指定参数集群目录(-d, --hod.clusterdir)和节点个数(-n, --hod.nodecount):</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod allocate -d cluster_dir -n number_of_nodes [OPTIONS]</code></td>
+        </tr>
+      
+    </table>
+  <p>如果命令成功执行,<code>cluster_dir/hadoop-site.xml</code>会被生成,文件中包含了分配出的集群的信息。它也会打印出关于Hadoop的web UI的信息。</p>
+  <p>试运行这个命令会产生如下输出。注意在这个例子中集群目录是<code>~/hod-clusters/test</code>,我们要分配5个节点:</p>
+  <table>
+    <tr>
+      <td><code>$ hod allocate -d ~/hod-clusters/test -n 5</code><br/>
+      <code>INFO - HDFS UI on http://foo1.bar.com:53422</code><br/>
+      <code>INFO - Mapred UI on http://foo2.bar.com:55380</code><br/></td>
+      </tr>
+   </table>
+
+  <p><strong>在分配的集群上执行Hadoop作业</strong></p><anchor id="Running_Hadoop_jobs_using_the_al"></anchor>
+  <p>现在,可以用一般的方式在分配的集群上执行Hadoop作业了。这里假定像<em>JAVA_HOME</em>这样指向Hadoop安装路径的环境变量已被正确地设置了:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hadoop --config cluster_dir hadoop_command hadoop_command_args</code></td>
+        </tr>
+      
+    </table>
+  <p>或者</p>
+    <table>
+      
+        <tr>
+          <td><code>$ export HADOOP_CONF_DIR=cluster_dir</code> <br />
+              <code>$ hadoop hadoop_command hadoop_command_args</code></td>
+        </tr>
+      
+    </table>
+  <p>继续我们的例子,下面的命令会在分配的集群上运行wordcount的例子:</p>
+  <table><tr><td><code>$ hadoop --config ~/hod-clusters/test jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</code></td></tr></table>
+  <p>或者</p>
+  <table><tr>
+    <td><code>$ export HADOOP_CONF_DIR=~/hod-clusters/test</code><br />
+    <code>$ hadoop jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</code></td>
+    </tr>
+  </table>
+  <p><strong> <em>deallocate</em>操作</strong></p><anchor id="Operation_deallocate"></anchor>
+  <p><em>deallocate</em>操作用来回收分配到的集群。当完成集群使用之后,必须执行回收操作使这些节点可以为其他用户所用。<em>deallocate</em>操作的语法如下。注意它需要集群目录(-d, --hod.clusterdir)作为参数:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod deallocate -d cluster_dir</code></td>
+        </tr>
+      
+    </table>
+  <p>继续我们的例子,如下命令会回收集群:</p>
+  <table><tr><td><code>$ hod deallocate -d ~/hod-clusters/test</code></td></tr></table>
+  <p>如你所见,HOD允许用户分配一个集群,随意的使用它来运行Hadoop作业。例如,通过从多个shell中启动使用同一个配置的hadoop,用户可以做到在同一个集群上并发运行多个作业。</p>
+	</section>
+  <section><title>使用HOD运行Hadoop脚本</title><anchor id="HOD_Script_Mode"></anchor>
+  <p>HOD的<em>script操作</em>能将集群的分配,使用和回收组织在一起。这对那些想运行Hadoop作业脚本,期望HOD能在脚本结束后自动完成清理操作的用户特别管用。用<code>hod</code>执行Hadoop脚本,需要这么做:</p>
+  <p><strong>创建脚本文件</strong></p><anchor id="Create_a_script_file"></anchor>
+  <p>这是一个普通的shell脚本,通常里面会包含hadoop命令,如:</p>
+  <table><tr><td><code>$ hadoop jar jar_file options</code></td>
+  </tr></table>
+  <p>当然,用户可以向脚本中添加任何有效的命令。HOD会在执行这个脚本时自动地设置<em>HADOOP_CONF_DIR</em>指向分配的集群。用户不必对此担心。不过,像分配操作时一样,用户需要指定一个集群目录。</p>
+  <p><strong>运行脚本</strong></p><anchor id="Running_the_script"></anchor>
+  <p><em>脚本操作</em>的语法如下。注意它需要集群目录(-d, --hod.clusterdir),节点个数(-n, --hod.nodecount)以及脚本文件(-s, --hod.script)作为参数:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod script -d cluster_directory -n number_of_nodes -s script_file</code></td>
+        </tr>
+      
+    </table>
+  <p>注意一旦脚本执行完毕,HOD就会回收集群,这意味着脚本必须等hadoop作业完成后才能结束。用户写脚本时必须注意这点。</p>
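+  <p>下面是一个假设的脚本文件草稿,其中的jar和输入输出路径都只是示例。由于<code>hadoop jar</code>会阻塞到作业结束,脚本返回时作业已经完成,满足上面的要求:</p>
+  <table><tr>
+    <td><code>#!/bin/sh</code><br/>
+    <code>hadoop jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</code><br/>
+    <code>hadoop dfs -ls /path/to/output</code></td>
+    </tr>
+  </table>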
+   </section>
+  </section>
+  <section>
+          <title>HOD的功能</title><anchor id="HOD_0_4_Features"></anchor>
+  <section><title>供应与管理Hadoop集群</title><anchor id="Provisioning_and_Managing_Hadoop"></anchor>
+	  <p>HOD主要功能是供应Hadoop的Map/Reduce和HDFS集群。这些在入门一节已经做过描述。 此外,要是还有节点可用,并且组织上也批准,一个用户可以在同一时间内使用HOD分配多个Map/Reduce集群。对于分配到的不同集群,用户需要为上面提到的<code>cluster_dir</code>参数指定不同的路径。HOD提供<em>list</em>和<em>info</em>操作可以管理多个集群。</p>
+  <p><strong><em>list</em>操作</strong></p><anchor id="Operation_list"></anchor>
+  <p>list操作能列举到目前为止用户所创建的所有集群。存放hadoop-site.xml的集群目录,与JobTracker和/或HDFS的连接及状态也会被显示出来。list操作的使用语法如下:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod list</code></td>
+        </tr>
+      
+    </table>
+  <p><strong><em>info</em>操作</strong></p><anchor id="Operation_info"></anchor>
+  <p>info操作会显示指定集群相关的信息。这些信息包括Torque作业id,HOD Ringmaster进程,Hadoop的JobTracker和NameNode守护进程等重要守护进程的位置。info操作的语法如下。注意它需要集群目录(-d, --hod.clusterdir)作为参数:</p>
+    <table>
+      
+        <tr>
+          <td><code>$ hod info -d cluster_dir</code></td>
+        </tr>
+      
+    </table>
+  <p><code>cluster_dir</code>应为前面<em>allocate</em>操作中指定的有效集群目录。</p>
+  </section>
+  <section><title>使用tarball分发Hadoop</title><anchor id="Using_a_tarball_to_distribute_Ha"></anchor>
+<p>供应Hadoop时,HOD可以使用集群节点上已经安装好的Hadoop,也可以将hadoop的tarball作为供应操作的一部分在节点上进行分发和安装。如果使用tarball选项,就不必非得使用预装的Hadoop了,也不要求集群节点上必须有一个预装的版本。这对开发/QE环境下在一个共享集群上测试不同版本hadoop的开发者尤其有用。</p>
+  <p>要使用预装的Hadoop,你必须在hodrc中的<code>gridservice-hdfs</code>部分和<code>gridservice-mapred</code>部分指定<code>pkgs</code>选项。它必须指向集群中所有节点上Hadoop的安装路径。</p>
+  <p>指定Tarball的语法如下:</p>
+    <table>
+        <tr>
+          <td><code>$ hod allocate -d cluster_dir -n number_of_nodes -t hadoop_tarball_location</code></td>
+        </tr>
+    </table>
+  <p>例如,下面的命令根据tarball<code>~/share/hadoop.tar.gz</code>分配Hadoop:</p>
+  <table><tr><td><code>$ hod allocate -d ~/hadoop-cluster -n 10 -t ~/share/hadoop.tar.gz</code></td></tr></table>
+  <p>类似地,使用hod脚本的语法如下:</p>
+    <table>
+        <tr>
+          <td><code>$ hod script -d cluster_directory -s script_file -n number_of_nodes -t hadoop_tarball_location</code></td>
+        </tr>
+    </table>
+  <p>上面语法中指定的hadoop_tarball_location应指向从所有计算节点都可以访问的共享文件系统的路径。当前,HOD只支持挂载的NFS。</p>
+  <p><em>注意:</em></p>
+  <ul>
+    <li>为了获得更好分发性能,建议Hadoop tarball只包含库与二进制文件,不包含源代码或文档。</li>
+    <li>当你希望在用tarball方式分配的集群上执行作业,你必须使用兼容的Hadoop版本提交你的作业。最好的方式是解压tarball,使用其中自带的版本。</li>
+    <li>你需要确保在tar分发包的conf目录下没有Hadoop配置文件hadoop-env.sh和hadoop-site.xml。如果这些文件存在并包含错误的值,集群分配可能会失败。
+</li>
+  </ul>
+  </section>
+  <section><title>使用外部HDFS</title><anchor id="Using_an_external_HDFS"></anchor>
+  <p>在典型的由HOD提供的Hadoop集群中,HDFS已经被静态地(未使用HOD)设置好。这能使数据在HOD提供的集群被回收后还可以持久保存在HDFS中。为使用静态配置的HDFS,你的hodrc必须指向一个外部HDFS。具体就是,在hodrc的<code>gridservice-hdfs</code>部分将下面选项设置为正确的值:</p>
+   <table><tr><td>external = true</td></tr><tr><td>host = HDFS NameNode主机名</td></tr><tr><td>fs_port = HDFS NameNode端口</td></tr><tr><td>info_port = HDFS NameNode web UI的端口</td></tr></table>
+  <p><em>注意:</em>你也可以从命令行开启这个选项。即,你这样去使用一个静态HDFS:<br />
+    </p>
+    <table>
+        <tr>
+          <td><code>$ hod allocate -d cluster_dir -n number_of_nodes --gridservice-hdfs.external</code></td>
+        </tr>
+    </table>
+    <p>如果需要,HOD既可以供应Map/Reduce集群,也可以同时供应HDFS集群。这需要设置hodrc中的<code>gridservice-hdfs</code>部分的下列选项:</p>
+  <table><tr><td>external = false</td></tr></table>
+  </section>
+  <section><title>配置Hadoop的选项</title><anchor id="Options_for_Configuring_Hadoop"></anchor>
+  <p>HOD提供一个非常方便的机制能配置它提供的Hadoop守护进程和它在客户端生成的hadoop-site.xml。通过在HOD配置文件中指定配置参数,或在分配集群时在命令行指定都可做到这点。</p>
+  <p><strong>配置Hadoop守护进程</strong></p><anchor id="Configuring_Hadoop_Daemons"></anchor>
+  <p>要配置Hadoop守护进程,你可以这么做:</p>
+  <p>对于Map/Reduce,指定<code>gridservice-mapred</code>部分的<code>server-params</code>项的指为一个以逗号分割的key-value对列表。同配置动态HDFS集群一样,设置<code>gridservice-hdfs</code>部分的<code>server-params</code>项。如果这些参数应被标记成<em>final</em>,将这些参数包含到相应部分的<code>final-server-params</code>项中。</p>
+  <p>例如:</p>
+  <table><tr><td><code>server-params = mapred.reduce.parallel.copies=20,io.sort.factor=100,io.sort.mb=128,io.file.buffer.size=131072</code></td></tr><tr><td><code>final-server-params = mapred.child.java.opts=-Xmx512m,dfs.block.size=134217728,fs.inmemory.size.mb=128</code></td>
+  </tr></table>
+  <p>要从命令行指定选项,你可以用如下语法:</p>
+  <p>配置Map/Reduce守护进程:</p>
+    <table>
+        <tr>
+          <td><code>$ hod allocate -d cluster_dir -n number_of_nodes -Mmapred.reduce.parallel.copies=20 -Mio.sort.factor=100</code></td>
+        </tr>
+    </table>
+  <p>在上述例子中,<em>mapred.reduce.parallel.copies</em>参数和<em>io.sort.factor</em>参数将会被添加到<code>server-params</code>中,如果已经在<code>server-params</code>中存在,则它们会被覆盖。要将这些参数指定成<em>final</em>类型,你可以:</p>
+    <table>
+        <tr>
+          <td><code>$ hod allocate -d cluster_dir -n number_of_nodes -Fmapred.reduce.parallel.copies=20 -Fio.sort.factor=100</code></td>
+        </tr>
+    </table>
+  <p>不过,应注意final参数无法被命令行改写,只有在尚未指定的情形下才能追加。</p>
+  <p>配置动态供应的HDFS守护进程的选项与此相似。用-H替换-M,用-S替换-F即可。</p>
+  <p><strong>配置Hadoop的作业提交(客户端)程序</strong></p><anchor id="Configuring_Hadoop_Job_Submissio"></anchor>
+  <p>如上所述,当allocate操作成功后,<code>cluster_dir/hadoop-site.xml</code>将会生成,其中会包含分配的集群的JobTracker和NameNode的信息。这个配置用于向集群提交作业。HOD提供选项可将其它的hadoop配置参数添加到该文件,其语法如下:</p>
+    <table>
+        <tr>
+          <td><code>$ hod allocate -d cluster_dir -n number_of_nodes -Cmapred.userlog.limit.kb=200 -Cmapred.child.java.opts=-Xmx512m</code></td>
+        </tr>
+    </table>
+  <p>上例中,<em>mapred.userlog.limit.kb</em>和<em>mapred.child.java.opts</em>会被添加到hod产生的hadoop-site.xml中。</p>
+  </section>
+  <section><title>查看Hadoop的Web-UI</title><anchor id="Viewing_Hadoop_Web_UIs"></anchor>
+  <p>HOD的allocate操作会打印出JobTracker和NameNode的Web UI的URL。例如:</p>
+   <table><tr><td><code>$ hod allocate -d ~/hadoop-cluster -n 10 -c ~/hod-conf-dir/hodrc</code><br/>
+    <code>INFO - HDFS UI on http://host242.foo.com:55391</code><br/>
+    <code>INFO - Mapred UI on http://host521.foo.com:54874</code>
+    </td></tr></table>
+  <p>上面提到的<em>info</em>操作可以给你同样的信息。</p>
+  </section>
+  <section><title>收集和查看Hadoop日志</title><anchor id="Collecting_and_Viewing_Hadoop_Lo"></anchor>
+  <p>要获取在某些分配节点上运行的守护进程的Hadoop日志:</p>
+  <ul>
+    <li>登录感兴趣的节点。如果你想查看JobTracker或者NameNode的日志,<em>list</em>和<em>info</em>操作能告诉你这些进程在那些节点上运行。</li>
+    <li>获取感兴趣的守护进程的进程信息(例如,<code>ps ux | grep TaskTracker</code>)</li>
+    <li>在这些进程信息中,查找变量<code>-Dhadoop.log.dir</code>的值。通常是hod配置文件里<code>hodring.temp-dir</code>目录的一个子目录 。</li>
+    <li>切换到<code>hadoop.log.dir</code>目录以查看守护进程日志和用户日志。</li>
+  </ul>
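+  <p>例如,在某个计算节点上定位TaskTracker日志的过程大致如下(节点名和目录都只是假设的占位符):</p>
+  <table><tr>
+    <td><code>$ ssh compute-node-17</code><br/>
+    <code>$ ps ux | grep TaskTracker</code>(在输出中查找-Dhadoop.log.dir=的值)<br/>
+    <code>$ cd &lt;hadoop.log.dir的值&gt;</code><br/>
+    <code>$ ls</code>(查看守护进程日志和用户日志)</td>
+    </tr>
+  </table>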
+  <p>HOD也提供了一个机制,能让你在集群回收后将日志收集存放到文件系统,或者一个在外部配置的HDFS中。这样的话,在作业完成,节点回收后你还可以看这些日志。要做到这点,像下面一样为log-destination-uri指定一个URI:</p>
+   <table><tr><td><code>log-destination-uri= hdfs://host123:45678/user/hod/logs</code>或者</td></tr>
+    <tr><td><code>log-destination-uri= file://path/to/store/log/files</code></td></tr>
+    </table>
+    <p>在上面指定的根目录中,HOD会创建路径user_name/torque_jobid,把作业涉及到的每个节点上的日志文件gzip压缩,存放在里面。</p>
+  <p>注意要在HDFS上存储这些文件,你得将<code>hodring.pkgs</code>项配置为和刚才提到的HDFS兼容的版本。否则,HOD会尝试使用它供应Hadoop集群时用到的Hadoop版本。</p>
+  </section>
+  <section><title>闲置集群的自动回收</title><anchor id="Auto_deallocation_of_Idle_Cluste"></anchor>
+  <p>HOD会自动回收在一段时间内没有运行Hadoop作业的集群。每次的HOD分配会带有一个监控设施不停地检查Hadoop作业的执行。如果侦测到在一定时间内没Hadoop作业在执行,它就回收这个集群,释放那些未被有效利用的节点。</p>
+  <p><em>注意:</em>当集群被回收时,<em>集群目录</em>没有被自动清空。用户须通过一个正式的<em>deallocate</em>操作清理它。</p>
+	</section>
+  <section><title>指定额外的作业属性</title><anchor id="Specifying_Additional_Job_Attrib"></anchor>
+  <p>HOD允许用户为一个Torque作业指定一个时钟时间和一个名称(或者标题)。 </p>
+  <p>时钟时间是对Torque作业有效时间的一个估计。这个时间过期后,Torque将自动删除这个作业,释放其节点。指定这个时钟时间还能帮助作业调度程序更好的安排作业,提高对集群资源的使用率。</p>
+  <p>指定时钟时间的语法如下:</p>
+    <table>
+        <tr>
+          <td><code>$ hod allocate -d cluster_dir -n number_of_nodes -l time_in_seconds</code></td>
+        </tr>
+    </table>
+  <p>Torque作业的名称或标题能给用户以友好的作业标识。每次展示Torque作业的属性的时候,这个字符串就会出现,包括<code>qstat</code>命令。</p>
+  <p>指定名称或标题的语法如下:</p>
+    <table>
+        <tr>
+          <td><code>$ hod allocate -d cluster_dir -n number_of_nodes -N name_of_job</code></td>
+        </tr>
+    </table>
+  <p><em>注意:</em>由于底层Torque资源管理器的限制,不以字母开头或者包含空格的名字将导致作业失败。失败信息会表明问题存在于指定的作业名称中。</p>
+  </section>
+  <section><title>捕获HOD在Torque中的退出码</title><anchor id="Capturing_HOD_exit_codes_in_Torq"></anchor>
+  <p>HOD退出码出现在Torque的exit_status字段中。这有助于使用者和系统管理员区分成功的HOD执行和失败的HOD执行。如果分配成功且所有Hadoop作业在所分配的集群上正确的执行,退出码为0。如果分配失败或者部分hadoop作业在分配集群上运行失败,退出码非0。下表列出了可能出现的退出码。<em>注意:只有所使用的Hadoop版本是0.16或以上时,Hadoop作业状态才可以被捕获。</em></p>
+  <table>
+    
+      <tr>
+        <td>退出码</td>
+        <td>含义</td>
+      </tr>
+      <tr>
+        <td> 6 </td>
+        <td>Ringmaster故障</td>
+      </tr>
+      <tr>
+        <td> 7 </td>
+        <td> DFS故障</td>
+      </tr>
+      <tr>
+        <td> 8 </td>
+        <td> Job tracker故障</td>
+      </tr>
+      <tr>
+        <td> 10 </td>
+        <td> 集群死亡</td>
+      </tr>
+      <tr>
+        <td> 12 </td>
+        <td> 集群已分配 </td>
+      </tr>
+      <tr>
+        <td> 13 </td>
+        <td> HDFS死亡</td>
+      </tr>
+      <tr>
+        <td> 14 </td>
+        <td> Mapred死亡</td>
+      </tr>
+      <tr>
+        <td> 16 </td>
+        <td>集群中所有的Map/Reduce作业失败。查看hadoop日志了解更多细节。</td>
+      </tr>
+      <tr>
+        <td> 17 </td>
+        <td>集群中部分的Map/Reduce作业失败。查看hadoop日志了解更多细节。</td>
+      </tr>
+  </table>
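+  <p>例如,可以在shell中这样查看一次HOD运行的退出码(集群目录和脚本路径为假设的示例值),再对照上表确定含义:</p>
+  <table><tr>
+    <td><code>$ hod script -d ~/hod-clusters/test -n 3 -s ~/my-hadoop-script.sh</code><br/>
+    <code>$ echo $?</code>(0表示分配和作业都成功,非0时参照上表)</td>
+    </tr>
+  </table>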
+  </section>
+  <section>
+    <title>命令行</title><anchor id="Command_Line"></anchor>
+    <p>HOD命令行的通用的语法如下:<br/>
+      <em>hod &lt;operation&gt; [ARGS] [OPTIONS]<br/></em>
+      允许的操作有‘allocate’,‘deallocate’,‘info’,‘list’,‘script’以及‘help’。要获取某特定操作的帮助你可以执行:<code>hod help &lt;operation&gt;</code>。要查看可能的操作你可以执行<code>hod help options</code>。</p>
+      <p><em>allocate</em><br />
+      <em>用法:hod allocate -d cluster_dir -n number_of_nodes [OPTIONS]</em><br />
+      分配一个指定节点数目的集群,把分配信息存放在cluster_dir方便后续<code>hadoop</code>命令使用。注意<code>cluster_dir</code>必须在运行该命令前已经存在。</p>
+      <p><em>list</em><br/>
+      <em>用法:hod list [OPTIONS]</em><br />
+      列举出用户分配的所有集群。提供的信息包括集群对应的的Torque作业标识,存储分配信息的集群目录,Map/Reduce守护进程是否存活。</p>
+      <p><em>info</em><br/>
+      <em>用法:hod info -d cluster_dir [OPTIONS]</em><br />
+      列举集群分配信息存放于某指定集群目录的集群信息。</p>
+      <p><em>deallocate</em><br/>
+      <em>用法:hod deallocate -d cluster_dir [OPTIONS]</em><br />
+      回收集群分配信息存放于某指定集群目录的集群。</p>
+      <p><em>script</em><br/>
+      <em>用法:hod script -s script_file -d cluster_directory -n number_of_node [OPTIONS]</em><br />
+      用HOD<em>script</em>操作执行一个hadoop脚本。在给定数目的节点上提供Hadoop,在提交的节点执行这个脚本,并在脚本执行结束后回收集群。</p>
+      <p><em>help</em><br/>
+      <em>用法:hod help [operation | 'options']</em><br/>
+      未指定参数时,<code>hod help</code>给出用法以及基本选项,等同于<code>hod --help</code> (见下文)。当指定参数‘options’时,显示hod的基本选项。当指定operation时,它会显示出该特定operation的用法和相应的描述。例如,希望了解allocate操作,你可以执行<code>hod help allocate</code></p>
+      <p>除上面的操作外,HOD还能接受下列命令行选项。</p>
+      <p><em>--help</em><br />
+      打印出用法和基本选项的帮助信息。</p>
+      <p><em>--verbose-help</em><br />
+      hodrc文件中所有的配置项均可通过命令行传递,使用语法<code>--section_name.option_name[=value]</code>。这种方式下,命令行传递的参数会覆盖hodrc中的配置项。verbose-help命令会列出hodrc文件中全部可用项。这也是一个了解配置选项含义的好方法。</p>
+      <p><a href="#Options_Configuring_HOD">下一部分</a>有多数重要的hod配置项的描述。对于基本选项,你可以通过<code>hod help options</code>了解,对于所有的hod配置中的可能选项,你可以参看<code>hod --verbose-help</code>的输出。了解所有选项的描述,请参看<a href="hod_config_guide.html">配置指南</a>。</p>
+  </section>
+  <section><title> HOD配置选项</title><anchor id="Options_Configuring_HOD"></anchor>
+  <p> 如上所述,HOD的配置是通过系统管理员设置配置文件完成。这是一个INI风格的配置文件,文件分成多个段,每个段包含一些配置项。这些段分别和HOD的进程:client,ringmaster,hodring,mapreduce或hdfs相关。每一个配置项由选项名和值构成。</p>
+  <p>有两种方式可让用户覆盖默认配置文件里的设定:</p>
+  <ul>
+    <li>在每条命令前,用户可以向HOD提供自己的配置文件,使用<code>-c</code>选项。</li>
+    <li>用户可以在命令行指定HOD的配置选项覆盖正使用的配置文件中提供的值。</li>
+  </ul>
+  <p>这一节介绍一些最常用的配置项。为了指定方便,这些常用选项通常会有一个<em>短</em>选项名。所有其它选项可能用随后介绍的<em>长</em>选项指定。</p>
+  <p><em>-c config_file</em><br />
+  提供要使用的配置文件。可与其他任何的HOD选项一起使用。此外,可定义<code>HOD_CONF_DIR</code>环境变量为一个包含<code>hodrc</code>文件的目录,避免每条HOD命令都要指定配置文件。</p>
+  <p><em>-d cluster_dir</em><br />
+  大多数hod操作都要求这个选项。如<a href="#Create_a_Cluster_Directory">此处</a>描述的,<em>集群目录</em>是本地文件系统上的一个目录,<code>hod</code>会把为所分配集群生成的Hadoop配置,即<em>hadoop-site.xml</em>,存放在这个目录里。使用-d或者--hod.clusterdir将这个参数传递给<code>hod</code>操作,如果目录不存在,HOD会自动创建该目录。集群分配好后,用户可通过将hadoop --config指定为集群目录,在这个集群上执行Hadoop作业。</p>
+  <p><em>-n number_of_nodes</em><br />
+  hod allocate操作和script操作要求这个选项。表示要分配的节点数。</p>
+  <p><em>-s script-file</em><br/>
+  脚本操作时需要,用于指定要执行的脚本文件。</p>
+  <p><em>-b 1|2|3|4</em><br />
+  启用给定的调试级别。能与其他HOD选项一起使用。级别4最为详尽。</p>
+  <p><em>-t hadoop_tarball</em><br />
+  从指定tar.gz文件提供Hadoop分发。此选项值只适用于<em>allocate</em>操作。为获得更好的分发性能,强烈推荐创建Hadoop tarball<em>前</em>删除其中的源代码或文档。</p>
+  <p><em>-N job-name</em><br />
+  内部使用的资源管理作业名。比如,对于Torque作为资源管理器的情况,会被解释成<code>qsub -N</code>选项,使用<code>qstat</code>命令时可以看到这个作业名。</p>
+  <p><em>-l wall-clock-time</em><br />
+  用户希望在所分配的集群上运行作业的时间总量。它被传递给HOD底层的资源管理器,用于更有效地调度和利用集群。注意对于Torque的情形,这个时间到期后,集群会被自动回收。</p>
+  <p><em>-j java-home</em><br />
+  JAVA_HOME环境变量里指定的路径。在<em>script</em>操作中使用。HOD将JAVA_HOME环境变量设置为这个值,并在此环境下启动用户脚本。</p>
+  <p><em>-A account-string</em><br />
+  传递给后台资源管理器的记账(accounting)信息。</p>
+  <p><em>-Q queue-name</em><br />
+  接受作业提交的后台资源管理器中队列的名称。</p>
+  <p><em>-Mkey1=value1 -Mkey2=value2</em><br/>
+  为供应的Map/Reduce守护进程(JobTracker以及TaskTracker)提供配置参数。在集群节点上,会根据这些值产生一个hadoop-site.xml。 <br />
+  <em>注意:</em>值中的下列字符:空格,逗号,等号,分号需要使用‘\’转义, 且放置在引号中。你也可以使用‘\’来转义‘\’。</p>
+  <p><em>-Hkey1=value1 -Hkey2=value2</em><br />
+  为供应的HDFS守护进程(NameNode以及DataNode)提供配置参数。在集群节点上,会根据这些值产生一个hadoop-site.xml。 <br />
+  <em>注意:</em>值中的下列字符:空格,逗号,等号,分号需要使用‘\’转义, 且放置在引号中。你也可以使用‘\’来转义‘\’。</p>
+  <p><em>-Ckey1=value1 -Ckey2=value2</em><br />
+  为提交作业的客户端提供配置参数。在提交节点上,会根据这些值产生一个hadoop-site.xml。<br />
+  <em>注意:</em>值中的下列字符:空格,逗号,等号,分号需要使用‘\’转义, 且放置在引号中。你也可以使用‘\’来转义‘\’。</p>
+  <p><em>--section-name.option-name=value</em><br />
+  这是用<em>长</em>格式提供配置选项的方法。比如,你可以指定<em>--hod.script-wait-time=20</em>。</p>
+	</section>
+	</section>
+<section>
+	  <title>故障排除</title><anchor id="Troubleshooting"></anchor>
+  <p>下面列出了用户使用HOD时可能碰到的一些常见错误情况,以及解决问题的方法。</p>
+<section><title>分配操作时<code>hod</code>挂起</title><anchor id="_hod_Hangs_During_Allocation"></anchor><anchor id="hod_Hangs_During_Allocation"></anchor>
+  <p><em>可能原因:</em>HOD或Hadoop的一个组件启动失败。这种情况下,<code>hod</code>命令会在一段时间(通常是2-3分钟)后返回,退出码是错误代码部分定义的错误码7或8。参考该部分以获得更多细节。 </p>
+  <p><em>可能原因:</em>使用tarball模式申请了大规模的集群。有时由于网络负载,或者是分配节点上的负载,tarball分发过程可能会慢得比较明显,需要几分钟才能响应。等待命令完成。还可以检查一下tarball,看是否不含Hadoop源码或文档。</p>
+  <p><em>可能原因:</em>Torque相关的问题。如果原因与Torque相关,<code>hod</code>命令5分钟内是不会返回的。在调试模式下运行<code>hod</code>你会发现<code>qstat</code>命令被重复执行。在另一个shell中执行<code>qstat</code>命令你会发现作业处于<code>Q</code>(排队)状态。这通常说明Torque出现了问题。可能原因有个别节点宕机,或者增加了新节点但Torque并不知情。通常,需要系统管理员帮助解决此问题。</p>
+    </section>
+<section><title>回收操作时<code>hod</code>挂起</title><anchor id="_hod_Hangs_During_Deallocation"></anchor><anchor id="hod_Hangs_During_Deallocation"></anchor>
+  <p><em>可能原因:</em>Torque相关的问题,通常是Torque server上的负载较大,或者是分配的集群非常大。一般来说,你唯一能做的是等待命令执行完成。</p>
+  </section>
+  <section><title><code>hod</code>失败时的错误代码和错误信息</title><anchor id="hod_Fails_With_an_error_code_and"></anchor><anchor id="_hod_Fails_With_an_error_code_an"></anchor>
+  <p>如果<code>hod</code>命令的退出码不是<code>0</code>,参考下面的退出代码表确定此情况发生的原因和相应的调试方法。</p>
+  <p><strong>错误代码</strong></p><anchor id="Error_Codes"></anchor>
+  <table>
+    
+      <tr>
+        <th>错误代码</th>
+        <th>含义</th>
+        <th>可能原因及补救方法</th>
+      </tr>
+      <tr>
+        <td> 1 </td>
+        <td>配置错误 </td>
+        <td>hodrc中的参数错误,或者其他与HOD配置相关的错误。此类情况下,错误信息已经足够帮你发现和解决问题。</td>
+      </tr>
+      <tr>
+        <td> 2 </td>
+        <td>无效操作</td>
+        <td>执行<code>hod help</code>查看有效的操作列表。</td>
+      </tr>
+      <tr>
+        <td> 3 </td>
+        <td>无效操作参数</td>
+        <td>执行<code>hod help operation</code>查看特定操作的用法。</td>
+      </tr>
+      <tr>
+        <td> 4 </td>
+        <td>调度失败</td>
+        <td> 1. 请求分配了过多的资源。执行<code>checknodes cluster_name</code>查看是否有足够多的可用节点。<br />
+             2. 请求的资源超出了资源管理器的限制。<br />
+             3. Torque配置错误,Torque可执行文件路径配置错误,或者其它Torque相关问题。联系系统管理员。</td>
+      </tr>
+      <tr>
+        <td> 5 </td>
+        <td>执行作业失败</td>
+        <td> 1. Torque作业被外部删除。执行Torque <code>qstat</code>命令查看是否有作业处于<code>R</code>(运行)状态。如果没有,尝试重新运行HOD。<br/>
+          2. Torque的问题诸如服务器暂时性宕机,或者无响应。联系系统管理员。 <br/>
+          3. 系统管理员可能配置了帐号核实,并且一个非法的帐号被指定。请联系系统管理员。 </td>
+      </tr>
+      <tr>
+        <td> 6 </td>
+        <td>Ringmaster故障</td>
+        <td> HOD会打印信息"Cluster could not be allocated because of the following errors on the ringmaster host &lt;hostname&gt;"。实际的错误信息可能指示下列情形中的一种:<br/>
+          1. 运行ringmaster的节点配置不合法,错误信息中的hostname会指明具体的机器。<br/>
+          2. <code>ringmaster</code>段的配置无效,<br />
+          3. <code>gridservice-mapred或者gridservice-hdfs</code>段中<code>pkgs</code>项的配置无效,<br />
+          4. 无效的hadoop tarball,或者tarball中conf目录下存在无效的配置文件,<br />
+          5. Hadoop中的MapReduce与外部HDFS版本不匹配。<br />
+          Torque <code>qstat</code>命令很可能会显示一个处于<code>C</code>(Completed,已完成)状态的作业。<br/>
+          你可以登录到HOD失败信息中给出的ringmaster主机,根据错误信息的提示解决问题。如果错误信息没有给出完整的信息,ringmaster日志也可能帮助找到问题的根源。参考下面<em>定位Ringmaster日志</em>一节了解更多信息。</td>
+      </tr>
+      <tr>
+        <td> 7 </td>
+        <td> DFS故障</td>
+        <td> 当HOD由于DFS故障(或者Job tracker失败,错误码8,下文有介绍)分配失败时,它会打印错误信息 "Hodring at &lt;hostname&gt; failed with following errors:",并给出真正的错误信息,这个信息可能表明下列情形中的一种:<br/>
+	  1. 启动Hadoop集群时出现问题。通常错误信息会表明之前提到的主机出现错误的真正原因。你也要检查HOD配置文件中Hadoop相关的配置。按上面<em>收集和查看Hadoop日志</em>一节中介绍的方法查看Hadoop的日志。<br />
+          2. 运行hodring的节点上的配置无效,错误信息中的hostname会指明机器<br/>
+          3. hodrc中<code>hodring</code>段的配置无效。<code>ssh</code>到错误信息中提到的节点,在hodring日志中grep<code>ERROR</code>或<code>CRITICAL</code>。参考下面<em>定位Hodring日志</em>部分获取更多信息。<br />
+	  4. 指定了无效的tarball,可能未正确打包。<br />
+          5. 无法与外部配置的HDFS通信。<br />
+          当DFS或Job tracker出现故障时,你可以登录到HOD失败信息中提到的主机上,进行debug。解决问题的时候,你也应通过查看ringmaster日志中的其它日志信息,来检查其他机器是否在启动jobtracker/namenode时也出现了问题,而不只是检查错误信息中提到的主机。其他机器也可能发生问题是因为HOD会按照配置项<a href="hod_config_guide.html#3.4+ringmaster的配置项">ringmaster.max-master-failures</a>的设置在多个机器上连续尝试和启动hadoop守护进程。更多关于ringmaster日志的信息请参考下文<em>定位Ringmaster日志</em>。
+</td>
+      </tr>
+      <tr>
+        <td> 8 </td>
+        <td>Job tracker故障</td>
+        <td>与<em>DFS故障</em>情形中的原因类似。</td>
+      </tr>
+      <tr>
+        <td> 10 </td>
+        <td>集群死亡</td>
+        <td>1. 集群因为较长时间空闲被自动回收。<br />
+          2. 集群因系统管理员或者用户指定的时钟时间到期被自动回收。<br />
+          3. 无法与成功分配的JobTracker以及HDFS的NameNode通信。回收集群,重新分配。</td>
+      </tr>
+      <tr>
+        <td> 12 </td>
+        <td>集群已分配</td>
+        <td>指定的集群目录已被用于先前的分配操作,且尚未回收。指定另外一个目录,或者先回收先前分配的。</td>
+      </tr>
+      <tr>
+        <td> 13 </td>
+        <td>HDFS死亡</td>
+        <td>无法与HDFS的NameNode通信。HDFS的NameNode停掉了。</td>
+      </tr>
+      <tr>
+        <td> 14 </td>
+        <td>Mapred死亡</td>
+        <td> 1. 集群因为长时间闲置被自动回收。 <br />
+          2. 集群因系统管理员或用户指定的时钟时间到期被自动回收。<br />
+	  3. 无法与Map/Reduce的JobTracker通信。JobTracker节点宕机。 <br />
+          </td>
+      </tr>
+      <tr>
+        <td> 15 </td>
+        <td>集群未分配</td>
+        <td>一个需要已分配集群的操作被指定了一个没有状态信息的集群目录。</td>
+      </tr>
+   
+      <tr>
+        <td>任意非0退出代码</td>
+        <td>HOD脚本错误</td>
+        <td>如果使用了hod的脚本选项,很可能这个退出代码是脚本的退出码。不幸的是,这可能会与hod自己的退出码冲突。为帮助用户区分两者,如果脚本返回了一个退出码,hod将此退出码写到了集群目录下的script.exitcode文件。你可以cat这个文件以确定脚本的退出码。如果文件不存在,则退出代码是hod命令的退出码。</td>
+      </tr>
+  </table>
+    </section>
+    
+  <section><title>Hadoop DFSClient警告NotReplicatedYetException信息</title>
+  <p>有时,当你申请到一个HOD集群后马上尝试上传文件到HDFS时,DFSClient会警告NotReplicatedYetException。通常会有一个这样的信息 - </p><table><tr><td><code>WARN
+hdfs.DFSClient: NotReplicatedYetException sleeping &lt;filename&gt; retries
+left 3</code></td></tr><tr><td><code>08/01/25 16:31:40 INFO hdfs.DFSClient:
+org.apache.hadoop.ipc.RemoteException: java.io.IOException: File
+&lt;filename&gt; could only be replicated to 0 nodes, instead of
+1</code></td></tr></table><p> 当DataNode还正在和NameNode建立联系时,你就向集群上传文件,这种现象就会发生。在上传新文件到HDFS之前多等待一段时间就可以解决这个问题,因为这样能让足够多的DataNode启动并联络上NameNode。</p>
+</section>
+
+    
+  <section><title>成功分配的集群上无法运行Hadoop作业</title><anchor id="Hadoop_Jobs_Not_Running_on_a_Suc"></anchor>
+  <p>这一情景通常发生在这种情形:一个集群已经分配,并且一段时间内处于不活跃状态,之后hadoop作业试图在这个集群上运行。Hadoop作业会失败,产生如下异常信息:</p>
+  <table><tr><td><code>08/01/25 16:31:40 INFO ipc.Client: Retrying connect to server: foo.bar.com/1.1.1.1:53567. Already tried 1 time(s).</code></td></tr></table>
+  <p><em>可能原因:</em>相当长的时间内无hadoop作业运行,集群会如<em>闲置集群的自动回收</em>一节介绍的那样被自动回收。回收该集群,然后重新分配。</p>
+  <p><em>可能原因:</em>从分配开始算起,Torque管理员指定的或<em>指定额外的作业属性</em>一节中定义的<code>-l</code>选项指定的时间上限过期。这种情况下集群可能已被释放。回收集群,然后重新分配。</p>
+  <p><em>可能原因:</em>提交作业使用的hadoop版本和供应集群的Hadoop版本(通常通过tarball选项)不匹配。确保使用兼容的版本。</p>
+  <p><em>可能原因:</em> 提交job的hadoop客户端与提供的hadoop(通常通过tarball选项)版本不兼容。 确保所使用hadoop软件版本兼容。</p>
+  <p><em>可能原因:</em> 你使用了<code>-M</code>或<code>-H</code>中的一个指定Hadoop配置,其中有未正确转义的字符比如空格或逗号。参考<em>HOD配置选项</em>一节以了解如何正确指定这些选项。</p>
+    </section>
+  <section><title>我的Hadoop作业被中止了</title><anchor id="My_Hadoop_Job_Got_Killed"></anchor>
+  <p><em>可能原因:</em>从分配开始算起,Torque管理员指定的或<em>指定额外的作业属性</em>一节中定义的<code>-l</code>选项指定的时间上限过期。这种情况下集群可能已被释放。回收集群,然后重新分配,这次要指定一个大点儿的时钟时间。</p>
+  <p><em>可能原因:</em> JobTracker节点出现问题。参考<em>收集和查看Hadoop日志</em>一节以获取更多信息。</p>
+    </section>
+  <section><title>Hadoop作业失败并返回消息:‘Job tracker still initializing’</title><anchor id="Hadoop_Job_Fails_with_Message_Jo"></anchor>
+  <p><em>可能原因:</em>hadoop作业是作为HOD脚本的一部分运行的,它在JobTracker完全就绪前开始了执行。分配集群时为配置项<code>--hod.script-wait-time</code>设定一个大点儿的值。通常取120是可以工作的,尽管通常没必要这么大。</p>
+    </section>
+  <section><title>Torque的退出代码没有包含HOD的</title><anchor id="The_Exit_Codes_For_HOD_Are_Not_G"></anchor>
+  <p><em>可能原因:</em>此功能需要Hadoop 0.16。所用的Hadoop版本不满足这个条件。请使用合适的Hadoop版本。</p>
+  <p><em>可能原因:</em>没有使用<code>hod</code>命令回收集群;例如直接使用<code>qdel</code>。当使用这种方式回收集群时,HOD进程被信号中止。这会导致退出码是基于signal number的,而不是程序的退出码。</p>
+    </section>
+  <section><title>Hadoop日志未被上传到DFS</title><anchor id="The_Hadoop_Logs_are_Not_Uploaded"></anchor>
+  <p><em>可能原因:</em>上传日志的使用的hadoop与外部的HDFS版本不兼容。确保<code>hodring.pkgs</code>选项指定了正确的版本。</p>
+    </section>
+  <section><title>定位Ringmaster日志</title><anchor id="Locating_Ringmaster_Logs"></anchor>
+  <p>遵循以下步骤定位ringmaster日志:</p>
+  <ul>
+    <li>用-b选项在调试模式执行hod。这会打印出当前运行的Torque作业的标识。</li>
+    <li>执行<code>qstat -f torque_job_id</code>,在输出中查找<code>exec_host</code>参数的值。列表中的第一个主机就是ringmaster节点。</li>
+    <li>登陆该节点。</li>
+  <li>ringmaster日志的位置由hodrc中的<code>ringmaster.log-dir</code>项指定。日志文件的名字会是<code>username.torque_job_id/ringmaster-main.log</code>。</li>
+    <li>如果你没有获取到足够的信息,你可以将ringmaster的调试级别设为4。这可通过向hod命令行传递<code>--ringmaster.debug 4</code>做到。</li>
+  </ul>
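+  <p>把上面的步骤串成命令大致如下,其中的Torque作业号、主机名和日志目录都只是假设的占位符:</p>
+  <table><tr>
+    <td><code>$ hod allocate -d ~/hod-clusters/test -n 5 -b 4</code>(调试模式,输出中包含Torque作业标识)<br/>
+    <code>$ qstat -f &lt;torque_job_id&gt;</code>(在输出中查找exec_host,列表中第一个主机即ringmaster节点)<br/>
+    <code>$ ssh &lt;ringmaster节点&gt;</code><br/>
+    <code>$ less &lt;ringmaster.log-dir&gt;/&lt;username&gt;.&lt;torque_job_id&gt;/ringmaster-main.log</code></td>
+    </tr>
+  </table>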
+  </section>
+  <section><title>定位Hodring日志</title><anchor id="Locating_Hodring_Logs"></anchor>
+  <p>遵循以下步骤定位hodring日志:</p>
+  <ul>
+    <li>用-b选项在调试模式下运行hod。这将打印当前运行的Torque作业的标识。</li>
+    <li>执行<code>qstat -f torque_job_id</code>,查看输出中<code>exec_host</code>参数的值。列表中的的所有节点上都有一个hodring。</li>
+    <li>登陆到任何一个节点。</li>
+    <li>hodring日志的位置由hodrc中的<code>hodring.log-dir</code>项指定。日志文件的名字会是<code>username.torque_job_id/hodring-main.log</code>。</li>
+    <li>如果你没有获得足够的信息,你或许想将hodring的调试等级更改为4。这可以向hod命令行传递<code>--hodring.debug 4</code> 来做到。</li>
+  </ul>
+  </section>
+	</section>	
+</body>
+</document>

+ 46 - 0
common/src/docs/cn/src/documentation/content/xdocs/index.xml

@@ -0,0 +1,46 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  
+  <header>
+    <title>Hadoop文档</title>
+  </header>
+  
+  <body>
+    <p>
+	下面的文档是一些概念介绍和操作教程,可帮助你开始使用Hadoop。如果遇到了问题,你可以向<a href="ext:lists">邮件列表</a>求助或者浏览一下存档邮件。
+    </p>
+    <ul>
+      <li><a href="quickstart.html">Hadoop快速入门</a></li>
+      <li><a href="cluster_setup.html">Hadoop集群搭建</a></li>
+      <li><a href="hdfs_design.html">Hadoop分布式文件系统</a></li>
+      <li><a href="mapred_tutorial.html">Hadoop Map-Reduce教程</a></li>
+      <li><a href="native_libraries.html">Hadoop本地库</a></li>
+      <li><a href="ext:api/index">API参考</a></li>
+      <li><a href="ext:wiki">维基</a></li>
+      <li><a href="ext:faq">常见问题</a></li>
+    </ul>
+    <p>
+    </p>
+
+  </body>
+  
+</document>

+ 2466 - 0
common/src/docs/cn/src/documentation/content/xdocs/mapred_tutorial.xml

@@ -0,0 +1,2466 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  <header>
+    <title>Hadoop Map/Reduce教程</title>
+  </header>
+  
+  <body>
+  
+    <section>
+      <title>目的</title>
+      
+      <p>这篇教程从用户的角度出发,全面地介绍了Hadoop Map/Reduce框架的各个方面。</p>
+    </section>
+    
+    <section>
+      <title>先决条件</title>
+      
+      <p>请先确认Hadoop被正确安装、配置和正常运行中。更多信息见:</p> 
+      <ul>
+        <li>
+          <a href="quickstart.html">Hadoop快速入门</a>对初次使用者。
+        </li>
+        <li>
+          <a href="cluster_setup.html">Hadoop集群搭建</a>对大规模分布式集群。
+        </li>
+      </ul>
+    </section>
+    
+    <section>
+      <title>概述</title>
+      
+      <p>Hadoop Map/Reduce是一个使用简易的软件框架,基于它写出来的应用程序能够运行在由上千个商用机器组成的大型集群上,并以一种可靠容错的方式并行处理上T级别的数据集。</p>
+      
+      <p>一个Map/Reduce <em>作业(job)</em> 通常会把输入的数据集切分为若干独立的数据块,由
+      <em>map任务(task)</em>以完全并行的方式处理它们。框架会对map的输出先进行排序,
+      然后把结果输入给<em>reduce任务</em>。通常作业的输入和输出都会被存储在文件系统中。
+      整个框架负责任务的调度和监控,以及重新执行已经失败的任务。</p>
+      
+      <p>通常,Map/Reduce框架和<a href="hdfs_design.html">分布式文件系统</a>是运行在一组相同的节点上的,也就是说,计算节点和存储节点通常在一起。这种配置允许框架在那些已经存好数据的节点上高效地调度任务,这可以使整个集群的网络带宽被非常高效地利用。</p>
+      
+      <p>Map/Reduce框架由一个单独的master <code>JobTracker</code> 和每个集群节点一个slave <code>TaskTracker</code>共同组成。master负责调度构成一个作业的所有任务,这些任务分布在不同的slave上,master监控它们的执行,重新执行已经失败的任务。而slave仅负责执行由master指派的任务。</p>
+      
+      <p>应用程序至少应该指明输入/输出的位置(路径),并通过实现合适的接口或抽象类提供map和reduce函数。再加上其他作业的参数,就构成了<em>作业配置(job configuration)</em>。然后,Hadoop的 <em>job client</em>提交作业(jar包/可执行程序等)和配置信息给<code>JobTracker</code>,后者负责分发这些软件和配置信息给slave、调度任务并监控它们的执行,同时提供状态和诊断信息给job-client。</p>
+      
+      <p>虽然Hadoop框架是用Java<sup>TM</sup>实现的,但Map/Reduce应用程序则不一定要用
+      Java来写 。</p>
+      <ul>
+        <li>
+          <a href="ext:api/org/apache/hadoop/streaming/package-summary">
+          Hadoop Streaming</a>是一种运行作业的实用工具,它允许用户创建和运行任何可执行程序
+          (例如:Shell工具)来做为mapper和reducer。
+        </li>
+        <li>
+          <a href="ext:api/org/apache/hadoop/mapred/pipes/package-summary">
+          Hadoop Pipes</a>是一个与<a href="http://www.swig.org/">SWIG</a>兼容的C++ API
+          (没有基于JNI<sup>TM</sup>技术),它也可用于实现Map/Reduce应用程序。
+        </li>
+      </ul>
+    </section>
+    
+    <section>
+      <title>输入与输出</title>
+
+      <p>Map/Reduce框架运转在<code>&lt;key, value&gt;</code> 键值对上,也就是说,
+      框架把作业的输入看为是一组<code>&lt;key, value&gt;</code> 键值对,同样也产出一组
+      <code>&lt;key, value&gt;</code> 键值对做为作业的输出,这两组键值对的类型可能不同。</p> 
+      
+      <p>框架需要对<code>key</code>和<code>value</code>的类(classes)进行序列化操作,
+      因此,这些类需要实现 <a href="ext:api/org/apache/hadoop/io/writable">Writable</a>接口。
+      另外,为了方便框架执行排序操作,<code>key</code>类必须实现
+      <a href="ext:api/org/apache/hadoop/io/writablecomparable">
+      WritableComparable</a>接口。
+      </p>
+
+      <p>一个Map/Reduce 作业的输入和输出类型如下所示:</p>
+      <p>
+        (input) <code>&lt;k1, v1&gt;</code> 
+        -&gt; 
+        <strong>map</strong> 
+        -&gt; 
+        <code>&lt;k2, v2&gt;</code> 
+        -&gt; 
+        <strong>combine</strong> 
+        -&gt; 
+        <code>&lt;k2, v2&gt;</code> 
+        -&gt; 
+        <strong>reduce</strong> 
+        -&gt; 
+        <code>&lt;k3, v3&gt;</code> (output)
+      </p>
+    </section>
+
+    <section>
+      <title>例子:WordCount v1.0</title>
+      
+      <p>在深入细节之前,让我们先看一个Map/Reduce的应用示例,以便对它们的工作方式有一个初步的认识。</p>      
+      <p><code>WordCount</code>是一个简单的应用,它可以计算出指定数据集中每一个单词出现的次数。</p>      
+      <p>这个应用适用于
+      <a href="quickstart.html#Standalone+Operation">单机模式</a>,
+      <a href="quickstart.html#SingleNodeSetup">伪分布式模式</a> 或
+      <a href="quickstart.html#Fully-Distributed+Operation">完全分布式模式</a> 
+      三种Hadoop安装方式。</p>
+      
+      <section>
+        <title>源代码</title>
+        
+        <table>
+          <tr>
+            <th></th>
+            <th>WordCount.java</th>
+          </tr>
+          <tr>
+            <td>1.</td>
+            <td>
+              <code>package org.myorg;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>2.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>3.</td>
+            <td>
+              <code>import java.io.IOException;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>4.</td>
+            <td>
+              <code>import java.util.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>5.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>6.</td>
+            <td>
+              <code>import org.apache.hadoop.fs.Path;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>7.</td>
+            <td>
+              <code>import org.apache.hadoop.conf.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>8.</td>
+            <td>
+              <code>import org.apache.hadoop.io.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>9.</td>
+            <td>
+              <code>import org.apache.hadoop.mapred.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>10.</td>
+            <td>
+              <code>import org.apache.hadoop.util.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>11.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>12.</td>
+            <td>
+              <code>public class WordCount {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>13.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>14.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>
+                public static class Map extends MapReduceBase 
+                implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>15.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                private final static IntWritable one = new IntWritable(1);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>16.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>private Text word = new Text();</code>
+            </td>
+          </tr>
+          <tr>
+            <td>17.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>18.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                public void map(LongWritable key, Text value, 
+                OutputCollector&lt;Text, IntWritable&gt; output, 
+                Reporter reporter) throws IOException {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>19.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>String line = value.toString();</code>
+            </td>
+          </tr>
+          <tr>
+            <td>20.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>StringTokenizer tokenizer = new StringTokenizer(line);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>21.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>while (tokenizer.hasMoreTokens()) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>22.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>word.set(tokenizer.nextToken());</code>
+            </td>
+          </tr>
+          <tr>
+            <td>23.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>output.collect(word, one);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>24.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>25.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>26.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>27.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>28.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>
+                public static class Reduce extends MapReduceBase implements 
+                Reducer&lt;Text, IntWritable, Text, IntWritable&gt; {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>29.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                public void reduce(Text key, Iterator&lt;IntWritable&gt; values,
+                OutputCollector&lt;Text, IntWritable&gt; output, 
+                Reporter reporter) throws IOException {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>30.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>int sum = 0;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>31.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>while (values.hasNext()) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>32.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>sum += values.next().get();</code>
+            </td>
+          </tr>
+          <tr>
+            <td>33.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>34.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>output.collect(key, new IntWritable(sum));</code>
+            </td>
+          </tr>
+          <tr>
+            <td>35.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>36.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>37.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>38.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>
+                public static void main(String[] args) throws Exception {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>39.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                JobConf conf = new JobConf(WordCount.class);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>40.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setJobName("wordcount");</code>
+            </td>
+          </tr>
+          <tr>
+            <td>41.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>42.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setOutputKeyClass(Text.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>43.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setOutputValueClass(IntWritable.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>44.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>45.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setMapperClass(Map.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>46.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setCombinerClass(Reduce.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>47.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setReducerClass(Reduce.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>48.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>49.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setInputFormat(TextInputFormat.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>50.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setOutputFormat(TextOutputFormat.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>51.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>52.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>FileInputFormat.setInputPaths(conf, new Path(args[0]));</code>
+            </td>
+          </tr>
+          <tr>
+            <td>53.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>FileOutputFormat.setOutputPath(conf, new Path(args[1]));</code>
+            </td>
+          </tr>
+          <tr>
+            <td>54.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>55.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>JobClient.runJob(conf);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>56.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>57.</td>
+            <td>
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>58.</td>
+            <td></td>
+          </tr>
+        </table>
+      </section>
+      
+      <section>
+        <title>用法</title>
+        
+        <p>假设环境变量<code>HADOOP_PREFIX</code>对应安装时的根目录,<code>HADOOP_VERSION</code>对应Hadoop的当前安装版本,编译<code>WordCount.java</code>来创建jar包,可如下操作:</p>
+        <p>
+          <code>$ mkdir wordcount_classes</code><br/>
+          <code>
+            $ javac -classpath ${HADOOP_PREFIX}/hadoop-${HADOOP_VERSION}-core.jar 
+              -d wordcount_classes WordCount.java
+          </code><br/>
+          <code>$ jar -cvf /usr/joe/wordcount.jar -C wordcount_classes/ .</code> 
+        </p>
+        
+        <p>假设:</p>
+        <ul>
+          <li>
+            <code>/usr/joe/wordcount/input</code>  - 是HDFS中的输入路径
+          </li>
+          <li>
+            <code>/usr/joe/wordcount/output</code> - 是HDFS中的输出路径
+          </li>
+        </ul>
+        
+        <p>用示例文本文件做为输入:</p>
+        <p>
+          <code>$ bin/hadoop dfs -ls /usr/joe/wordcount/input/</code><br/>
+          <code>/usr/joe/wordcount/input/file01</code><br/>
+          <code>/usr/joe/wordcount/input/file02</code><br/>
+          <br/>
+          <code>$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file01</code><br/>
+          <code>Hello World Bye World</code><br/>
+          <br/>
+          <code>$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file02</code><br/>
+          <code>Hello Hadoop Goodbye Hadoop</code>
+        </p>
+
+        <p>运行应用程序:</p>
+        <p>
+          <code>
+            $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount 
+              /usr/joe/wordcount/input /usr/joe/wordcount/output 
+          </code>
+        </p>
+
+        <p>输出是:</p>
+        <p>
+          <code>
+            $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000
+          </code>
+          <br/>
+          <code>Bye    1</code><br/>
+          <code>Goodbye    1</code><br/>
+          <code>Hadoop    2</code><br/>
+          <code>Hello    2</code><br/>
+          <code>World    2</code><br/>
+        </p>
+          <p>应用程序能够使用<code>-files</code>选项指定一个由逗号分隔的路径列表,这些路径所指定的文件会出现在task的当前工作目录下。使用<code>-libjars</code>选项可以向map和reduce任务的classpath中添加jar包。使用<code>-archives</code>选项可以传递档案文件做为参数,这些档案文件会被解压,并且在task的当前工作目录下会创建一个指向解压生成目录的符号链接(以档案文件的名字命名)。
+        有关命令行选项的更多细节请参考
+        <a href="commands_manual.html">Commands manual</a>。</p>
+
+        <p>使用<code>-libjars</code>和<code>-files</code>运行<code>wordcount</code>例子:<br/>
+        <code> hadoop jar hadoop-examples.jar wordcount -files cachefile.txt
+        -libjars mylib.jar input output </code>
+        </p>
+
+      </section>
+      
+      <section>
+        <title>解释</title>
+        
+        <p><code>WordCount</code>应用程序非常直截了当。</p>
+        
+        <p><code>Mapper</code>(14-26行)中的<code>map</code>方法(18-25行)通过指定的
+        <code>TextInputFormat</code>(49行)一次处理一行。然后,它通过<code>StringTokenizer</code>
+        以空格为分隔符将一行切分为若干tokens,之后,输出<code>&lt; &lt;word&gt;, 1&gt;</code>
+        形式的键值对。</p>
+        
+        <p>
+        对于示例中的第一个输入,map输出是:<br/>
+          <code>&lt; Hello, 1&gt;</code><br/>
+          <code>&lt; World, 1&gt;</code><br/>
+          <code>&lt; Bye, 1&gt;</code><br/>
+          <code>&lt; World, 1&gt;</code><br/>
+        </p>
+        
+        <p>
+          第二个输入,map输出是:<br/>
+          <code>&lt; Hello, 1&gt;</code><br/>
+          <code>&lt; Hadoop, 1&gt;</code><br/>
+          <code>&lt; Goodbye, 1&gt;</code><br/>
+          <code>&lt; Hadoop, 1&gt;</code><br/>
+        </p>
+        
+        <p>关于组成一个指定作业的map数目的确定,以及如何以更精细的方式去控制这些map,我们将在教程的后续部分学习到更多的内容。</p>
+        
+        <p><code>WordCount</code>还指定了一个<code>combiner</code> (46行)。因此,每次map运行之后,会对输出按照<em>key</em>进行排序,然后把输出传递给本地的combiner(按照作业的配置与Reducer一样),进行本地聚合。</p>
+
+        <p>
+         第一个map的输出是:<br/>
+          <code>&lt; Bye, 1&gt;</code><br/>
+          <code>&lt; Hello, 1&gt;</code><br/>
+          <code>&lt; World, 2&gt;</code><br/>
+        </p>
+        
+        <p>
+          第二个map的输出是:<br/>
+          <code>&lt; Goodbye, 1&gt;</code><br/>
+          <code>&lt; Hadoop, 2&gt;</code><br/>
+          <code>&lt; Hello, 1&gt;</code><br/>
+        </p>
+
+        <p><code>Reducer</code>(28-36行)中的<code>reduce</code>方法(29-35行)
+        仅是将每个key(本例中就是单词)出现的次数求和。
+        </p>
+        
+        <p>
+          因此这个作业的输出就是:<br/>
+          <code>&lt; Bye, 1&gt;</code><br/>
+          <code>&lt; Goodbye, 1&gt;</code><br/>
+          <code>&lt; Hadoop, 2&gt;</code><br/>
+          <code>&lt; Hello, 2&gt;</code><br/>
+          <code>&lt; World, 2&gt;</code><br/>
+        </p>
+        
+        <p>代码中的<code>main</code>方法(38-56行)在<code>JobConf</code>中指定了作业的几个方面,
+        例如:通过命令行传入的输入/输出路径、key/value的类型、输入/输出的格式等等。随后程序调用<code>JobClient.runJob</code>(55行)来提交作业并监控它的执行。</p>
+
+        <p>我们将在本教程的后续部分学习更多的关于<code>JobConf</code>, <code>JobClient</code>,
+        <code>Tool</code>和其他接口及类(class)。</p>
+      </section>
+    </section>
+    
+    <section>
+      <title>Map/Reduce - 用户界面</title>
+      
+      <p>这部分文档对用户将会接触到的Map/Reduce框架的各个环节做了适当详细的说明,有助于用户以更细的粒度去实现、配置和调优作业。不过请注意,每个类/接口的javadoc仍然是最全面的文档;本文只起到指南和教程的作用。
+      </p>
+      
+      <p>我们会先看看<code>Mapper</code>和<code>Reducer</code>接口。应用程序通常会通过提供<code>map</code>和<code>reduce</code>方法来实现它们。
+      </p>
+      
+      <p>然后,我们会讨论其他的核心接口,其中包括:
+      <code>JobConf</code>,<code>JobClient</code>,<code>Partitioner</code>, 
+      <code>OutputCollector</code>,<code>Reporter</code>, 
+      <code>InputFormat</code>,<code>OutputFormat</code>等等。</p>
+      
+      <p>最后,我们将通过讨论框架中一些有用的功能点(例如:<code>DistributedCache</code>, 
+      <code>IsolationRunner</code>等等)来收尾。</p>
+
+      <section>
+        <title>核心功能描述</title>
+        
+        <p>应用程序通常会通过提供<code>map</code>和<code>reduce</code>来实现
+        <code>Mapper</code>和<code>Reducer</code>接口,它们组成作业的核心。</p>
+        
+        <section>
+          <title>Mapper</title>
+
+          <p><a href="ext:api/org/apache/hadoop/mapred/mapper">
+          Mapper</a>将输入键值对(key/value pair)映射到一组中间格式的键值对集合。</p>
+ 
+          <p>Map是一类将输入记录集转换为中间格式记录集的独立任务。
+          这种转换的中间格式记录集不需要与输入记录集的类型一致。一个给定的输入键值对可以映射成0个或多个输出键值对。</p> 
+ 
+          <p>Hadoop Map/Reduce框架为每一个<code>InputSplit</code>产生一个map任务,而每个<code>InputSplit</code>是由该作业的<code>InputFormat</code>产生的。</p>
+          
+          <p>概括地说,<code>Mapper</code>的实现者需要重写
+          <a href="ext:api/org/apache/hadoop/mapred/jobconfigurable/configure">
+		  JobConfigurable.configure(JobConf)</a>方法,这个方法需要传递一个<code>JobConf</code>参数,目的是完成Mapper的初始化工作。然后,框架为这个任务的<code>InputSplit</code>中每个键值对调用一次
+	  <a href="ext:api/org/apache/hadoop/mapred/mapper/map">
+		  map(WritableComparable, Writable, OutputCollector, Reporter)</a>操作。应用程序可以通过重写<a href="ext:api/org/apache/hadoop/io/closeable/close">Closeable.close()</a>方法来执行相应的清理工作。</p>
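+          <p>下面是一个覆盖上述三个方法的<code>Mapper</code>骨架示意。其中的类名
+          <code>UpperCaseMapper</code>和参数名<code>myapp.to.upper</code>都只是本文假设的示例,并非框架的约定;
+          所需的import与上文WordCount示例相同。</p>
+          <p>
+            <code>public class UpperCaseMapper extends MapReduceBase</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; {</code><br/>
+            &nbsp;&nbsp;<code>private boolean toUpper;</code><br/>
+            <br/>
+            &nbsp;&nbsp;<code>public void configure(JobConf job) { // 初始化:读取作业参数</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>toUpper = job.getBoolean("myapp.to.upper", true);</code><br/>
+            &nbsp;&nbsp;<code>}</code><br/>
+            &nbsp;&nbsp;<code>public void map(LongWritable key, Text value,</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<code>OutputCollector&lt;Text, IntWritable&gt; output, Reporter reporter) throws IOException {</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>String line = toUpper ? value.toString().toUpperCase() : value.toString();</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>output.collect(new Text(line), new IntWritable(1)); // 收集输出的中间键值对</code><br/>
+            &nbsp;&nbsp;<code>}</code><br/>
+            &nbsp;&nbsp;<code>public void close() throws IOException { // 清理工作,本例中无事可做</code><br/>
+            &nbsp;&nbsp;<code>}</code><br/>
+            <code>}</code>
+          </p>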
+ 
+          <p>输出键值对不需要与输入键值对的类型一致。一个给定的输入键值对可以映射成0个或多个输出键值对。通过调用<a href="ext:api/org/apache/hadoop/mapred/outputcollector/collect">
+          OutputCollector.collect(WritableComparable,Writable)</a>可以收集输出的键值对。</p>
+
+          <p>应用程序可以使用<code>Reporter</code>报告进度,设定应用级别的状态消息,更新<code>Counters</code>(计数器),或者仅是表明自己运行正常。</p>
+ 
+          <p>框架随后会把与一个特定key关联的所有中间过程的值(value)分成组,然后把它们传给<code>Reducer</code>以产出最终的结果。用户可以通过
+          <a href="ext:api/org/apache/hadoop/mapred/jobconf/setoutputkeycomparatorclass">
+          JobConf.setOutputKeyComparatorClass(Class)</a>来指定具体负责分组的
+          <code>Comparator</code>。</p>
+
+          <p><code>Mapper</code>的输出被排序后,就被划分给每个<code>Reducer</code>。分块的总数目和一个作业的reduce任务的数目是一样的。用户可以通过实现自定义的          <code>Partitioner</code>来控制哪个key被分配给哪个 <code>Reducer</code>。</p>
+ 
+          <p>用户可选择通过<a href="ext:api/org/apache/hadoop/mapred/jobconf/setcombinerclass">
+          JobConf.setCombinerClass(Class)</a>指定一个<code>combiner</code>,它负责对中间过程的输出进行本地的聚集,这会有助于降低从<code>Mapper</code>到
+          <code>Reducer</code>数据传输量。
+          </p>
+          <p>这些被排好序的中间过程的输出结果保存的格式是(key-len, key, value-len, value),应用程序可以通过<code>JobConf</code>控制对这些中间结果是否进行压缩以及怎么压缩,使用哪种<a href="ext:api/org/apache/hadoop/io/compress/compressioncodec">
+          CompressionCodec</a>。
+          </p>
+          
+          <section>
+            <title>需要多少个Map?</title>
+             
+            <p>Map的数目通常是由输入数据的大小决定的,一般就是所有输入文件的总块(block)数。</p>
+  
+            <p>Map正常的并行规模大致是每个节点(node)大约10到100个map,对于CPU
+            消耗较小的map任务可以设到300个左右。由于每个任务初始化需要一定的时间,因此,比较合理的情况是map执行的时间至少超过1分钟。</p>
+ 
+            <p>这样,如果你输入10TB的数据,每个块(block)的大小是128MB,你将需要大约82,000个map来完成任务,除非使用
+            <a href="ext:api/org/apache/hadoop/mapred/jobconf/setnummaptasks">
+            setNumMapTasks(int)</a>(注意:这里仅仅是对框架进行了一个提示(hint),实际决定因素见<a href="ext:api/org/apache/hadoop/mapred/jobconf/setnummaptasks">这里</a>)将这个数值设置得更高。</p>
+          </section>
+        </section>
+        
+        <section>
+          <title>Reducer</title>
+          
+          <p><a href="ext:api/org/apache/hadoop/mapred/reducer">
+          Reducer</a>将与一个key关联的一组中间数值集归约(reduce)为一个更小的数值集。</p>
+          
+          <p>用户可以通过<a href="ext:api/org/apache/hadoop/mapred/jobconf/setnumreducetasks">
+          JobConf.setNumReduceTasks(int)</a>设定一个作业中reduce任务的数目。</p>
+          
+          <p>概括地说,<code>Reducer</code>的实现者需要重写
+          <a href="ext:api/org/apache/hadoop/mapred/jobconfigurable/configure">
+          JobConfigurable.configure(JobConf)</a>方法,这个方法需要传递一个<code>JobConf</code>参数,目的是完成Reducer的初始化工作。然后,框架为成组的输入数据中的每个<code>&lt;key, (list of values)&gt;</code>对调用一次
+          <a href="ext:api/org/apache/hadoop/mapred/reducer/reduce">
+          reduce(WritableComparable, Iterator, OutputCollector, Reporter)</a>方法。之后,应用程序可以通过重写<a href="ext:api/org/apache/hadoop/io/closeable/close">Closeable.close()</a>来执行相应的清理工作。</p>
+
+          <p><code>Reducer</code>有3个主要阶段:shuffle、sort和reduce。
+          </p>
+          
+          <section>
+            <title>Shuffle</title>
+   
+            <p><code>Reducer</code>的输入就是Mapper已经排好序的输出。在这个阶段,框架通过HTTP为每个Reducer获得所有Mapper输出中与之相关的分块。</p>
+          </section>
+   
+          <section>
+            <title>Sort</title>
+   
+            <p>这个阶段,框架将按照key的值对<code>Reducer</code>的输入进行分组
+            (因为不同mapper的输出中可能会有相同的key)。</p>
+   
+            <p>Shuffle和Sort两个阶段是同时进行的;map的输出也是一边被取回一边被合并的。</p>
+      
+            <section>
+              <title>Secondary Sort</title>
+   
+              <p>如果需要中间过程对key的分组规则和reduce前对key的分组规则不同,那么可以通过<a href="ext:api/org/apache/hadoop/mapred/jobconf/setoutputvaluegroupingcomparator">
+              JobConf.setOutputValueGroupingComparator(Class)</a>来指定一个<code>Comparator</code>。再加上
+              <a href="ext:api/org/apache/hadoop/mapred/jobconf/setoutputkeycomparatorclass">
+              JobConf.setOutputKeyComparatorClass(Class)</a>可用于控制中间过程的key如何被分组,所以结合两者可以实现<em>按值的二次排序</em>。
+              </p>
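+              <p>下面是把两者结合起来的配置示意片段。其中
+              <code>FullKeyComparator</code>和<code>FirstKeyGroupingComparator</code>
+              是本文假设的、需要应用程序自己提供的<code>RawComparator</code>实现。</p>
+              <p>
+                <code>// 排序时比较完整的组合key(自然key加上需要参与排序的值)</code><br/>
+                <code>conf.setOutputKeyComparatorClass(FullKeyComparator.class);</code><br/>
+                <code>// 分组时只比较自然key,使同一自然key的所有值进入同一次reduce调用</code><br/>
+                <code>conf.setOutputValueGroupingComparator(FirstKeyGroupingComparator.class);</code>
+              </p>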
+            </section>
+          </section>
+   
+          <section>   
+            <title>Reduce</title>
+   
+            <p>在这个阶段,框架为已分组的输入数据中的每个
+          <code>&lt;key, (list of values)&gt;</code>对调用一次
+          <a href="ext:api/org/apache/hadoop/mapred/reducer/reduce">
+          reduce(WritableComparable, Iterator, OutputCollector, Reporter)</a>方法。</p>
+            
+            <p>Reduce任务的输出通常是通过调用
+            <a href="ext:api/org/apache/hadoop/mapred/outputcollector/collect">
+            OutputCollector.collect(WritableComparable, Writable)</a>写入
+            <a href="ext:api/org/apache/hadoop/fs/filesystem">
+            文件系统</a>的。</p>
+   
+          <p>应用程序可以使用<code>Reporter</code>报告进度,设定应用程序级别的状态消息,更新<code>Counters</code>(计数器),或者仅是表明自己运行正常。</p>
+
+	  <p><code>Reducer</code>的输出是<em>没有排序的</em>。</p>
+          </section>
+          
+          <section>
+            <title>需要多少个Reduce?</title>
+ 
+            <p>Reduce的数目建议是<code>0.95</code>或<code>1.75</code>乘以
+            (&lt;<em>no. of nodes</em>&gt; * 
+            <code>mapred.tasktracker.reduce.tasks.maximum</code>)。
+            </p>
+ 
+            <p>用0.95,所有reduce可以在maps一完成时就立刻启动,开始传输map的输出结果。用1.75,速度快的节点可以在完成第一轮reduce任务后,可以开始第二轮,这样可以得到比较好的负载均衡的效果。</p>
+ 
+            <p>增加reduce的数目会增加整个框架的开销,但可以改善负载均衡,降低由于执行失败带来的负面影响。</p>
+ 
+            <p>上述比例因子比整体数目稍小一些是为了给框架中的推测性任务(speculative-tasks)
+            或失败的任务预留一些reduce的资源。</p>
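+            <p>例如,按照上面的经验值做设置的一个示意片段如下,其中的节点数和每个节点的reduce槽数都是假设值:</p>
+            <p>
+              <code>int numNodes = 10; // 假设集群有10个节点</code><br/>
+              <code>int reduceSlotsPerNode = 2; // 假设mapred.tasktracker.reduce.tasks.maximum为2</code><br/>
+              <code>conf.setNumReduceTasks((int) (0.95 * numNodes * reduceSlotsPerNode)); // 即19个reduce</code>
+            </p>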
+          </section>
+          
+          <section>
+            <title>无Reducer</title>
+            
+	    <p>如果没有归约要进行,那么设置reduce任务的数目为<em>零</em>是合法的。</p>
+ 
+            <p>这种情况下,map任务的输出会直接被写入由
+            <a href="ext:api/org/apache/hadoop/mapred/fileoutputformat/setoutputpath">
+		    setOutputPath(Path)</a>指定的输出路径。框架在把它们写入<code>FileSystem</code>之前没有对它们进行排序。
+            </p>
+          </section>
+        </section>
+        
+        <section>
+          <title>Partitioner</title>
+          
+          <p><a href="ext:api/org/apache/hadoop/mapred/partitioner">
+          Partitioner</a>用于划分键值空间(key space)。</p>
+          
+          <p>Partitioner负责控制map输出结果key的分割。Key(或者一个key子集)被用于产生分区,通常使用的是Hash函数。分区的数目与一个作业的reduce任务的数目是一样的。因此,它控制将中间过程的key(也就是这条记录)应该发送给<code>m</code>个reduce任务中的哪一个来进行reduce操作。
+	  </p>
+          
+          <p><a href="ext:api/org/apache/hadoop/mapred/lib/hashpartitioner">
+          HashPartitioner</a>是默认的 <code>Partitioner</code>。  </p>
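+          <p>下面是一个按key首字母划分的自定义<code>Partitioner</code>示意,类名
+          <code>FirstLetterPartitioner</code>只是本文假设的示例:</p>
+          <p>
+            <code>public class FirstLetterPartitioner implements Partitioner&lt;Text, IntWritable&gt; {</code><br/>
+            &nbsp;&nbsp;<code>public void configure(JobConf job) { } // 本例无需额外配置</code><br/>
+            &nbsp;&nbsp;<code>public int getPartition(Text key, IntWritable value, int numPartitions) {</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>String s = key.toString();</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>int letter = s.length() &gt; 0 ? Character.toLowerCase(s.charAt(0)) : 0;</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>return letter % numPartitions; // 返回值必须落在[0, numPartitions)范围内</code><br/>
+            &nbsp;&nbsp;<code>}</code><br/>
+            <code>}</code>
+          </p>
+          <p>作业可以通过<code>conf.setPartitionerClass(FirstLetterPartitioner.class)</code>来使用它。</p>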
+        </section>
+        
+        <section>
+          <title>Reporter</title>
+
+          <p><a href="ext:api/org/apache/hadoop/mapred/reporter">
+          Reporter</a>是用于Map/Reduce应用程序报告进度,设定应用级别的状态消息,
+          更新<code>Counters</code>(计数器)的机制。</p>
+   
+          <p><code>Mapper</code>和<code>Reducer</code>的实现可以利用<code>Reporter</code>
+          来报告进度,或者仅是表明自己运行正常。在那种应用程序需要花很长时间处理个别键值对的场景中,这种机制是很关键的,因为框架可能会以为这个任务超时了,从而将它强行杀死。另一个避免这种情况发生的方式是,将配置参数<code>mapred.task.timeout</code>设置为一个足够高的值(或者干脆设置为零,则没有超时限制了)。
+          </p>
+
+          <p>应用程序可以用<code>Reporter</code>来更新<code>Counter</code>(计数器)。
+          </p>
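+          <p>下面是在<code>map</code>方法里使用<code>Reporter</code>的示意片段,
+          其中的枚举<code>MyCounters</code>只是本文假设的示例:</p>
+          <p>
+            <code>public void map(LongWritable key, Text value,</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>OutputCollector&lt;Text, IntWritable&gt; output, Reporter reporter) throws IOException {</code><br/>
+            &nbsp;&nbsp;<code>reporter.setStatus("processing " + key); // 设定应用级别的状态消息</code><br/>
+            &nbsp;&nbsp;<code>// ……这里是对value耗时较长的处理……</code><br/>
+            &nbsp;&nbsp;<code>reporter.progress(); // 表明任务仍在正常运行,避免被框架判为超时</code><br/>
+            &nbsp;&nbsp;<code>reporter.incrCounter(MyCounters.RECORDS, 1); // 更新自定义计数器</code><br/>
+            <code>}</code>
+          </p>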
+        </section>
+      
+        <section>
+          <title>OutputCollector</title>
+        
+          <p><a href="ext:api/org/apache/hadoop/mapred/outputcollector">
+          OutputCollector</a>是一个Map/Reduce框架提供的用于收集
+          <code>Mapper</code>或<code>Reducer</code>输出数据的通用机制
+          (包括中间输出结果和作业的输出结果)。</p>
+        </section>
+      
+        <p>Hadoop Map/Reduce框架附带了一个包含许多实用型的mapper、reducer和partitioner
+        的<a href="ext:api/org/apache/hadoop/mapred/lib/package-summary">类库</a>。</p>
+      </section>
+      
+      <section>
+        <title>作业配置</title>
+        
+        <p><a href="ext:api/org/apache/hadoop/mapred/jobconf">
+        JobConf</a>代表一个Map/Reduce作业的配置。</p>
+ 
+        <p><code>JobConf</code>是用户向Hadoop框架描述一个Map/Reduce作业如何执行的主要接口。框架会按照<code>JobConf</code>描述的信息忠实地去尝试完成这个作业,然而:</p> 
+        <ul>
+          <li>
+            一些参数可能会被管理者标记为<a href="ext:api/org/apache/hadoop/conf/configuration/final_parameters">
+            final</a>,这意味它们不能被更改。
+          </li>
+          <li>
+          一些作业的参数可以被直截了当地进行设置(例如:
+          <a href="ext:api/org/apache/hadoop/mapred/jobconf/setnumreducetasks">
+            setNumReduceTasks(int)</a>),而另一些参数则与框架或者作业的其他参数之间微妙地相互影响,并且设置起来比较复杂(例如:<a href="ext:api/org/apache/hadoop/mapred/jobconf/setnummaptasks">
+            setNumMapTasks(int)</a>)。
+          </li>
+        </ul>
+ 
+        <p>通常,<code>JobConf</code>会指明<code>Mapper</code>、Combiner(如果有的话)、
+        <code>Partitioner</code>、<code>Reducer</code>、<code>InputFormat</code>和 
+        <code>OutputFormat</code>的具体实现。<code>JobConf</code>还能指定一组输入文件
+        (<a href="ext:api/org/apache/hadoop/mapred/fileinputformat/setinputpaths">setInputPaths(JobConf, Path...)</a>
+        /<a href="ext:api/org/apache/hadoop/mapred/fileinputformat/addinputpath">addInputPath(JobConf, Path)</a>)
+        和(<a href="ext:api/org/apache/hadoop/mapred/fileinputformat/setinputpathstring">setInputPaths(JobConf, String)</a>
+        /<a href="ext:api/org/apache/hadoop/mapred/fileinputformat/addinputpathstring">addInputPaths(JobConf, String)</a>)
+        以及输出文件应该写在哪儿
+        (<a href="ext:api/org/apache/hadoop/mapred/fileoutputformat/setoutputpath">setOutputPath(Path)</a>)。</p>
+
+        <p><code>JobConf</code>可选择地对作业设置一些高级选项,例如:设置<code>Comparator</code>;
+        放到<code>DistributedCache</code>上的文件;中间结果或者作业输出结果是否需要压缩以及怎么压缩;
+        利用用户提供的脚本(<a href="ext:api/org/apache/hadoop/mapred/jobconf/setmapdebugscript">setMapDebugScript(String)</a>/<a href="ext:api/org/apache/hadoop/mapred/jobconf/setreducedebugscript">setReduceDebugScript(String)</a>)     
+        进行调试;作业是否允许<em>预防性(speculative)</em>任务的执行
+        (<a href="ext:api/org/apache/hadoop/mapred/jobconf/setmapspeculativeexecution">setMapSpeculativeExecution(boolean)</a>)/(<a href="ext:api/org/apache/hadoop/mapred/jobconf/setreducespeculativeexecution">setReduceSpeculativeExecution(boolean)</a>)
+        ;每个任务最大的尝试次数
+        (<a href="ext:api/org/apache/hadoop/mapred/jobconf/setmaxmapattempts">setMaxMapAttempts(int)</a>/<a href="ext:api/org/apache/hadoop/mapred/jobconf/setmaxreduceattempts">setMaxReduceAttempts(int)</a>)
+        ;一个作业能容忍的任务失败的百分比
+        (<a href="ext:api/org/apache/hadoop/mapred/jobconf/setmaxmaptaskfailurespercent">setMaxMapTaskFailuresPercent(int)</a>/<a href="ext:api/org/apache/hadoop/mapred/jobconf/setmaxreducetaskfailurespercent">setMaxReduceTaskFailuresPercent(int)</a>) 
+        ;等等。</p>
+        
+        <p>当然,用户能使用
+        <a href="ext:api/org/apache/hadoop/conf/configuration/set">set(String, String)</a>/<a href="ext:api/org/apache/hadoop/conf/configuration/get">get(String, String)</a>
+        来设置或者取得应用程序需要的任意参数。然而,<code>DistributedCache</code>的使用是面向大规模只读数据的。</p>
+      </section>
+
+      <section>
+        <title>任务的执行和环境</title>
+
+        <p><code>TaskTracker</code>是在一个单独的jvm上以子进程的形式执行
+        <code>Mapper</code>/<code>Reducer</code>任务(Task)的。
+        </p>
+        
+        <p>子任务会继承父<code>TaskTracker</code>的环境。用户可以通过JobConf中的
+        <code>mapred.child.java.opts</code>配置参数来设定子jvm上的附加选项,例如:
+        通过<code>-Djava.library.path=&lt;&gt;</code> 将一个非标准路径设为运行时的链接用以搜索共享库,等等。如果<code>mapred.child.java.opts</code>包含一个符号<em>@taskid@</em>,
+        它会被替换成map/reduce的taskid的值。</p>
+        
+        <p>下面是一个包含多个参数和替换的例子,其中包括:记录jvm GC日志;
+        JVM JMX代理程序以无密码的方式启动,这样它就能连接到jconsole上,从而可以查看子进程的内存和线程,得到线程的dump;还把子jvm的最大堆尺寸设置为512MB,
+        并为子jvm的<code>java.library.path</code>添加了一个附加路径。</p>
+
+        <p>
+          <code>&lt;property&gt;</code><br/>
+          &nbsp;&nbsp;<code>&lt;name&gt;mapred.child.java.opts&lt;/name&gt;</code><br/>
+          &nbsp;&nbsp;<code>&lt;value&gt;</code><br/>
+          &nbsp;&nbsp;&nbsp;&nbsp;<code>
+                    -Xmx512M -Djava.library.path=/home/mycompany/lib
+                    -verbose:gc -Xloggc:/tmp/@taskid@.gc</code><br/>
+          &nbsp;&nbsp;&nbsp;&nbsp;<code>
+                    -Dcom.sun.management.jmxremote.authenticate=false 
+                    -Dcom.sun.management.jmxremote.ssl=false</code><br/>
+          &nbsp;&nbsp;<code>&lt;/value&gt;</code><br/>
+          <code>&lt;/property&gt;</code>
+        </p>
+        <p>用户或管理员也可以使用<code>mapred.child.ulimit</code>设定子任务可用的最大虚拟内存。<code>mapred.child.ulimit</code>的值以千字节(KB)为单位,并且必须大于等于传给JVM的<code>-Xmx</code>参数所对应的值,否则虚拟机会无法启动。</p>
+        <p>注意:<code>mapred.child.java.opts</code>只用于设置task tracker启动的子任务。为守护进程设置内存选项请查看
+        <a href="cluster_setup.html#配置Hadoop守护进程的运行环境">
+        cluster_setup.html </a></p>
+        <p><code> ${mapred.local.dir}/taskTracker/</code>是task tracker的本地目录,
+        用于创建本地缓存和job。它可以指定多个目录(跨越多个磁盘),文件会半随机的保存到本地路径下的某个目录。当job启动时,task tracker根据配置文档创建本地job目录,目录结构如以下所示:</p>
+        <ul>
+	<li><code>${mapred.local.dir}/taskTracker/archive/</code> :分布式缓存。这个目录保存本地的分布式缓存。因此本地分布式缓存是在所有task和job间共享的。</li>
+        <li><code>${mapred.local.dir}/taskTracker/jobcache/$jobid/</code> :
+        本地job目录。
+        <ul>
+        <li><code>${mapred.local.dir}/taskTracker/jobcache/$jobid/work/</code>:
+        job指定的共享目录。各个任务可以使用这个空间做为暂存空间,用于它们之间共享文件。这个目录通过<code>job.local.dir </code>参数暴露给用户。这个路径可以通过API <a href="ext:api/org/apache/hadoop/mapred/jobconf/getjoblocaldir">
+        JobConf.getJobLocalDir()</a>来访问。它也可以被做为系统属性获得。因此,用户(比如运行streaming)可以调用<code>System.getProperty("job.local.dir")</code>获得该目录。
+        </li>
+        <li><code>${mapred.local.dir}/taskTracker/jobcache/$jobid/jars/</code>:
+        存放jar包的路径,用于存放作业的jar文件和展开的jar。<code>job.jar</code>是应用程序的jar文件,它会被自动分发到各台机器,在task启动前会被自动展开。使用api
+        <a href="ext:api/org/apache/hadoop/mapred/jobconf/getjar">
+        JobConf.getJar() </a>函数可以得到job.jar的位置。使用JobConf.getJar().getParent()可以访问存放展开的jar包的目录。
+        </li>
+        <li><code>${mapred.local.dir}/taskTracker/jobcache/$jobid/job.xml</code>:
+        一个job.xml文件,本地的通用的作业配置文件。
+        </li>
+        <li><code>${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid</code>:
+        每个任务有一个目录<code>task-id</code>,它里面有如下的目录结构:
+	<ul>
+        <li><code>${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/job.xml</code>:
+       一个job.xml文件,本地化的任务作业配置文件。任务本地化是指为该task设定特定的属性值。这些值会在下面具体说明。
+	</li>
+        <li><code>${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/output</code>
+        一个存放中间过程的输出文件的目录。它保存了由framwork产生的临时map reduce数据,比如map的输出文件等。</li>
+        <li><code>${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/work</code>:
+        task的当前工作目录。</li>
+        <li><code>${mapred.local.dir}/taskTracker/jobcache/$jobid/$taskid/work/tmp</code>:
+        task的临时目录。(用户可以设定属性<code>mapred.child.tmp</code>
+        来为map和reduce task设定临时目录。缺省值是<code>./tmp</code>。如果这个值不是绝对路径,
+        它会把task的工作路径加到该路径前面作为task的临时文件路径。如果这个值是绝对路径则直接使用这个值。
+        如果指定的目录不存在,会自动创建该目录。之后,按照选项
+        <code>-Djava.io.tmpdir='临时文件的绝对路径'</code>执行java子任务。
+        pipes和streaming的临时文件路径是通过环境变量<code>TMPDIR='the absolute path of the tmp dir'</code>设定的)。
+        如果<code>mapred.child.tmp</code>有<code>./tmp</code>值,这个目录会被创建。</li>
+        </ul>
+        </li>
+        </ul>
+        </li>
+        </ul>
+        <p>下面的属性是为每个task执行时使用的本地参数,它们保存在本地化的任务作业配置文件里:</p>
+        <table>
+          <tr><th>名称</th><th>类型</th><th>描述</th></tr>
+          <tr><td>mapred.job.id</td><td>String</td><td>job id</td></tr>
+          <tr><td>mapred.jar</td><td>String</td>
+              <td>job目录下job.jar的位置</td></tr>
+          <tr><td>job.local.dir</td><td> String</td>
+              <td>job指定的共享存储空间</td></tr>
+          <tr><td>mapred.tip.id</td><td> String</td>
+              <td> task id</td></tr>
+          <tr><td>mapred.task.id</td><td> String</td>
+              <td> task尝试id</td></tr>
+          <tr><td>mapred.task.is.map</td><td> boolean </td>
+              <td>是否是map task</td></tr>
+          <tr><td>mapred.task.partition</td><td> int </td>
+              <td>task在job中的id</td></tr>
+          <tr><td>map.input.file</td><td> String</td>
+              <td> map读取的文件名</td></tr>
+          <tr><td>map.input.start</td><td> long</td>
+              <td> map输入的数据块的起始位置偏移</td></tr>
+          <tr><td>map.input.length </td><td>long </td>
+              <td>map输入的数据块的字节数</td></tr>
+          <tr><td>mapred.work.output.dir</td><td> String </td>
+              <td>task临时输出目录</td></tr>
+        </table>
+        <p>task的标准输出和错误输出流会被读到TaskTracker中,并且记录到
+        <code>${HADOOP_LOG_DIR}/userlogs</code></p>
+        <p><a href="#DistributedCache">DistributedCache</a>
+        可用于map或reduce task中分发jar包和本地库。子jvm总是把
+        <em>当前工作目录</em> 加到
+        <code>java.library.path</code> 和 <code>LD_LIBRARY_PATH</code>。
+        因此,可以通过
+        <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/lang/System.html#loadLibrary(java.lang.String)">
+        System.loadLibrary</a>或 
+        <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/lang/System.html#load(java.lang.String)">
+        System.load</a>装载缓存的库。有关使用分布式缓存加载共享库的细节请参考
+        <a href="native_libraries.html#使用DistributedCache+加载本地库">
+        native_libraries.html</a></p>
+      </section>
+      
+      <section>
+        <title>作业的提交与监控</title>
+        
+        <p><a href="ext:api/org/apache/hadoop/mapred/jobclient">
+        JobClient</a>是用户提交的作业与<code>JobTracker</code>交互的主要接口。
+        </p>
+ 
+        <p><code>JobClient</code>提供了提交作业、追踪作业进度、访问子任务的报告和日志、获取Map/Reduce集群状态信息等功能。
+        </p>
+ 
+        <p>作业提交过程包括: </p>
+        <ol>
+          <li>检查作业输入与输出的规范。</li>
+          <li>为作业计算<code>InputSplit</code>值。</li>
+          <li>
+           如果需要的话,为作业的<code>DistributedCache</code>建立必须的统计信息。
+          </li>
+          <li>
+            拷贝作业的jar包和配置文件到<code>FileSystem</code>上的Map/Reduce系统目录下。
+          </li>
+          <li>
+            提交作业到<code>JobTracker</code>并且监控它的状态。
+          </li>
+        </ol>
+        <p>作业的历史文件记录到指定目录的"_logs/history/"子目录下。这个指定目录由<code>hadoop.job.history.user.location</code>设定,默认是作业输出的目录。因此默认情况下,文件会存放在mapred.output.dir/_logs/history目录下。用户可以设置<code>hadoop.job.history.user.location</code>为<code>none</code>来停止日志记录。
+        </p>
+
+        <p> 用户使用下面的命令可以看到在指定目录下的历史日志记录的摘要。
+        <br/>
+        <code>$ bin/hadoop job -history output-dir</code><br/> 
+        这个命令会打印出作业的细节,以及失败的和被杀死的任务细节。<br/>
+        要查看有关作业的更多细节例如成功的任务、每个任务尝试的次数(task attempt)等,可以使用下面的命令
+        <br/>
+       <code>$ bin/hadoop job -history all output-dir</code><br/></p> 
+            
+        <p>用户可以使用 
+        <a href="ext:api/org/apache/hadoop/mapred/outputlogfilter">OutputLogFilter</a>
+        从输出目录列表中筛选日志文件。</p>
+        
+        <p>一般情况,用户利用<code>JobConf</code>创建应用程序并配置作业属性,
+        然后用
+        <code>JobClient</code> 提交作业并监视它的进程。</p>
+
+        <section>
+          <title>作业的控制</title>
+ 
+          <p>有时候,用一个单独的Map/Reduce作业并不能完成一个复杂的任务,用户也许要链接多个Map/Reduce作业才行。这是容易实现的,因为作业通常输出到分布式文件系统上的,所以可以把这个作业的输出作为下一个作业的输入实现串联。
+          </p>
+ 
+          <p>然而,这也意味着,确保每一作业完成(成功或失败)的责任就直接落在了客户身上。在这种情况下,可以用的控制作业的选项有:
+          </p>
+          <ul>
+            <li>
+              <a href="ext:api/org/apache/hadoop/mapred/jobclient/runjob">
+              runJob(JobConf)</a>:提交作业,仅当作业完成时返回。
+            </li>
+            <li>
+              <a href="ext:api/org/apache/hadoop/mapred/jobclient/submitjob">
+              submitJob(JobConf)</a>:只提交作业,之后需要你轮询它返回的
+              <a href="ext:api/org/apache/hadoop/mapred/runningjob">
+              RunningJob</a>句柄的状态,并根据情况调度。
+            </li>
+            <li>
+              <a href="ext:api/org/apache/hadoop/mapred/jobconf/setjobendnotificationuri">
+              JobConf.setJobEndNotificationURI(String)</a>:设置一个作业完成通知,可避免轮询。
+           
+            </li>
+          </ul>
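+          <p>下面的示意片段先用<code>runJob</code>同步地运行第一个作业,成功后再用
+          <code>submitJob</code>提交第二个作业并轮询其状态。其中<code>conf1</code>、
+          <code>conf2</code>是本文假设的两个已经配置好的<code>JobConf</code>对象。</p>
+          <p>
+            <code>// 第一个作业:阻塞直到完成,失败则抛出IOException</code><br/>
+            <code>JobClient.runJob(conf1);</code><br/>
+            <br/>
+            <code>// 第二个作业:只提交,然后自行轮询状态</code><br/>
+            <code>JobClient client = new JobClient(conf2);</code><br/>
+            <code>RunningJob second = client.submitJob(conf2);</code><br/>
+            <code>while (!second.isComplete()) {</code><br/>
+            &nbsp;&nbsp;<code>Thread.sleep(5000); // 每5秒查询一次</code><br/>
+            <code>}</code><br/>
+            <code>if (!second.isSuccessful()) {</code><br/>
+            &nbsp;&nbsp;<code>System.err.println("second job failed");</code><br/>
+            <code>}</code>
+          </p>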
+        </section>
+      </section>
+
+      <section>
+        <title>作业的输入</title>
+        
+        <p><a href="ext:api/org/apache/hadoop/mapred/inputformat">
+        InputFormat</a> 为Map/Reduce作业描述输入的细节规范。
+        </p> 
+ 
+        <p>Map/Reduce框架根据作业的<code>InputFormat</code>来: 
+        </p>
+        <ol>
+          <li>检查作业输入的有效性。</li>
+          <li>
+            把输入文件切分成多个逻辑<code>InputSplit</code>实例,
+            并把每一实例分别分发给一个
+            <code>Mapper</code>。
+          </li>
+          <li>
+            提供<code>RecordReader</code>的实现,这个RecordReader从逻辑<code>InputSplit</code>中获得输入记录,
+		这些记录将由<code>Mapper</code>处理。 
+          </li>
+        </ol>
+ 
+        <p>基于文件的<code>InputFormat</code>实现(通常是
+	<a href="ext:api/org/apache/hadoop/mapred/fileinputformat">
+        FileInputFormat</a>的子类)
+	默认行为是按照输入文件的字节大小,把输入数据切分成逻辑分块(<em>logical</em> 
+        <code>InputSplit</code> )。	
+        其中输入文件所在的<code>FileSystem</code>的数据块尺寸是分块大小的上限;下限可以通过<code>mapred.min.split.size</code>设置。</p>
+ 
+        <p>考虑到边界情况,对于很多应用程序来说,很明显按照文件大小进行逻辑分割是不能满足需求的。
+        在这种情况下,应用程序需要实现一个<code>RecordReader</code>来处理记录的边界并为每个任务提供一个逻辑分块的面向记录的视图。
+        </p>
+
+        <p><a href="ext:api/org/apache/hadoop/mapred/textinputformat">
+        TextInputFormat</a> 是默认的<code>InputFormat</code>。</p>
+        
+        <p>如果一个作业的<code>Inputformat</code>是<code>TextInputFormat</code>,
+        并且框架检测到输入文件的后缀是<em>.gz</em>和<em>.lzo</em>,就会使用对应的<code>CompressionCodec</code>自动解压缩这些文件。
+        但是需要注意,上述带后缀的压缩文件不会被切分,并且整个压缩文件会分给一个mapper来处理。
+        </p>
+        
+        <section>
+          <title>InputSplit</title>
+          
+          <p><a href="ext:api/org/apache/hadoop/mapred/inputsplit">
+          InputSplit</a> 是一个单独的<code>Mapper</code>要处理的数据块。</p>
+
+          <p>一般来说,<code>InputSplit</code>给出的是输入数据的面向字节的视图,再由<code>RecordReader</code>处理并转化成面向记录的视图。
+          </p>
+          
+          <p><a href="ext:api/org/apache/hadoop/mapred/filesplit">
+			  FileSplit</a> 是默认的<code>InputSplit</code>。 它把
+          <code>map.input.file</code> 设定为输入文件的路径,输入文件是逻辑分块文件。
+          </p>
+        </section>
+        
+        <section>
+          <title>RecordReader</title>
+          
+          <p><a href="ext:api/org/apache/hadoop/mapred/recordreader">
+          RecordReader</a> 从<code>InputSlit</code>读入<code>&lt;key, value&gt;</code>对。 
+          </p>
+
+          <p>一般来说,<code>RecordReader</code>把<code>InputSplit</code>提供的面向字节的输入视图,转化成面向记录的视图交给<code>Mapper</code>处理。
+          因此<code>RecordReader</code>要负责处理记录的边界,并把数据表示成key/value对的形式。
+          </p>
+        </section>
+      </section>
+
+      <section>
+        <title>作业的输出</title>
+        
+        <p><a href="ext:api/org/apache/hadoop/mapred/outputformat">
+        OutputFormat</a> 描述Map/Reduce作业的输出样式。
+        </p>
+
+        <p>Map/Reduce框架根据作业的<code>OutputFormat</code>来:
+        </p>
+        <ol>
+          <li>
+            检验作业的输出,例如检查输出路径是否已经存在。
+          </li>
+          <li>
+            提供一个<code>RecordWriter</code>的实现,用来输出作业结果。
+            输出文件保存在<code>FileSystem</code>上。
+          </li>
+        </ol>
+ 
+        <p><code>TextOutputFormat</code>是默认的
+        <code>OutputFormat</code>。</p>
+ 
+        <section>
+          <title>任务的Side-Effect File</title>
+ 
+          <p>在一些应用程序中,子任务需要产生一些side-file,这些文件与作业实际输出结果的文件不同。
+          </p>
+ 
+	  <p>在这种情况下,同一个<code>Mapper</code>或者<code>Reducer</code>的两个实例(比如预防性任务)同时打开或者写
+	  <code>FileSystem</code>上的同一文件就会产生冲突。因此应用程序在写文件的时候需要为每次任务尝试(不仅仅是每次任务,每个任务可以尝试执行很多次)选取一个独一无二的文件名(使用attemptid,例如<code>task_200709221812_0001_m_000000_0</code>)。 
+          </p> 
+ 
+          <p>为了避免冲突,Map/Reduce框架为每次尝试执行任务都建立和维护一个特殊的
+          <code>${mapred.output.dir}/_temporary/_${taskid}</code>子目录,这个目录位于本次尝试执行任务输出结果所在的<code>FileSystem</code>上,可以通过
+          <code>${mapred.work.output.dir}</code>来访问这个子目录。
+          对于成功完成的任务尝试,只有<code>${mapred.output.dir}/_temporary/_${taskid}</code>下的文件会<em>移动</em>到<code>${mapred.output.dir}</code>。当然,框架会丢弃那些失败的任务尝试的子目录。这种处理过程对于应用程序来说是完全透明的。</p>
+ 
+          <p>在任务执行期间,应用程序在写文件时可以利用这个特性,比如
+	  通过<a href="ext:api/org/apache/hadoop/mapred/fileoutputformat/getworkoutputpath">
+          FileOutputFormat.getWorkOutputPath()</a>获得<code>${mapred.work.output.dir}</code>目录,
+	  并在其下创建任意任务执行时所需的side-file,框架在任务尝试成功时会马上移动这些文件,因此不需要在程序内为每次任务尝试选取一个独一无二的名字。
+          </p>
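+          <p>例如,下面的示意片段在任务中向该目录写一个side-file,其中<code>job</code>是当前任务的
+          <code>JobConf</code>,文件名<code>side-file.txt</code>只是本文假设的示例:</p>
+          <p>
+            <code>Path workDir = FileOutputFormat.getWorkOutputPath(job); // 即${mapred.work.output.dir}</code><br/>
+            <code>FileSystem fs = workDir.getFileSystem(job);</code><br/>
+            <code>FSDataOutputStream out = fs.create(new Path(workDir, "side-file.txt")); // 无需自己构造唯一文件名</code><br/>
+            <code>out.writeBytes("some side data\n");</code><br/>
+            <code>out.close();</code>
+          </p>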
+          
+          <p>注意:在每次任务尝试执行期间,<code>${mapred.work.output.dir}</code> 的值实际上是
+          <code>${mapred.output.dir}/_temporary/_{$taskid}</code>,这个值是Map/Reduce框架创建的。
+          所以使用这个特性的方法是,在<a href="ext:api/org/apache/hadoop/mapred/fileoutputformat/getworkoutputpath">
+          FileOutputFormat.getWorkOutputPath() </a>
+	  路径下创建side-file即可。
+	  </p>
+          
+          <p>对于只使用map不使用reduce的作业,这个结论也成立。这种情况下,map的输出结果直接生成到HDFS上。
+           </p> 
+        </section>
+        
+        <section>
+          <title>RecordWriter</title>
+          
+          <p><a href="ext:api/org/apache/hadoop/mapred/recordwriter">
+          RecordWriter</a> 生成<code>&lt;key, value&gt;</code> 
+          对到输出文件。</p>
+
+          <p>RecordWriter的实现把作业的输出结果写到
+          <code>FileSystem</code>。</p>
+        </section>
+      </section>
+      
+      <section>
+        <title>其他有用的特性</title>
+ 
+        <section>
+          <title>Counters</title>
+          
+          <p><code>Counters</code> 是多个由Map/Reduce框架或者应用程序定义的全局计数器。
+          每一个<code>Counter</code>可以是任何一种 
+          <code>Enum</code>类型。同一特定<code>Enum</code>类型的Counter可以汇集到一个组,其类型为<code>Counters.Group</code>。</p>
+          
+          <p>应用程序可以定义任意(Enum类型)的<code>Counters</code>并且可以通过 <code>map</code> 或者 
+          <code>reduce</code>方法中的
+          <a href="ext:api/org/apache/hadoop/mapred/reporter/incrcounterEnum">
+          Reporter.incrCounter(Enum, long)</a>或者 
+          <a href="ext:api/org/apache/hadoop/mapred/reporter/incrcounterString">
+          Reporter.incrCounter(String, String, long)</a>
+          更新。之后框架会汇总这些全局counters。 
+          </p>
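+          <p>下面的示意片段定义了一个<code>Enum</code>类型的计数器组并在<code>map</code>中更新它,
+          枚举名<code>WordCounters</code>和组名"MyGroup"都只是本文假设的示例:</p>
+          <p>
+            <code>static enum WordCounters { TOTAL_WORDS, SKIPPED_WORDS } // 同一Enum类型的计数器构成一个组</code><br/>
+            <br/>
+            <code>public void map(LongWritable key, Text value,</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>OutputCollector&lt;Text, IntWritable&gt; output, Reporter reporter) throws IOException {</code><br/>
+            &nbsp;&nbsp;<code>reporter.incrCounter(WordCounters.TOTAL_WORDS, 1); // 按Enum更新</code><br/>
+            &nbsp;&nbsp;<code>reporter.incrCounter("MyGroup", "records", 1); // 也可以按组名/计数器名更新</code><br/>
+            <code>}</code>
+          </p>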
+        </section>       
+        
+        <section>
+          <title>DistributedCache</title>
+          
+          <p><a href="ext:api/org/apache/hadoop/filecache/distributedcache">
+          DistributedCache</a> 可将具体应用相关的、大尺寸的、只读的文件有效地分布放置。
+          </p>
+ 
+          <p><code>DistributedCache</code> 是Map/Reduce框架提供的功能,能够缓存应用程序所需的文件
+		(包括文本,档案文件,jar文件等)。
+          </p>
+          <p>应用程序在<code>JobConf</code>中通过url(hdfs://)指定需要被缓存的文件。
+	  <code>DistributedCache</code>假定由hdfs://格式url指定的文件已经在 
+          <code>FileSystem</code>上了。</p>
+
+          <p>Map/Reduce框架会在作业的任何任务执行之前,把必要的文件拷贝到slave节点上。它之所以高效,是因为每个作业的文件只拷贝一次,并且能够缓存归档文件,这些归档文件在slave节点上会被解档。
+          </p>
+          
+          <p><code>DistributedCache</code> 根据缓存文档修改的时间戳进行追踪。
+	  在作业执行期间,当前应用程序或者外部程序不能修改缓存文件。 
+          </p>
+
+          <p><code>DistributedCache</code>可以分发简单的只读数据或文本文件,也可以分发复杂类型的文件例如归档文件和jar文件。归档文件(zip,tar,tgz和tar.gz文件)在slave节点上会被<em>解档(un-archived)</em>。
+          这些文件可以设置<em>执行权限</em>。</p>
+          <p>用户可以通过设置<code>mapred.cache.{files|archives}</code>来分发文件。
+          如果要分发多个文件,可以使用逗号分隔文件所在路径。也可以利用API来设置该属性:
+            <a href="ext:api/org/apache/hadoop/filecache/distributedcache/addcachefile">
+          DistributedCache.addCacheFile(URI,conf)</a>/
+          <a href="ext:api/org/apache/hadoop/filecache/distributedcache/addcachearchive">
+          DistributedCache.addCacheArchive(URI,conf)</a> and
+          <a href="ext:api/org/apache/hadoop/filecache/distributedcache/setcachefiles">
+          DistributedCache.setCacheFiles(URIs,conf)</a>/
+          <a href="ext:api/org/apache/hadoop/filecache/distributedcache/setcachearchives">
+          DistributedCache.setCacheArchives(URIs,conf)</a>
+          其中URI的形式是
+          <code>hdfs://host:port/absolute-path#link-name</code>
+          在Streaming程序中,可以通过命令行选项
+          <code>-cacheFile/-cacheArchive</code>
+          分发文件。</p>
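+          <p>下面是一个通过API分发并读取缓存文件的示意片段,其中的hdfs路径和文件名
+          只是本文假设的示例:</p>
+          <p>
+            <code>// 提交作业前:登记需要分发的文件(假定它已经在HDFS上)</code><br/>
+            <code>DistributedCache.addCacheFile(new URI("hdfs://namenode:9000/myapp/lookup.dat"), conf);</code><br/>
+            <br/>
+            <code>// 在Mapper/Reducer的configure(JobConf)里:取得文件在本地的缓存路径</code><br/>
+            <code>public void configure(JobConf job) {</code><br/>
+            &nbsp;&nbsp;<code>try {</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>Path[] localFiles = DistributedCache.getLocalCacheFiles(job);</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>// ……打开localFiles[0]并读入查找表……</code><br/>
+            &nbsp;&nbsp;<code>} catch (IOException ioe) {</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>System.err.println("Caught exception while getting cached files: " + ioe);</code><br/>
+            &nbsp;&nbsp;<code>}</code><br/>
+            <code>}</code>
+          </p>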
+          <p>
+	  用户可以通过<a href="ext:api/org/apache/hadoop/filecache/distributedcache/createsymlink">
+          DistributedCache.createSymlink(Configuration)</a>方法让<code>DistributedCache</code>
+        在<em>当前工作目录</em>下创建到缓存文件的符号链接。
+	或者通过设置配置文件属性<code>mapred.create.symlink</code>为<code>yes</code>。
+	分布式缓存会截取URI的片段作为链接的名字。
+	例如,URI是 <code>hdfs://namenode:port/lib.so.1#lib.so</code>,
+	则在task当前工作目录会有名为<code>lib.so</code>的链接,
+        它会链接分布式缓存中的<code>lib.so.1</code>。
+        </p>
+
+	<p><code>DistributedCache</code>可在map/reduce任务中作为
+        一种基础软件分发机制使用。它可以被用于分发jar包和本地库(native libraries)。
+        <a href="ext:api/org/apache/hadoop/filecache/distributedcache/addarchivetoclasspath">
+        DistributedCache.addArchiveToClassPath(Path, Configuration)</a>和
+        <a href="ext:api/org/apache/hadoop/filecache/distributedcache/addfiletoclasspath">
+        DistributedCache.addFileToClassPath(Path, Configuration)</a> API能够被用于
+        缓存文件和jar包,并把它们加入子jvm的<em>classpath</em>。也可以通过设置配置文档里的属性
+        <code>mapred.job.classpath.{files|archives}</code>达到相同的效果。缓存文件可用于分发和装载本地库。
+        </p>
+        </section>
+        
+        <section>
+          <title>Tool</title>
+          
+          <p><a href="ext:api/org/apache/hadoop/util/tool">Tool</a> 
+          接口支持处理常用的Hadoop命令行选项。
+          </p>
+          
+          <p><code>Tool</code> 是Map/Reduce工具或应用的标准。应用程序应只处理其定制参数,
+          要把标准命令行选项通过
+		<a href="ext:api/org/apache/hadoop/util/toolrunner/run"> ToolRunner.run(Tool, String[])</a> 
+		委托给
+          <a href="ext:api/org/apache/hadoop/util/genericoptionsparser">
+          GenericOptionsParser</a>处理。
+          </p>
+          
+          <p>
+            Hadoop命令行的常用选项有:<br/>
+            <code>
+              -conf &lt;configuration file&gt;
+            </code>
+            <br/>
+            <code>
+              -D &lt;property=value&gt;
+            </code>
+            <br/>
+            <code>
+              -fs &lt;local|namenode:port&gt;
+            </code>
+            <br/>
+            <code>
+              -jt &lt;local|jobtracker:port&gt;
+            </code>
+          </p>
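+          <p>下面是一个实现<code>Tool</code>接口的最小骨架示意,类名<code>MyTool</code>
+          只是本文假设的示例:</p>
+          <p>
+            <code>public class MyTool extends Configured implements Tool {</code><br/>
+            &nbsp;&nbsp;<code>public int run(String[] args) throws Exception {</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>JobConf conf = new JobConf(getConf(), MyTool.class); // 复用已由框架解析的配置</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>// ……在这里只处理应用自己的参数args,并设置Mapper/Reducer、输入输出路径等……</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>JobClient.runJob(conf);</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>return 0;</code><br/>
+            &nbsp;&nbsp;<code>}</code><br/>
+            &nbsp;&nbsp;<code>public static void main(String[] args) throws Exception {</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>// -conf、-D、-fs、-jt等标准选项由ToolRunner/GenericOptionsParser处理</code><br/>
+            &nbsp;&nbsp;&nbsp;&nbsp;<code>System.exit(ToolRunner.run(new Configuration(), new MyTool(), args));</code><br/>
+            &nbsp;&nbsp;<code>}</code><br/>
+            <code>}</code>
+          </p>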
+        </section>
+        
+        <section>
+          <title>IsolationRunner</title>
+          
+          <p><a href="ext:api/org/apache/hadoop/mapred/isolationrunner">
+          IsolationRunner</a> 是帮助调试Map/Reduce程序的工具。</p>
+          
+          <p>使用<code>IsolationRunner</code>的方法是,首先设置
+          <code>keep.failed.task.files</code>属性为<code>true</code> 
+          (同时参考<code>keep.task.files.pattern</code>)。</p>
+          
+          <p>
+            然后,登录到任务运行失败的节点上,进入
+            <code>TaskTracker</code>的本地路径运行
+            <code>IsolationRunner</code>:<br/>
+            <code>$ cd &lt;local path&gt;/taskTracker/${taskid}/work</code><br/>
+            <code>
+              $ bin/hadoop org.apache.hadoop.mapred.IsolationRunner ../job.xml
+            </code>
+          </p>
+          
+          <p><code>IsolationRunner</code>会把失败的任务放在单独的一个能够调试的jvm上运行,并且采用和之前完全一样的输入数据。
+		</p>
+        </section>
+
+         <section>
+           <title>Profiling</title>
+          <p>Profiling是一个实用工具,它利用内置的Java profiler,对作业中有代表性的少量(2到3个)map和reduce任务进行采样分析,生成运行分析报告。</p>
+          <p>用户可以通过设置属性<code>mapred.task.profile</code>指定系统是否采集profiler信息。
+          利用api<a href="ext:api/org/apache/hadoop/mapred/jobconf/setprofileenabled">
+          JobConf.setProfileEnabled(boolean)可以修改属性值</a>。如果设为<code>true</code>,
+          则开启profiling功能。profiler信息保存在用户日志目录下。缺省情况,profiling功能是关闭的。</p>
+          <p>如果用户设定使用profiling功能,可以使用配置文档里的属性
+          <code>mapred.task.profile.{maps|reduces}</code>
+          设置要profile map/reduce task的范围。设置该属性值的api是
+           <a href="ext:api/org/apache/hadoop/mapred/jobconf/setprofiletaskrange">
+          JobConf.setProfileTaskRange(boolean,String)</a>。
+          范围的缺省值是<code>0-2</code>。</p>
+          <p>用户可以通过设定配置文档里的属性<code>mapred.task.profile.params</code>
+          来指定profiler配置参数。修改属性要使用api
+          <a href="ext:api/org/apache/hadoop/mapred/jobconf/setprofileparams">
+          JobConf.setProfileParams(String)</a>。当运行task时,如果该字符串包含<code>%s</code>,它会被替换成profiling输出文件的文件名。这些参数会在命令行里传递到子JVM中。缺省的profiling
+          参数是
+          <code>-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s</code>。
+          </p>
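+          <p>下面是打开profiling并限定采样范围的配置示意片段:</p>
+          <p>
+            <code>conf.setProfileEnabled(true); // 即mapred.task.profile=true</code><br/>
+            <code>conf.setProfileTaskRange(true, "0-2"); // 只profile编号0到2的map任务</code><br/>
+            <code>conf.setProfileTaskRange(false, "0-2"); // 只profile编号0到2的reduce任务</code><br/>
+            <code>conf.setProfileParams("-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s");</code>
+          </p>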
+	 </section>
+        
+        <section>
+          <title>调试</title>
+          <p>Map/Reduce框架能够运行用户提供的用于调试的脚本程序。 
+          当map/reduce任务失败时,用户可以通过运行脚本在任务日志(例如任务的标准输出、标准错误、系统日志以及作业配置文件)上做后续处理工作。用户提供的调试脚本程序的标准输出和标准错误会输出为诊断文件。如果需要的话这些输出结果也可以打印在用户界面上。</p>
+
+          <p> 在接下来的章节,我们讨论如何与作业一起提交调试脚本。为了提交调试脚本,
+          首先要把这个脚本分发出去,而且还要在配置文件里设置。
+     	  </p>
+          <section>
+          <title> 如何分发脚本文件:</title>
+          <p>用户要用
+          <a href="mapred_tutorial.html#DistributedCache">DistributedCache</a>
+          机制来<em>分发</em>和<em>链接</em>脚本文件</p>
+         </section>
+          <section>
+          <title> 如何提交脚本:</title>
+          <p> 一个快速提交调试脚本的方法是分别为需要调试的map任务和reduce任务设置
+		"mapred.map.task.debug.script" 和 "mapred.reduce.task.debug.script"
+	 属性的值。这些属性也可以通过
+          <a href="ext:api/org/apache/hadoop/mapred/jobconf/setmapdebugscript">
+          JobConf.setMapDebugScript(String) </a>和 
+          <a href="ext:api/org/apache/hadoop/mapred/jobconf/setreducedebugscript">
+          JobConf.setReduceDebugScript(String) </a>API来设置。对于streaming,
+          可以分别为需要调试的map任务和reduce任务使用命令行选项-mapdebug 和 -reducedebug来提交调试脚本。
+          </p>
+            
+          <p>脚本的参数是任务的标准输出、标准错误、系统日志以及作业配置文件。在运行map/reduce失败的节点上运行调试命令是:
+		 <br/>
+          <code> $script $stdout $stderr $syslog $jobconf </code> </p> 
+
+          <p> Pipes 程序根据第五个参数获得c++程序名。
+          因此调试pipes程序的命令是<br/> 
+          <code>$script $stdout $stderr $syslog $jobconf $program </code>  
+          </p>
+          </section>
+          
+          <section>
+          <title> 默认行为 </title>
+          <p> 对于pipes,默认的脚本会用gdb处理core dump,
+          打印 stack trace并且给出正在运行线程的信息。</p>
+          </section>
+        </section>
+        
+        <section>
+          <title>JobControl</title>
+          
+          <p><a href="ext:api/org/apache/hadoop/mapred/jobcontrol/package-summary">
+          JobControl</a>是一个工具,它封装了一组Map/Reduce作业以及他们之间的依赖关系。
+	  </p>
+        </section>
+        
+        <section>
+          <title>数据压缩</title>
+          
+          <p>Hadoop Map/Reduce框架允许应用程序的编写者为map输出的中间数据和作业的最终输出数据(即reduce的输出)指定压缩方式。它还附带了一些
+          <a href="ext:api/org/apache/hadoop/io/compress/compressioncodec">
+          CompressionCodec</a>的实现,比如实现了
+          <a href="ext:zlib">zlib</a>和<a href="ext:lzo">lzo</a>压缩算法。
+           Hadoop同样支持<a href="ext:gzip">gzip</a>文件格式。 
+          </p>
+          
+          <p>考虑到性能问题(zlib)以及Java类库的缺失(lzo)等因素,Hadoop也为上述压缩解压算法提供本地库的实现。更多的细节请参考
+          <a href="native_libraries.html">这里</a>。</p>
+          
+          <section>
+            <title>中间输出</title>
+            
+            <p>应用程序可以通过
+            <a href="ext:api/org/apache/hadoop/mapred/jobconf/setcompressmapoutput">
+            JobConf.setCompressMapOutput(boolean)</a> API控制是否压缩map输出的中间结果,并且可以通过
+            <a href="ext:api/org/apache/hadoop/mapred/jobconf/setmapoutputcompressorclass">
+            JobConf.setMapOutputCompressorClass(Class)</a>api指定
+            <code>CompressionCodec</code>。
+        </p>
+       </section>
+          
+          <section>
+            <title>作业输出</title>   
+            <p>应用程序可以通过
+            <a href="ext:api/org/apache/hadoop/mapred/fileoutputformat/setcompressoutput">
+            FileOutputFormat.setCompressOutput(JobConf, boolean)</a>
+            api控制输出是否需要压缩并且可以使用 
+            <a href="ext:api/org/apache/hadoop/mapred/fileoutputformat/setoutputcompressorclass">
+            FileOutputFormat.setOutputCompressorClass(JobConf, Class)</a>api指定<code>CompressionCodec</code>。</p> 
+            
+            <p>如果作业输出要保存成 
+            <a href="ext:api/org/apache/hadoop/mapred/sequencefileoutputformat">
+            SequenceFileOutputFormat</a>格式,需要使用
+            <a href="ext:api/org/apache/hadoop/mapred/sequencefileoutputformat/setoutputcompressiontype">
+            SequenceFileOutputFormat.setOutputCompressionType(JobConf, 
+            SequenceFile.CompressionType)</a>api,来设定
+            <code>SequenceFile.CompressionType</code> (i.e. <code>RECORD</code> / 
+            <code>BLOCK</code> - 默认是<code>RECORD</code>)。
+            </p>
+          </section>
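+          <p>把上面几个API放在一起,一个同时压缩中间输出和作业输出的配置示意片段如下,
+          这里假设选用<code>GzipCodec</code>:</p>
+          <p>
+            <code>// 压缩map输出的中间结果</code><br/>
+            <code>conf.setCompressMapOutput(true);</code><br/>
+            <code>conf.setMapOutputCompressorClass(GzipCodec.class);</code><br/>
+            <br/>
+            <code>// 压缩作业的最终输出</code><br/>
+            <code>FileOutputFormat.setCompressOutput(conf, true);</code><br/>
+            <code>FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);</code><br/>
+            <br/>
+            <code>// 如果输出保存为SequenceFile,还可以指定压缩类型</code><br/>
+            <code>SequenceFileOutputFormat.setOutputCompressionType(conf, SequenceFile.CompressionType.BLOCK);</code>
+          </p>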
+        </section>
+        
+      </section>
+    </section>
+
+    <section>
+      <title>例子:WordCount v2.0</title>
+      
+      <p>这里是一个更全面的<code>WordCount</code>例子,它使用了我们已经讨论过的很多Map/Reduce框架提供的功能。 
+      </p>
+      
+      <p>运行这个例子需要HDFS的某些功能,特别是
+      <code>DistributedCache</code>相关功能。因此这个例子只能运行在
+      <a href="quickstart.html#SingleNodeSetup">伪分布式</a> 或者
+      <a href="quickstart.html#Fully-Distributed+Operation">完全分布式模式</a>的 
+      Hadoop上。</p>      
+      
+       <section>
+        <title>源代码</title>
+        
+        <table>
+          <tr>
+            <th></th>
+            <th>WordCount.java</th>
+          </tr>
+          <tr>
+            <td>1.</td>
+            <td>
+              <code>package org.myorg;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>2.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>3.</td>
+            <td>
+              <code>import java.io.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>4.</td>
+            <td>
+              <code>import java.util.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>5.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>6.</td>
+            <td>
+              <code>import org.apache.hadoop.fs.Path;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>7.</td>
+            <td>
+              <code>import org.apache.hadoop.filecache.DistributedCache;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>8.</td>
+            <td>
+              <code>import org.apache.hadoop.conf.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>9.</td>
+            <td>
+              <code>import org.apache.hadoop.io.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>10.</td>
+            <td>
+              <code>import org.apache.hadoop.mapred.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>11.</td>
+            <td>
+              <code>import org.apache.hadoop.util.*;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>12.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>13.</td>
+            <td>
+              <code>public class WordCount extends Configured implements Tool {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>14.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>15.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>
+                public static class Map extends MapReduceBase 
+                implements Mapper&lt;LongWritable, Text, Text, IntWritable&gt; {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>16.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>17.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                static enum Counters { INPUT_WORDS }
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>18.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>19.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                private final static IntWritable one = new IntWritable(1);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>20.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>private Text word = new Text();</code>
+            </td>
+          </tr>
+          <tr>
+            <td>21.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>22.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>private boolean caseSensitive = true;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>23.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>private Set&lt;String&gt; patternsToSkip = new HashSet&lt;String&gt;();</code>
+            </td>
+          </tr>
+          <tr>
+            <td>24.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>25.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>private long numRecords = 0;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>26.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>private String inputFile;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>27.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>28.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>public void configure(JobConf job) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>29.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>30.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>inputFile = job.get("map.input.file");</code>
+            </td>
+          </tr>
+          <tr>
+            <td>31.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>32.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>if (job.getBoolean("wordcount.skip.patterns", false)) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>33.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>Path[] patternsFiles = new Path[0];</code>
+            </td>
+          </tr>
+          <tr>
+            <td>34.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>try {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>35.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                patternsFiles = DistributedCache.getLocalCacheFiles(job);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>36.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>} catch (IOException ioe) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>37.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                System.err.println("Caught exception while getting cached files: " 
+                + StringUtils.stringifyException(ioe));
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>38.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>39.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>for (Path patternsFile : patternsFiles) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>40.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>parseSkipFile(patternsFile);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>41.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>42.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>43.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>44.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>45.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>private void parseSkipFile(Path patternsFile) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>46.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>try {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>47.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                BufferedReader fis = 
+                  new BufferedReader(new FileReader(patternsFile.toString()));
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>48.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>String pattern = null;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>49.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>while ((pattern = fis.readLine()) != null) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>50.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>patternsToSkip.add(pattern);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>51.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>52.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>} catch (IOException ioe) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>53.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                System.err.println("Caught exception while parsing the cached file '" +
+                                   patternsFile + "' : " + 
+                                   StringUtils.stringifyException(ioe));
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>54.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>55.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>56.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>57.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                public void map(LongWritable key, Text value, 
+                OutputCollector&lt;Text, IntWritable&gt; output, 
+                Reporter reporter) throws IOException {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>58.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                String line = 
+                  (caseSensitive) ? value.toString() : 
+                                    value.toString().toLowerCase();
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>59.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>60.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>for (String pattern : patternsToSkip) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>61.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>line = line.replaceAll(pattern, "");</code>
+            </td>
+          </tr>
+          <tr>
+            <td>62.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>63.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>64.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>StringTokenizer tokenizer = new StringTokenizer(line);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>65.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>while (tokenizer.hasMoreTokens()) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>66.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>word.set(tokenizer.nextToken());</code>
+            </td>
+          </tr>
+          <tr>
+            <td>67.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>output.collect(word, one);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>68.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>reporter.incrCounter(Counters.INPUT_WORDS, 1);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>69.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>70.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>71.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>if ((++numRecords % 100) == 0) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>72.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                reporter.setStatus("Finished processing " + numRecords + 
+                                   " records " + "from the input file: " + 
+                                   inputFile);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>73.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>74.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>75.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>76.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>77.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>
+                public static class Reduce extends MapReduceBase implements 
+                Reducer&lt;Text, IntWritable, Text, IntWritable&gt; {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>78.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                public void reduce(Text key, Iterator&lt;IntWritable&gt; values,
+                OutputCollector&lt;Text, IntWritable&gt; output, 
+                Reporter reporter) throws IOException {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>79.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>int sum = 0;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>80.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>while (values.hasNext()) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>81.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>sum += values.next().get();</code>
+            </td>
+          </tr>
+          <tr>
+            <td>82.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>83.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>output.collect(key, new IntWritable(sum));</code>
+            </td>
+          </tr>
+          <tr>
+            <td>84.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>85.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>86.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>87.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>public int run(String[] args) throws Exception {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>88.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                JobConf conf = new JobConf(getConf(), WordCount.class);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>89.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setJobName("wordcount");</code>
+            </td>
+          </tr>
+          <tr>
+            <td>90.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>91.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setOutputKeyClass(Text.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>92.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setOutputValueClass(IntWritable.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>93.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>94.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setMapperClass(Map.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>95.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setCombinerClass(Reduce.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>96.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setReducerClass(Reduce.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>97.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>98.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setInputFormat(TextInputFormat.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>99.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>conf.setOutputFormat(TextOutputFormat.class);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>100.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>101.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                List&lt;String&gt; other_args = new ArrayList&lt;String&gt;();
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>102.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>for (int i=0; i &lt; args.length; ++i) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>103.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>if ("-skip".equals(args[i])) {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>104.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                DistributedCache.addCacheFile(new Path(args[++i]).toUri(), conf);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>105.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                conf.setBoolean("wordcount.skip.patterns", true);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>106.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>} else {</code>
+            </td>
+          </tr>
+          <tr>
+            <td>107.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>other_args.add(args[i]);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>108.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>109.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>110.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>111.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));</code>
+            </td>
+          </tr>
+          <tr>
+            <td>112.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));</code>
+            </td>
+          </tr>
+          <tr>
+            <td>113.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>114.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>JobClient.runJob(conf);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>115.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>return 0;</code>
+            </td>
+          </tr>
+          <tr>
+            <td>116.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>117.</td>
+            <td></td>
+          </tr>
+          <tr>
+            <td>118.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>
+                public static void main(String[] args) throws Exception {
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>119.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>
+                int res = ToolRunner.run(new Configuration(), new WordCount(), 
+                                         args);
+              </code>
+            </td>
+          </tr>
+          <tr>
+            <td>120.</td>
+            <td>
+              &nbsp;&nbsp;&nbsp;&nbsp;
+              <code>System.exit(res);</code>
+            </td>
+          </tr>
+          <tr>
+            <td>121.</td>
+            <td>
+              &nbsp;&nbsp;
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>122.</td>
+            <td>
+              <code>}</code>
+            </td>
+          </tr>
+          <tr>
+            <td>123.</td>
+            <td></td>
+          </tr>
+        </table>
+      </section>
+        
+      <section>
+        <title>运行样例</title>
+        
+        <p>输入样例:</p>
+        <p>
+          <code>$ bin/hadoop dfs -ls /usr/joe/wordcount/input/</code><br/>
+          <code>/usr/joe/wordcount/input/file01</code><br/>
+          <code>/usr/joe/wordcount/input/file02</code><br/>
+          <br/>
+          <code>$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file01</code><br/>
+          <code>Hello World, Bye World!</code><br/>
+          <br/>
+          <code>$ bin/hadoop dfs -cat /usr/joe/wordcount/input/file02</code><br/>
+          <code>Hello Hadoop, Goodbye to hadoop.</code>
+        </p>
+        
+        <p>运行程序:</p>
+        <p>
+          <code>
+            $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount 
+              /usr/joe/wordcount/input /usr/joe/wordcount/output 
+          </code>
+        </p>
+
+        <p>输出:</p>
+        <p>
+          <code>
+            $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000
+          </code>
+          <br/>
+          <code>Bye    1</code><br/>
+          <code>Goodbye    1</code><br/>
+          <code>Hadoop,    1</code><br/>
+          <code>Hello    2</code><br/>
+          <code>World!    1</code><br/>
+          <code>World,    1</code><br/>
+          <code>hadoop.    1</code><br/>
+          <code>to    1</code><br/>
+        </p>
+        
+        <p>注意此时的输入与第一个版本的不同,输出的结果也有不同。
+	</p>
+
+        <p>现在通过<code>DistributedCache</code>插入一个模式文件,文件中保存了要被忽略的单词模式。
+	</p>
+        
+        <p>
+          <code>$ hadoop dfs -cat /user/joe/wordcount/patterns.txt</code><br/>
+          <code>\.</code><br/>
+          <code>\,</code><br/>
+          <code>\!</code><br/>
+          <code>to</code><br/>
+        </p>
+        
+        <p>再运行一次,这次使用更多的选项:</p>
+        <p>
+          <code>
+            $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount 
+              -Dwordcount.case.sensitive=true /usr/joe/wordcount/input 
+              /usr/joe/wordcount/output -skip /user/joe/wordcount/patterns.txt
+          </code>
+        </p>
+        
+        <p>应该得到这样的输出:</p>
+        <p>
+          <code>
+            $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000
+          </code>
+          <br/>
+          <code>Bye    1</code><br/>
+          <code>Goodbye    1</code><br/>
+          <code>Hadoop    1</code><br/>
+          <code>Hello    2</code><br/>
+          <code>World    2</code><br/>
+          <code>hadoop    1</code><br/>
+        </p>
+        
+        <p>再运行一次,这一次关闭大小写敏感性(case-sensitivity):</p>
+        <p>
+          <code>
+            $ bin/hadoop jar /usr/joe/wordcount.jar org.myorg.WordCount 
+              -Dwordcount.case.sensitive=false /usr/joe/wordcount/input 
+              /usr/joe/wordcount/output -skip /user/joe/wordcount/patterns.txt
+          </code>
+        </p>
+        
+        <p>输出:</p>
+        <p>
+          <code>
+            $ bin/hadoop dfs -cat /usr/joe/wordcount/output/part-00000
+          </code>
+          <br/>
+          <code>bye    1</code><br/>
+          <code>goodbye    1</code><br/>
+          <code>hadoop    2</code><br/>
+          <code>hello    2</code><br/>
+          <code>world    2</code><br/>
+        </p>
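+        <p>
+          As a side note on the skip patterns: each line of
+          <code>patterns.txt</code> is applied verbatim as a
+          <code>java.util.regex</code> replacement by the
+          <code>replaceAll</code> call on line 61. The following
+          self-contained sketch (not part of the tutorial sources; the class
+          name <code>SkipPatternDemo</code> is made up for illustration)
+          shows what that loop does to the second input file during the
+          case-insensitive run:
+        </p>
+<source>
+import java.util.Arrays;
+import java.util.List;
+import java.util.StringTokenizer;
+
+public class SkipPatternDemo {
+  public static void main(String[] args) {
+    // Patterns from patterns.txt above; note that "to" removes every
+    // occurrence of that character sequence, not just the whole word.
+    List&lt;String&gt; patternsToSkip = Arrays.asList("\\.", "\\,", "\\!", "to");
+    String line = "Hello Hadoop, Goodbye to hadoop.".toLowerCase();
+    for (String pattern : patternsToSkip) {
+      line = line.replaceAll(pattern, "");  // same call as line 61 above
+    }
+    // Tokenizing the cleaned line yields: hello, hadoop, goodbye, hadoop
+    StringTokenizer tokenizer = new StringTokenizer(line);
+    while (tokenizer.hasMoreTokens()) {
+      System.out.println(tokenizer.nextToken());
+    }
+  }
+}
+</source>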
+      </section>
+      
+      <section>
+        <title>程序要点</title>
+        
+        <p>
+	通过使用一些Map/Reduce框架提供的功能,<code>WordCount</code>的第二个版本在原始版本基础上有了如下的改进:
+        </p>
+        <ul>
+          <li>
+            展示了应用程序如何在<code>Mapper</code> (和<code>Reducer</code>)中通过<code>configure</code>方法
+		修改配置参数(28-43行)。
+          </li>
+          <li>
+            展示了作业如何使用<code>DistributedCache</code> 来分发只读数据。
+	这里允许用户指定单词的模式,在计数时忽略那些符合模式的单词(104行)。
+          </li>
+          <li>
+        展示了<code>Tool</code>接口和<code>GenericOptionsParser</code>处理Hadoop命令行选项的功能
+            (87-116, 119行;本节末尾附有一个简短的示例)。
+          </li>
+          <li>
+	展示了应用程序如何使用<code>Counters</code>(68行),如何通过传递给<code>map</code>(和<code>reduce</code>)
+	方法的<code>Reporter</code>实例来设置应用程序的状态信息(72行)。
+          </li>
+        </ul>
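+        <p>
+          The interplay between <code>ToolRunner</code> and
+          <code>GenericOptionsParser</code> noted above can be seen with a
+          tiny stand-alone driver. This is only a sketch (the class name
+          <code>EchoArgs</code> and the printed messages are invented for
+          illustration): generic options such as
+          <code>-Dwordcount.case.sensitive=false</code> are consumed and
+          applied to the <code>Configuration</code> before <code>run</code>
+          is invoked, so <code>run</code> only sees the remaining
+          application arguments (the input/output paths and
+          <code>-skip</code>).
+        </p>
+<source>
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+public class EchoArgs extends Configured implements Tool {
+  public int run(String[] args) throws Exception {
+    // -D key=value pairs have already been applied to the Configuration
+    // by GenericOptionsParser inside ToolRunner.run(...).
+    System.out.println("wordcount.case.sensitive = "
+        + getConf().get("wordcount.case.sensitive"));
+    for (String arg : args) {
+      System.out.println("remaining argument: " + arg);
+    }
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    System.exit(ToolRunner.run(new Configuration(), new EchoArgs(), args));
+  }
+}
+</source>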
+        
+      </section>
+    </section>
+
+    <p>
+      <em>Java和JNI是Sun Microsystems, Inc.在美国和其它国家的注册商标。</em>
+    </p>
+    
+  </body>
+  
+</document>

+ 230 - 0
common/src/docs/cn/src/documentation/content/xdocs/native_libraries.xml

@@ -0,0 +1,230 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  
+  <header>
+    <title>Hadoop本地库</title>
+  </header>
+  
+  <body>
+  
+    <section>
+      <title>目的</title>
+      
+      <p>
+     鉴于性能问题以及某些Java类库的缺失,对于某些组件,Hadoop提供了自己的本地实现。
+	这些组件保存在Hadoop的一个独立的动态链接的库里。这个库在*nix平台上叫<em>libhadoop.so</em>. 本文主要介绍本地库的使用方法以及如何构建本地库。
+</p>
+    </section>
+    
+    <section>
+      <title>组件</title>
+      
+      <p>Hadoop现在已经有以下
+      <a href="ext:api/org/apache/hadoop/io/compress/compressioncodec">
+      compression codecs</a>本地组件:</p>
+      <ul>
+        <li><a href="ext:zlib">zlib</a></li>
+        <li><a href="ext:gzip">gzip</a></li>
+        <li><a href="ext:lzo">lzo</a></li>
+      </ul>
+      
+      <p>在以上组件中,lzo和gzip压缩编解码器必须使用hadoop本地库才能运行。
+      </p>
+    </section>
+
+    <section>
+      <title>使用方法</title>
+      
+      <p>hadoop本地库的用法很简单:</p>
+
+      <ul>
+        <li>
+          看一下
+	<a href="#支持的平台">支持的平台</a>.
+        </li>
+        <li>
+           <a href="ext:releases/download">下载</a> 预构建的32位i386架构的Linux本地hadoop库(可以在hadoop发行版的<code>lib/native</code>目录下找到)或者自己
+          <a href="#构建Hadoop本地库">构建</a> 这些库。
+        </li>
+        <li>
+          确保你的平台已经安装了<strong>zlib-1.2</strong>以上版本或者<strong>lzo2.0</strong>以上版本的软件包或者两者均已安装(根据你的需要)。
+        </li>
+      </ul>
+      
+      <p><code>bin/hadoop</code> 脚本通过系统属性
+      <em>-Djava.library.path=&lt;path&gt;</em>来确认hadoop本地库是否包含在库路径里。</p>
+
+      <p>检查hadoop日志文件可以查看hadoop库是否正常,正常情况下会看到:</p>
+      
+      <p>
+        <code>
+          DEBUG util.NativeCodeLoader - Trying to load the custom-built 
+          native-hadoop library... 
+        </code><br/>
+        <code>
+          INFO  util.NativeCodeLoader - Loaded the native-hadoop library
+        </code>
+      </p>
+
+      <p>如果出错,会看到:</p>
+      <p>
+        <code>
+          INFO util.NativeCodeLoader - Unable to load native-hadoop library for 
+          your platform... using builtin-java classes where applicable
+        </code>
+      </p>
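+      <p>
+        The same check can be made from code instead of from the logs. A
+        minimal sketch (the class name <code>NativeCheck</code> is invented
+        here; it only assumes the Hadoop core jar is on the classpath):
+      </p>
+<source>
+import org.apache.hadoop.util.NativeCodeLoader;
+
+public class NativeCheck {
+  public static void main(String[] args) {
+    // True only if libhadoop.so was found on java.library.path and
+    // loaded successfully by this JVM.
+    if (NativeCodeLoader.isNativeCodeLoaded()) {
+      System.out.println("native-hadoop library loaded");
+    } else {
+      System.out.println("falling back to builtin-java implementations");
+    }
+  }
+}
+</source>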
+    </section>
+    
+    <section>
+      <title>支持的平台</title>
+      
+      <p>Hadoop本地库只支持*nix平台,已经广泛使用在GNU/Linux平台上,但是不支持
+      <a href="ext:cygwin">Cygwin</a> 
+      和 <a href="ext:osx">Mac OS X</a>。 
+      </p>
+
+      <p>已经测试过的GNU/Linux发行版本:</p>
+      <ul>
+        <li>
+          <a href="http://www.redhat.com/rhel/">RHEL4</a>/<a href="http://fedora.redhat.com/">Fedora</a>
+        </li>
+        <li><a href="http://www.ubuntu.com/">Ubuntu</a></li>
+        <li><a href="http://www.gentoo.org/">Gentoo</a></li>
+      </ul>
+
+      <p>在上述平台上,32/64位Hadoop本地库分别能和32/64位的jvm一起正常运行。
+      </p>
+    </section>
+    
+    <section>
+      <title>构建Hadoop本地库</title>
+      
+      <p>Hadoop本地库使用
+      <a href="http://en.wikipedia.org/wiki/ANSI_C">ANSI C</a> 编写,使用GNU autotools工具链 (autoconf, autoheader, automake, autoscan, libtool)构建。也就是说构建hadoop库的平台需要有标准C的编译器和GNU autotools工具链。请参看
+      <a href="#支持的平台">支持的平台</a>。</p>
+
+      <p>你的目标平台上可能会需要的软件包:
+      </p>
+      <ul>
+        <li>
+          C 编译器 (e.g. <a href="http://gcc.gnu.org/">GNU C Compiler</a>)
+        </li>
+        <li>
+          GNU Autotools 工具链: 
+          <a href="http://www.gnu.org/software/autoconf/">autoconf</a>, 
+          <a href="http://www.gnu.org/software/automake/">automake</a>, 
+          <a href="http://www.gnu.org/software/libtool/">libtool</a>
+        </li>
+        <li> 
+          zlib开发包 (stable version >= 1.2.0)
+        </li>
+        <li> 
+          lzo开发包 (stable version >= 2.0)
+        </li> 
+      </ul>
+
+      <p>如果已经满足了上述先决条件,可以使用<code>build.xml</code> 
+      文件,并把其中的<code>compile.native</code>置为 
+      <code>true</code>,这样就可以生成hadoop本地库:</p>
+
+      <p><code>$ ant -Dcompile.native=true &lt;target&gt;</code></p>
+
+      <p>因为不是所有用户都需要Hadoop本地库,所以默认情况下hadoop不生成该库。
+      </p>
+
+      <p>你可以在下面的路径查看新生成的hadoop本地库:</p>
+
+      <p><code>$ build/native/&lt;platform&gt;/lib</code></p>
+
+      <p>其中&lt;platform&gt;是下列系统属性的组合 
+      <code>${os.name}-${os.arch}-${sun.arch.data.model}</code>;例如 
+      Linux-i386-32。</p>
+
+      <section>
+        <title>注意</title>
+        
+        <ul>
+          <li>
+            在生成hadoop本地库的目标平台上<strong>必须</strong> 安装了zlib和lzo开发包;但是如果你只希望使用其中一个的话,在部署时,安装其中任何一个都是足够的。
+          </li>
+          <li>
+		  在目标平台上生成以及部署hadoop本地库时,都需要根据32/64位jvm选取对应的32/64位zlib/lzo软件包。
+          </li>
+        </ul>
+      </section>
+    </section>
+    <section>
+      <title> 使用DistributedCache 加载本地库</title>
+      <p>用户可以通过
+      <a href="mapred_tutorial.html#DistributedCache">DistributedCache</a>
+      加载本地共享库,并<em>分发</em>和建立库文件的<em>符号链接</em>。
+      </p>
+      <p>这个例子描述了如何分发库文件并在map/reduce任务中装载库文件(列表之后给出一份汇总的示意代码)。
+      </p>
+      <ol>
+      <li>首先拷贝库文件到HDFS。<br/>
+      <code>bin/hadoop fs -copyFromLocal mylib.so.1 /libraries/mylib.so.1</code>
+      </li>
+      <li>启动作业时包含以下代码:<br/>
+      <code> DistributedCache.createSymlink(conf); </code> <br/>
+      <code> DistributedCache.addCacheFile(new URI("hdfs://host:port/libraries/mylib.so.1#mylib.so"), conf);
+      </code>
+      </li>
+      <li>map/reduce任务中包含以下代码:<br/>
+      <code> System.loadLibrary("mylib.so"); </code>
+      </li>
+      </ol>
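+      <p>
+        The three steps above, put together, look roughly like the sketch
+        below (illustrative only: the class name
+        <code>NativeLibJobSetup</code> is invented, <code>host:port</code>
+        is a placeholder, and <code>System.load</code> with an absolute path
+        is used here instead of <code>System.loadLibrary</code>, which
+        expects a bare library name rather than a file name):
+      </p>
+<source>
+import java.io.File;
+import java.net.URI;
+
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.mapred.JobConf;
+
+public class NativeLibJobSetup {
+  // Step 2: in the job-launching program.
+  public static void configureCache(JobConf conf) throws Exception {
+    DistributedCache.createSymlink(conf);
+    // "#mylib.so" is the name of the symlink created in each task's
+    // current working directory.
+    DistributedCache.addCacheFile(
+        new URI("hdfs://host:port/libraries/mylib.so.1#mylib.so"), conf);
+  }
+
+  // Step 3: in the map/reduce task, before the native code is needed.
+  public static void loadFromTask() {
+    System.load(new File("mylib.so").getAbsolutePath());
+  }
+}
+</source>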
+    </section>
+  </body>
+  
+</document>

+ 252 - 0
common/src/docs/cn/src/documentation/content/xdocs/quickstart.xml

@@ -0,0 +1,252 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  
+  <header>
+    <title>Hadoop快速入门</title>
+  </header>
+  
+  <body>
+  
+    <section>
+      <title>目的</title>
+      <p>这篇文档的目的是帮助你快速完成单机上的Hadoop安装与使用以便你对<a href ="hdfs_design.html">Hadoop分布式文件系统(<acronym title="Hadoop Distributed File System">HDFS</acronym>)</a>和Map-Reduce框架有所体会,比如在HDFS上运行示例程序或简单作业等。</p>
+    </section>
+    <section id="PreReqs">
+      <title>先决条件</title>
+      
+      <section>
+        <title>支持平台</title>
+        
+        <ul>
+          <li>
+                GNU/Linux是产品开发和运行的平台。
+	        Hadoop已在有2000个节点的GNU/Linux主机组成的集群系统上得到验证。
+          </li>
+          <li>
+            Win32平台是作为<em>开发平台</em>支持的。由于分布式操作尚未在Win32平台上充分测试,所以还不作为一个<em>生产平台</em>被支持。
+          </li>
+        </ul>        
+      </section>
+      
+      <section>
+        <title>所需软件</title>
+        <p>Linux和Windows所需软件包括:</p>
+        <ol>
+          <li>
+            Java<sup>TM</sup>1.5.x,必须安装,建议选择Sun公司发行的Java版本。
+          </li>
+          <li>
+            <strong>ssh</strong> 必须安装并且保证 <strong>sshd</strong>一直运行,以便用Hadoop
+	    脚本管理远端Hadoop守护进程。
+          </li>
+        </ol>
+		<p>Windows下的附加软件需求</p>
+          <ol>
+            <li>
+              <a href="http://www.cygwin.com/">Cygwin</a> - 提供上述软件之外的shell支持。 
+            </li>
+          </ol>
+      </section>
+
+      <section>
+        <title>安装软件</title>
+          
+        <p>如果你的集群尚未安装所需软件,你得首先安装它们。</p>
+          
+        <p>以Ubuntu Linux为例:</p>
+        <p>
+          <code>$ sudo apt-get install ssh</code><br/>
+          <code>$ sudo apt-get install rsync</code>
+        </p>
+          
+        <p>在Windows平台上,如果安装cygwin时未安装全部所需软件,则需启动cygwin安装管理器安装如下软件包:</p>
+        <ul>
+          <li>openssh - <em>Net</em> 类</li>
+        </ul>
+      </section>
+      
+    </section>
+    
+    <section>
+      <title>下载</title>
+      
+      <p>
+        为了获取Hadoop的发行版,从Apache的某个镜像服务器上下载最近的
+        <a href="ext:releases">稳定发行版</a>。</p>
+    </section>
+
+    <section>
+      <title>运行Hadoop集群的准备工作</title>
+      <p>
+        解压所下载的Hadoop发行版。编辑
+        <code>conf/hadoop-env.sh</code>文件,至少需要将<code>JAVA_HOME</code>设置为Java安装根路径。
+      </p>
+
+	  <p>
+	    尝试如下命令:<br/>
+        <code>$ bin/hadoop</code><br/>
+        将会显示<strong>hadoop</strong> 脚本的使用文档。
+      </p>
+      
+      <p>现在你可以用以下三种支持的模式中的一种启动Hadoop集群:
+      </p>
+      <ul>
+        <li>单机模式</li>
+        <li>伪分布式模式</li>
+        <li>完全分布式模式</li>
+      </ul>
+    </section>
+    
+    <section id="Local">
+	    <title>单机模式的操作方法</title>
+      
+      <p>默认情况下,Hadoop被配置成以非分布式模式运行的一个独立Java进程。这对调试非常有帮助。</p>
+      
+      <p>
+        下面的实例将已解压的 <code>conf</code> 目录拷贝作为输入,查找并显示匹配给定正则表达式的条目。输出写入到指定的<code>output</code>目录。
+        <br/>
+        <code>$ mkdir input</code><br/>
+        <code>$ cp conf/*.xml input</code><br/>
+        <code>
+          $ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+        </code><br/>
+        <code>$ cat output/*</code>
+      </p>
+    </section>
+    
+    <section id="PseudoDistributed">
+      <title>伪分布式模式的操作方法</title>
+
+	  <p>Hadoop可以在单节点上以所谓的伪分布式模式运行,此时每一个Hadoop守护进程都作为一个独立的Java进程运行。</p>
+	  
+      <section>
+        <title>配置</title>
+        <p>使用如下的 <code>conf/hadoop-site.xml</code>:</p>
+        <table>
+        <tr><td>&lt;configuration&gt;</td></tr>
+
+          <tr><td>&nbsp;&nbsp;&lt;property&gt;</td></tr>
+            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;fs.default.name&lt;/name&gt;</td></tr>
+            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;localhost:9000&lt;/value&gt;</td></tr>
+          <tr><td>&nbsp;&nbsp;&lt;/property&gt;</td></tr>
+
+          <tr><td>&nbsp;&nbsp;&lt;property&gt;</td></tr>
+            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;mapred.job.tracker&lt;/name&gt;</td></tr>
+            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;localhost:9001&lt;/value&gt;</td></tr>
+          <tr><td>&nbsp;&nbsp;&lt;/property&gt;</td></tr>
+
+          <tr><td>&nbsp;&nbsp;&lt;property&gt;</td></tr>
+            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;dfs.replication&lt;/name&gt;</td></tr>
+            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;1&lt;/value&gt;</td></tr>
+          <tr><td>&nbsp;&nbsp;&lt;/property&gt;</td></tr>
+
+        <tr><td>&lt;/configuration&gt;</td></tr>
+        </table>
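+        <p>
+          As a quick sanity check of this configuration: a client picks up
+          <code>fs.default.name</code> from the classpath and hands back the
+          matching file system. The snippet below is only a sketch (the
+          class name <code>WhichFileSystem</code> is invented; it assumes
+          the <code>conf</code> directory containing this
+          <code>hadoop-site.xml</code> is on the classpath):
+        </p>
+<source>
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+
+public class WhichFileSystem {
+  public static void main(String[] args) throws Exception {
+    // Reads hadoop-default.xml and hadoop-site.xml from the classpath.
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    // With the settings above this prints localhost:9000 and an HDFS
+    // client class; with an empty configuration it would be the local
+    // file system instead.
+    System.out.println("fs.default.name = " + conf.get("fs.default.name"));
+    System.out.println("FileSystem class = " + fs.getClass().getName());
+  }
+}
+</source>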
+     </section>
+
+      <section>
+        <title>免密码<em>ssh</em>设置</title>
+        
+        <p>
+          现在确认能否不输入口令就用ssh登录localhost:<br/>
+          <code>$ ssh localhost</code>
+        </p>
+        
+        <p>
+          如果不输入口令就无法用ssh登陆localhost,执行下面的命令:<br/>
+   		  <code>$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa</code><br/>
+		  <code>$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys</code>
+		</p>
+      </section>
+    
+      <section>
+        <title>执行</title>
+        
+        <p>
+          格式化一个新的分布式文件系统:<br/>
+          <code>$ bin/hadoop namenode -format</code>
+        </p>
+
+		<p>
+		  启动Hadoop守护进程:<br/>
+          <code>$ bin/start-all.sh</code>
+        </p>
+
+        <p>Hadoop守护进程的日志写入到 
+        <code>${HADOOP_LOG_DIR}</code> 目录 (默认是 
+        <code>${HADOOP_PREFIX}/logs</code>).</p>
+
+        <p>浏览NameNode和JobTracker的网络接口,它们的地址默认为:</p>
+        <ul>
+          <li>
+            <code>NameNode</code> - 
+            <a href="http://localhost:50070/">http://localhost:50070/</a>
+          </li>
+          <li>
+            <code>JobTracker</code> - 
+            <a href="http://localhost:50030/">http://localhost:50030/</a>
+          </li>
+        </ul>
+        
+        <p>
+          将输入文件拷贝到分布式文件系统:<br/>
+		  <code>$ bin/hadoop fs -put conf input</code>
+		</p>
+		
+        <p>
+          运行发行版提供的示例程序:<br/>
+          <code>
+            $ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
+          </code>
+        </p>
+        
+        <p>查看输出文件:</p>
+        <p>
+          将输出文件从分布式文件系统拷贝到本地文件系统查看:<br/>
+          <code>$ bin/hadoop fs -get output output</code><br/>
+          <code>$ cat output/*</code>
+        </p>
+        <p> 或者 </p>
+        <p>
+          在分布式文件系统上查看输出文件:<br/>
+          <code>$ bin/hadoop fs -cat output/*</code>
+        </p>
+
+		<p>
+		  完成全部操作后,停止守护进程:<br/>
+		  <code>$ bin/stop-all.sh</code>
+		</p>
+      </section>
+    </section>
+    
+    <section id="FullyDistributed">
+      <title>完全分布式模式的操作方法</title>
+      
+	  <p>关于搭建完全分布式模式的,有实际意义的集群的资料可以在<a href="cluster_setup.html">这里</a>找到。</p>  
+    </section>
+    
+    <p>
+	    <em>Java与JNI是Sun Microsystems, Inc.在美国以及其他国家地区的商标或注册商标。</em>
+    </p>
+    
+  </body>
+  
+</document>

+ 249 - 0
common/src/docs/cn/src/documentation/content/xdocs/site.xml

@@ -0,0 +1,249 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!--
+Forrest site.xml
+
+This file contains an outline of the site's information content.  It is used to:
+- Generate the website menus (though these can be overridden - see docs)
+- Provide semantic, location-independent aliases for internal 'site:' URIs, eg
+<link href="site:changes"> links to changes.html (or ../changes.html if in
+  subdir).
+- Provide aliases for external URLs in the external-refs section.  Eg, <link
+  href="ext:cocoon"> links to http://xml.apache.org/cocoon/
+
+See http://forrest.apache.org/docs/linking.html for more info.
+-->
+
+<site label="Hadoop" href="" xmlns="http://apache.org/forrest/linkmap/1.0">
+
+  <docs label="文档"> 
+    <overview  label="概述"           href="index.html" />
+    <quickstart label="快速入门"        href="quickstart.html" />
+    <setup     label="集群搭建"      href="cluster_setup.html" />
+    <hdfs      label="HDFS构架设计"  href="hdfs_design.html" />
+    <hdfs      label="HDFS使用指南"    href="hdfs_user_guide.html" />
+    <hdfs      label="HDFS权限指南"    href="hdfs_permissions_guide.html" />
+    <hdfs      label="HDFS配额管理指南" href="hdfs_quota_admin_guide.html" />
+    <commands  label="命令手册"     href="commands_manual.html" />
+    <fs        label="FS Shell使用指南"   href="hdfs_shell.html" />
+    <distcp    label="DistCp使用指南"       href="distcp.html" />
+    <mapred    label="Map-Reduce教程" href="mapred_tutorial.html" />
+    <mapred    label="Hadoop本地库" href="native_libraries.html" />
+    <streaming label="Streaming"          href="streaming.html" />
+    <archives label="Hadoop Archives"     href="hadoop_archives.html"/>
+    <hod       label="Hadoop On Demand" href="hod.html">
+      <hod-user-guide href="hod_user_guide.html"/>
+      <hod-admin-guide href="hod_admin_guide.html"/>
+      <hod-config-guide href="hod_config_guide.html"/>
+    </hod>
+    <api       label="API参考"           href="ext:api/index" />
+    <jdiff     label="API Changes"        href="ext:jdiff" />
+    <wiki      label="维基"               href="ext:wiki" />
+    <faq       label="常见问题"                href="ext:faq" />
+    <lists     label="邮件列表"      href="ext:lists" />
+    <relnotes  label="发行说明"      href="ext:relnotes" />
+    <changes   label="变更日志"        href="ext:changes" />
+  </docs>
+
+ <external-refs>
+    <site      href="http://hadoop.apache.org/core/"/>
+    <lists     href="http://hadoop.apache.org/core/mailing_lists.html"/>
+    <releases  href="http://hadoop.apache.org/core/releases.html">
+      <download href="#Download" />
+    </releases>
+    <jira      href="http://hadoop.apache.org/core/issue_tracking.html"/>
+    <wiki      href="http://wiki.apache.org/hadoop/" />
+    <faq       href="http://wiki.apache.org/hadoop/FAQ" />
+    <hadoop-default href="http://hadoop.apache.org/core/docs/current/hadoop-default.html" />
+    <zlib      href="http://www.zlib.net/" />
+    <lzo       href="http://www.oberhumer.com/opensource/lzo/" />
+    <gzip      href="http://www.gzip.org/" />
+    <cygwin    href="http://www.cygwin.com/" />
+    <osx       href="http://www.apple.com/macosx" />
+    <hod href="">
+      <cluster-resources href="http://www.clusterresources.com" />
+      <torque href="http://www.clusterresources.com/pages/products/torque-resource-manager.php" />
+      <torque-download href="http://www.clusterresources.com/downloads/torque/" />
+      <torque-docs href="http://www.clusterresources.com/pages/resources/documentation.php" />
+      <torque-wiki href="http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki" />
+      <torque-mailing-list href="http://www.clusterresources.com/pages/resources/mailing-lists.php" />
+      <torque-basic-config href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.2_basic_configuration" />
+      <torque-advanced-config href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.3_advanced_configuration" />
+      <maui href="http://www.clusterresources.com/pages/products/maui-cluster-scheduler.php"/>
+      <python href="http://www.python.org" />
+      <twisted-python href="http://twistedmatrix.com/trac/" />
+    </hod>
+    <relnotes href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html" />
+    <changes href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html" />
+    <jdiff href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html" />
+    <api href="http://hadoop.apache.org/core/docs/r0.18.2/api/">
+      <index href="index.html" />
+      <org href="org/">
+        <apache href="apache/">
+          <hadoop href="hadoop/">
+            <conf href="conf/">
+              <configuration href="Configuration.html">
+                <final_parameters href="#FinalParams" />
+                <get href="#get(java.lang.String, java.lang.String)" />
+                <set href="#set(java.lang.String, java.lang.String)" />
+              </configuration>
+            </conf>
+            <filecache href="filecache/">
+              <distributedcache href="DistributedCache.html">
+                <addarchivetoclasspath href="#addArchiveToClassPath(org.apache.hadoop.fs.Path,%20org.apache.hadoop.conf.Configuration)" />
+                <addfiletoclasspath href="#addFileToClassPath(org.apache.hadoop.fs.Path,%20org.apache.hadoop.conf.Configuration)" />
+                <addcachefile href="#addCacheFile(java.net.URI,%20org.apache.hadoop.conf.Configuration)" />
+                <addcachearchive href="#addCacheArchive(java.net.URI,%20org.apache.hadoop.conf.Configuration)" />
+                <setcachefiles href="#setCacheFiles(java.net.URI[],%20org.apache.hadoop.conf.Configuration)" />
+                <setcachearchives href="#setCacheArchives(java.net.URI[],%20org.apache.hadoop.conf.Configuration)" />
+                <createsymlink href="#createSymlink(org.apache.hadoop.conf.Configuration)" />
+              </distributedcache>  
+            </filecache>
+            <fs href="fs/">
+              <filesystem href="FileSystem.html" />
+            </fs>
+            <io href="io/">
+              <closeable href="Closeable.html">
+                <close href="#close()" />
+              </closeable>
+              <sequencefile href="SequenceFile.html" />
+              <sequencefilecompressiontype href="SequenceFile.CompressionType.html">
+                <none href="#NONE" />
+                <record href="#RECORD" />
+                <block href="#BLOCK" />
+              </sequencefilecompressiontype>
+              <writable href="Writable.html" />
+              <writablecomparable href="WritableComparable.html" />
+              <compress href="compress/">
+                <compressioncodec href="CompressionCodec.html" />
+              </compress>
+            </io>
+            <mapred href="mapred/">
+              <clusterstatus href="ClusterStatus.html" />
+              <counters href="Counters.html" />
+              <fileinputformat href="FileInputFormat.html">
+                 <setinputpaths href="#setInputPaths(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path[])" />
+                 <addinputpath href="#addInputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)" />
+                 <setinputpathstring href="#setInputPaths(org.apache.hadoop.mapred.JobConf,%20java.lang.String)" />
+                 <addinputpathstring href="#addInputPath(org.apache.hadoop.mapred.JobConf,%20java.lang.String)" />
+              </fileinputformat>
+              <fileoutputformat href="FileOutputFormat.html">
+                <getoutputpath href="#getOutputPath(org.apache.hadoop.mapred.JobConf)" />
+                <getworkoutputpath href="#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)" />
+                <setoutputpath href="#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)" />
+                <setcompressoutput href="#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)" />
+                <setoutputcompressorclass href="#setOutputCompressorClass(org.apache.hadoop.mapred.JobConf,%20java.lang.Class)" />
+              </fileoutputformat>
+              <filesplit href="FileSplit.html" />
+              <inputformat href="InputFormat.html" />
+              <inputsplit href="InputSplit.html" />
+              <isolationrunner href="IsolationRunner.html" />
+              <jobclient href="JobClient.html">
+                <runjob href="#runJob(org.apache.hadoop.mapred.JobConf)" />
+                <submitjob href="#submitJob(org.apache.hadoop.mapred.JobConf)" />
+              </jobclient>
+              <jobconf href="JobConf.html">
+                <setnummaptasks href="#setNumMapTasks(int)" />
+                <setnumreducetasks href="#setNumReduceTasks(int)" />
+                <setoutputkeycomparatorclass href="#setOutputKeyComparatorClass(java.lang.Class)" />
+                <setoutputvaluegroupingcomparator href="#setOutputValueGroupingComparator(java.lang.Class)" />
+                <setcombinerclass href="#setCombinerClass(java.lang.Class)" />
+                <setmapdebugscript href="#setMapDebugScript(java.lang.String)" />
+                <setreducedebugscript href="#setReduceDebugScript(java.lang.String)" />
+                <setmapspeculativeexecution href="#setMapSpeculativeExecution(boolean)" />
+                <setreducespeculativeexecution href="#setReduceSpeculativeExecution(boolean)" />
+                <setmaxmapattempts href="#setMaxMapAttempts(int)" />
+                <setmaxreduceattempts href="#setMaxReduceAttempts(int)" />
+                <setmaxmaptaskfailurespercent href="#setMaxMapTaskFailuresPercent(int)" />
+                <setmaxreducetaskfailurespercent href="#setMaxReduceTaskFailuresPercent(int)" />
+                <setjobendnotificationuri href="#setJobEndNotificationURI(java.lang.String)" />
+                <setcompressmapoutput href="#setCompressMapOutput(boolean)" />
+                <setmapoutputcompressorclass href="#setMapOutputCompressorClass(java.lang.Class)" />
+                <setprofileenabled href="#setProfileEnabled(boolean)" />
+                <setprofiletaskrange href="#setProfileTaskRange(boolean,%20java.lang.String)" />
+                <setprofileparams href="#setProfileParams(java.lang.String)" />
+                <getjoblocaldir href="#getJobLocalDir()" />
+                <getjar href="#getJar()" />
+              </jobconf>
+              <jobconfigurable href="JobConfigurable.html">
+                <configure href="#configure(org.apache.hadoop.mapred.JobConf)" />
+              </jobconfigurable>
+              <jobcontrol href="jobcontrol/">
+                <package-summary href="package-summary.html" />
+              </jobcontrol>
+              <mapper href="Mapper.html">
+                <map href="#map(K1, V1, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)" />
+              </mapper>
+              <outputcollector href="OutputCollector.html">
+                <collect href="#collect(K, V)" />
+              </outputcollector>
+              <outputformat href="OutputFormat.html" />
+              <outputformatbase href="OutputFormatBase.html">
+                <setcompressoutput href="#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)" />
+                <setoutputcompressorclass href="#setOutputCompressorClass(org.apache.hadoop.mapred.JobConf,%20java.lang.Class)" />
+              </outputformatbase>
+              <outputlogfilter href="OutputLogFilter.html" />
+              <sequencefileoutputformat href="SequenceFileOutputFormat.html">
+                <setoutputcompressiontype href="#setOutputCompressionType(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.io.SequenceFile.CompressionType)" />
+              </sequencefileoutputformat>
+              <partitioner href="Partitioner.html" />
+              <recordreader href="RecordReader.html" />
+              <recordwriter href="RecordWriter.html" />
+              <reducer href="Reducer.html">
+                <reduce href="#reduce(K2, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)" />
+              </reducer>
+              <reporter href="Reporter.html">
+                <incrcounterEnum href="#incrCounter(java.lang.Enum, long)" />
+                <incrcounterString href="#incrCounter(java.lang.String, java.lang.String, long amount)" />
+              </reporter>
+              <runningjob href="RunningJob.html" />
+              <textinputformat href="TextInputFormat.html" />
+              <textoutputformat href="TextOutputFormat.html" />
+              <lib href="lib/">
+                <package-summary href="package-summary.html" />
+                <hashpartitioner href="HashPartitioner.html" />
+              </lib>
+              <pipes href="pipes/">
+                <package-summary href="package-summary.html" />
+              </pipes>
+            </mapred>
+            <net href="net/">
+              <dnstoswitchmapping href="DNSToSwitchMapping.html">
+              <resolve href="#resolve(java.util.List)" />
+              </dnstoswitchmapping>
+            </net>
+            <streaming href="streaming/">
+              <package-summary href="package-summary.html" />
+            </streaming>
+            <util href="util/">
+              <genericoptionsparser href="GenericOptionsParser.html" />
+              <progress href="Progress.html" />
+              <tool href="Tool.html" />
+              <toolrunner href="ToolRunner.html">
+                <run href="#run(org.apache.hadoop.util.Tool, java.lang.String[])" />
+              </toolrunner>
+            </util>
+          </hadoop>
+        </apache>
+      </org>
+    </api>
+  </external-refs>
+ 
+</site>
+

+ 618 - 0
common/src/docs/cn/src/documentation/content/xdocs/streaming.xml

@@ -0,0 +1,618 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+          "http://forrest.apache.org/dtd/document-v20.dtd">
+
+
+<document>
+<header>
+<title>Hadoop Streaming</title>
+<meta name="http-equiv">Content-Type</meta>
+<meta name="content">text/html;</meta>
+<meta name="charset">utf-8</meta>
+</header>
+<body>
+<section>
+<title>Hadoop Streaming</title>
+<p>
+Hadoop streaming是Hadoop的一个工具,
+    它帮助用户创建和运行一类特殊的map/reduce作业,
+    这些特殊的map/reduce作业是由一些可执行文件或脚本文件充当mapper或者reducer。例如: 
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper /bin/cat \
+    -reducer /bin/wc
+</source>
+</section>
+
+<section>
+<title>Streaming工作原理</title>
+<p>
+在上面的例子里,mapper和reducer都是可执行文件,它们从标准输入读入数据(一行一行读),
+并把计算结果发给标准输出。Streaming工具会创建一个Map/Reduce作业,
+并把它发送给合适的集群,同时监视这个作业的整个执行过程。
+</p><p>
+如果一个可执行文件被用于mapper,则在mapper初始化时,
+每一个mapper任务会把这个可执行文件作为一个单独的进程启动。
+mapper任务运行时,它把输入切分成行并把每一行提供给可执行文件进程的标准输入。
+同时,mapper收集可执行文件进程标准输出的内容,并把收到的每一行内容转化成key/value对,作为mapper的输出。
+默认情况下,一行中第一个tab之前的部分作为<strong>key</strong>,之后的(不包括tab)作为<strong>value</strong>。
+如果没有tab,整行作为key值,value值为null。不过,这可以定制,在下文中将会讨论如何自定义key和value的切分方式。
+</p>
+<p>如果一个可执行文件被用于reducer,每个reducer任务会把这个可执行文件作为一个单独的进程启动。
+Reducer任务运行时,它把输入切分成行并把每一行提供给可执行文件进程的标准输入。
+同时,reducer收集可执行文件进程标准输出的内容,并把每一行内容转化成key/value对,作为reducer的输出。
+默认情况下,一行中第一个tab之前的部分作为key,之后的(不包括tab)作为value。在下文中将会讨论如何自定义key和value的切分方式。
+</p><p>
+这是Map/Reduce框架和streaming mapper/reducer之间的基本通信协议。
+</p><p>
+用户也可以使用java类作为mapper或者reducer。上面的例子与这里的代码等价:
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper \
+    -reducer /bin/wc
+</source>
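+<p>
+Conversely, any program that follows the stdin/stdout contract described
+above can act as the mapper, including a plain Java program run as an
+external executable. The sketch below is illustrative only (the class name
+<code>StreamingWordMapper</code> is invented); it reads lines from standard
+input and emits one tab-separated key/value pair per word:
+</p>
+<source>
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+public class StreamingWordMapper {
+  public static void main(String[] args) throws Exception {
+    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+    String line;
+    while ((line = in.readLine()) != null) {
+      for (String word : line.split("\\s+")) {
+        if (word.length() > 0) {
+          // key TAB value, exactly as the framework expects on stdout.
+          System.out.println(word + "\t" + 1);
+        }
+      }
+    }
+  }
+}
+</source>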
+<p>用户可以设定<code>stream.non.zero.exit.is.failure</code> 
+<code>true</code> 或<code>false</code> 来表明streaming task的返回值非零时是
+<code>Failure</code> 
+还是<code>Success</code>。默认情况,streaming task返回非零时表示失败。
+</p>
+</section>
+
+<section>
+<title>将文件打包到提交的作业中</title>
+<p>
+任何可执行文件都可以被指定为mapper/reducer。这些可执行文件不需要事先存放在集群上;
+如果在集群上还没有,则需要用-file选项让framework把可执行文件作为作业的一部分,一起打包提交。例如:
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper myPythonScript.py \
+    -reducer /bin/wc \
+    -file myPythonScript.py 
+</source>
+<p> 
+上面的例子描述了一个用户把可执行python文件作为mapper。
+其中的选项“-file myPythonScript.py”使可执行python文件作为作业提交的一部分被上传到集群的机器上。
+</p>
+<p>
+除了可执行文件外,其他mapper或reducer需要用到的辅助文件(比如字典,配置文件等)也可以用这种方式打包上传。例如:
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper myPythonScript.py \
+    -reducer /bin/wc \
+    -file myPythonScript.py \
+    -file myDictionary.txt
+</source>
+</section>
+
+<section>
+<title>Streaming选项与用法</title>
+
+<section>
+<title>只使用Mapper的作业</title>
+<p>
+有时只需要map函数处理输入数据。这时只需把mapred.reduce.tasks设置为零,Map/reduce框架就不会创建reducer任务,mapper任务的输出就是整个作业的最终输出。
+</p><p>
+为了做到向下兼容,Hadoop Streaming也支持“-reducer NONE”选项,它与“-jobconf mapred.reduce.tasks=0”等价。
+</p>
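+<p>
+For comparison, the same effect in a Java Map/Reduce driver is a single call
+on the <code>JobConf</code>; a small sketch (the class name
+<code>MapOnlySetup</code> is invented for illustration):
+</p>
+<source>
+import org.apache.hadoop.mapred.JobConf;
+
+public class MapOnlySetup {
+  // Equivalent of "-jobconf mapred.reduce.tasks=0": no reduce phase runs
+  // and the map output is written directly to the output directory.
+  public static void makeMapOnly(JobConf conf) {
+    conf.setNumReduceTasks(0);
+  }
+}
+</source>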
+</section>
+
+<section>
+<title>为作业指定其他插件</title>
+<p>
+和其他普通的Map/Reduce作业一样,用户可以为streaming作业指定其他插件:
+</p>
+<source>
+   -inputformat JavaClassName
+   -outputformat JavaClassName
+   -partitioner JavaClassName
+   -combiner JavaClassName
+</source>
+<p>The class you supply for the input format should return key/value pairs of Text class.
+If you do not specify an input format class, TextInputFormat is used by default. Since
+TextInputFormat returns keys of LongWritable class (the key is not part of the line's
+content but its byte offset in the file), the keys are discarded and only the values are
+piped to the streaming mapper.
+</p><p>
+The class you supply for the output format is expected to take key/value pairs of Text
+class. If you do not specify an output format class, TextOutputFormat is used by default.
+</p>
+</section>
+
+<section>
+<title>Large Files and Archives in Hadoop Streaming</title>
+
+<p>The -cacheFile and -cacheArchive options let tasks distribute files and archives
+across the cluster. The argument to either option is the URI of a file or archive that
+you have already uploaded to HDFS. These files and archives are cached across jobs. You
+can obtain the host and fs_port values from the fs.default.name config variable.
+</p>
+<p>
+Here is an example of the -cacheFile option:
+</p> 
+<source>
+-cacheFile hdfs://host:fs_port/user/testfile.txt#testlink
+</source>
+<p>In the above example, the part of the URI after the "#" is the name of the symlink
+created in the current working directory of the task; here the working directory contains
+a symlink called "testlink" that points to the local copy of testfile.txt. If you have
+multiple files, the options can be written as:
+</p>
+<source>
+-cacheFile hdfs://host:fs_port/user/testfile1.txt#testlink1 -cacheFile hdfs://host:fs_port/user/testfile2.txt#testlink2
+</source>
+<p>
+The -cacheArchive option copies a jar file to the current working directory of the task
+and automatically unjars it. For example:
+</p>
+<source>
+-cacheArchive hdfs://host:fs_port/user/testfile.jar#testlink3
+</source>
+<p>
+In the above example, testlink3 is a symlink in the current working directory that points
+to the directory into which testfile.jar has been unjarred.
+</p>
+<p>
+Here is another example of the -cacheArchive option. The input.txt file has two lines
+specifying the names of two files: testlink/cache.txt and testlink/cache2.txt. "testlink"
+is a symlink to the archive directory (the directory into which the jar was unjarred),
+which holds the files "cache.txt" and "cache2.txt".
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+                  -input "/user/me/samples/cachefile/input.txt"  \
+                  -mapper "xargs cat"  \
+                  -reducer "cat"  \
+                  -output "/user/me/samples/cachefile/out" \
+                  -cacheArchive 'hdfs://hadoop-nn1.example.com/user/me/samples/cachefile/cachedir.jar#testlink' \
+                  -jobconf mapred.map.tasks=1 \
+                  -jobconf mapred.reduce.tasks=1 \
+                  -jobconf mapred.job.name="Experiment"
+
+$ ls test_jar/
+cache.txt  cache2.txt
+
+$ jar cvf cachedir.jar -C test_jar/ .
+added manifest
+adding: cache.txt(in = 30) (out= 29)(deflated 3%)
+adding: cache2.txt(in = 37) (out= 35)(deflated 5%)
+
+$ hadoop dfs -put cachedir.jar samples/cachefile
+
+$ hadoop dfs -cat /user/me/samples/cachefile/input.txt
+testlink/cache.txt
+testlink/cache2.txt
+
+$ cat test_jar/cache.txt 
+This is just the cache string
+
+$ cat test_jar/cache2.txt 
+This is just the second cache string
+
+$ hadoop dfs -ls /user/me/samples/cachefile/out      
+Found 1 items
+/user/me/samples/cachefile/out/part-00000  &lt;r 3&gt;   69
+
+$ hadoop dfs -cat /user/me/samples/cachefile/out/part-00000
+This is just the cache string   
+This is just the second cache string
+
+</source>
+</section>
+
+<section>
+<title>Specifying Additional Configuration Variables for Jobs</title>
+<p>
+You can specify additional configuration variables by using "-jobconf &lt;n&gt;=&lt;v&gt;". For example:
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper\
+    -reducer /bin/wc \
+    -jobconf mapred.reduce.tasks=2
+</source>
+<p>
+In the above example, -jobconf mapred.reduce.tasks=2 specifies that the job uses two reducers.
+</p>
+<p>
+For more details on jobconf parameters, see: <a href="ext:hadoop-default">hadoop-default.html</a></p>
+</section>
+
+<section>
+<title>Other Options</title>
+<p>
+Other options for a streaming job are listed in the table below:
+</p>
+<table>
+<tr><th>Parameter</th><th>Optional/Required</th><th>Description</th></tr>
+<tr><td> -cluster name </td><td> Optional </td><td> Switch between the local Hadoop cluster and one or more remote clusters</td></tr>
+
+<tr><td> -dfs  host:port or local </td><td> Optional </td><td> Override the HDFS configuration for the job</td></tr>
+<tr><td> -jt host:port or local </td><td> Optional </td><td> Override the JobTracker configuration for the job</td></tr>
+<tr><td> -additionalconfspec specfile </td><td> Optional </td><td> Specifies a set of configuration variables in an XML file similar to hadoop-site.xml, instead of setting each variable with a separate "-jobconf name=value" option</td></tr>
+<tr><td> -cmdenv   name=value </td><td> Optional </td><td> Pass an environment variable to the streaming commands</td></tr>
+<tr><td> -cacheFile fileNameURI </td><td> Optional </td><td> Specify a file already uploaded to HDFS</td></tr>
+<tr><td> -cacheArchive fileNameURI </td><td> Optional </td><td> Specify a jar file already uploaded to HDFS; it is automatically unjarred into the current working directory</td></tr>
+
+<tr><td> -inputreader JavaClassName </td><td> Optional </td><td> For backward compatibility: specifies a record reader class (instead of an input format class)</td></tr>
+<tr><td> -verbose </td><td> Optional </td><td> Verbose output </td></tr>
+</table>
+<p>
+Use -cluster &lt;name&gt; to switch between the "local" Hadoop and one or more remote Hadoop clusters. By default, hadoop-default.xml and hadoop-site.xml are used; the -cluster &lt;name&gt; option causes $HADOOP_PREFIX/conf/hadoop-&lt;name&gt;.xml to be used instead.
+</p>
+<p>
+To change the temp directory, use the following option:
+</p>
+<source>
+  -jobconf dfs.data.dir=/tmp
+</source>
+<p>
+To specify additional local temp directories, use:
+</p>
+<source>
+   -jobconf mapred.local.dir=/tmp/local
+   -jobconf mapred.system.dir=/tmp/system
+   -jobconf mapred.temp.dir=/tmp/temp
+</source>
+<p>
+For more details on jobconf parameters, see: <a href="http://wiki.apache.org/hadoop/JobConfFile">http://wiki.apache.org/hadoop/JobConfFile</a>
+</p><p>
+To set an environment variable in a streaming command, use:
+</p>
+<source>
+-cmdenv EXAMPLE_DIR=/home/example/dictionaries/
+</source>
+</section>
+</section>
+
+<section>
+<title>More Usage Examples</title>
+
+<section>
+<title>Customizing the Way to Split Lines into Key/Value Pairs</title>
+<p>
+As noted earlier, when the Map/Reduce framework reads a line from the stdout of the mapper,
+it splits the line into a key/value pair. By default, the prefix of the line up to the first
+tab character is the key and the rest of the line (excluding the tab character) is the value.
+</p>
+<p>
+However, you can customize this default. You can specify a field separator other than the tab
+character, and you can specify that the key ends at the nth (n >= 1) occurrence of the
+separator instead of the first. For example:
+</p>
+
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper \
+    -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
+    -jobconf stream.map.output.field.separator=. \
+    -jobconf stream.num.map.output.key.fields=4 
+</source>
+<p>
+In the above example, "-jobconf stream.map.output.field.separator=." specifies "." as the
+field separator for the map output, and the prefix up to the fourth "." in a line is the key
+while the rest of the line (excluding the fourth ".") is the value.
+If a line has fewer than four "."s, then the whole line is the key and the value is an empty
+Text object (like the one created by new Text("")).
+</p><p>
+Similarly, you can use "-jobconf stream.reduce.output.field.separator=SEP" and
+"-jobconf stream.num.reduce.output.fields=NUM" to specify which occurrence of the separator
+in a line of the reduce output splits the key from the value.
+</p>
+</section>
+
+
+<section>
+<title>A Useful Partitioner Class (secondary sort, the -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner option)</title>
+<p>
+Hadoop has a library class, org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner, that is
+useful for many applications. It lets the Map/Reduce framework partition the map outputs
+based on a prefix of the key rather than the whole key. For example:
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.IdentityMapper \
+    -reducer org.apache.hadoop.mapred.lib.IdentityReducer \
+    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
+    -jobconf stream.map.output.field.separator=. \
+    -jobconf stream.num.map.output.key.fields=4 \
+    -jobconf map.output.key.field.separator=. \
+    -jobconf num.key.fields.for.partition=2 \
+    -jobconf mapred.reduce.tasks=12
+</source>
+<p>
+Here, <em>-jobconf stream.map.output.field.separator=.</em> and
+<em>-jobconf stream.num.map.output.key.fields=4</em> are as in the previous example.
+Streaming uses these two variables to derive the key/value pairs of the mapper.
+</p><p>
+The map output keys of the above Map/Reduce job normally have four fields separated by ".".
+However, because the <em>-jobconf num.key.fields.for.partition=2</em> option is used, the
+Map/Reduce framework partitions the map outputs by the first two fields of the key, and
+<em>-jobconf map.output.key.field.separator=.</em> specifies the separator used for this
+partitioning. This guarantees that all key/value pairs whose keys share the same first two
+fields are partitioned to the same reducer.
+</p><p>
+<em>This is effectively equivalent to specifying the first two fields as the primary key and
+the next two fields as the secondary key. The primary key is used for partitioning, and the
+combination of the primary and secondary keys is used for sorting.</em> A simple illustration
+is shown here:
+</p>
+<p>
+Output of map (the keys)</p><source>
+11.12.1.2
+11.14.2.3
+11.11.4.1
+11.12.1.1
+11.14.2.2
+
+</source>
+<p>
+Partitioned into 3 reducers (the first 2 fields are used for partitioning)</p><source>
+11.11.4.1
+-----------
+11.12.1.2
+11.12.1.1
+-----------
+11.14.2.3
+11.14.2.2
+</source>
+<p>
+Sorted within each partition (all 4 fields are used for sorting)
+</p><source>
+11.11.4.1
+-----------
+11.12.1.1
+11.12.1.2
+-----------
+11.14.2.2
+11.14.2.3
+</source>
+</section>
+
+<section>
+    <title>Working with the Hadoop Aggregate Package (the -reducer aggregate option)</title>
+<p>
+Hadoop has a library package called "Aggregate" (
+<a href="https://svn.apache.org/repos/asf/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate">https://svn.apache.org/repos/asf/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate</a>).
+"Aggregate" provides a special reducer class and a special combiner class, together with a
+list of simple aggregators (such as "sum", "max" and "min") that aggregate a sequence of
+values. Aggregate lets you define a mapper plugin class that generates an "aggregatable item"
+for each key/value pair of the mapper input. The combiner/reducer then aggregates those
+items with the appropriate aggregator.
+</p><p>
+To use Aggregate, simply specify "-reducer aggregate":</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper myAggregatorForKeyCount.py \
+    -reducer aggregate \
+    -file myAggregatorForKeyCount.py \
+    -jobconf mapred.reduce.tasks=12
+</source>
+<p>
+The python program myAggregatorForKeyCount.py looks like:
+</p>
+<source>
+#!/usr/bin/python
+
+import sys
+
+def generateLongCountToken(id):
+    # Emit an aggregatable item: sum a count of 1 for this id.
+    return "LongValueSum:" + id + "\t" + "1"
+
+def main(argv):
+    # Read records from stdin and emit one aggregatable item per input line.
+    for line in sys.stdin:
+        line = line&#91;:-1]
+        fields = line.split("\t")
+        print generateLongCountToken(fields&#91;0])
+
+if __name__ == "__main__":
+    main(sys.argv)
+</source>
+</section>
+
+<section>
+<title>Field Selection (similar to the unix 'cut' command)</title>
+<p>
+Hadoop has a library class, org.apache.hadoop.mapred.lib.FieldSelectionMapReduce, that
+effectively lets you process text data the way the unix "cut" utility does. The map function
+defined in the class treats each input key/value pair as a list of fields. You can specify
+the field separator (the default is the tab character) and select an arbitrary list of fields
+(consisting of one or more fields from the list) as the map output key or value. Similarly,
+the reduce function defined in the class treats each input key/value pair as a list of fields,
+and you can select an arbitrary list of fields as the reduce output key or value. For example:
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper org.apache.hadoop.mapred.lib.FieldSelectionMapReduce\
+    -reducer org.apache.hadoop.mapred.lib.FieldSelectionMapReduce\
+    -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
+    -jobconf map.output.key.field.separator=. \
+    -jobconf num.key.fields.for.partition=2 \
+    -jobconf mapred.data.field.separator=. \
+    -jobconf map.output.key.value.fields.spec=6,5,1-3:0- \
+    -jobconf reduce.output.key.value.fields.spec=0-2:5- \
+    -jobconf mapred.reduce.tasks=12
+</source>
+<p>
+The option "-jobconf map.output.key.value.fields.spec=6,5,1-3:0-" specifies how to select
+the key and value for the map output. The key selection spec and the value selection spec
+are separated by ":". In this case, the map output key consists of fields 6, 5, 1, 2 and 3,
+and the map output value consists of all fields ("0-" means field 0 and all subsequent fields).
+</p>
+<p>
+The option "-jobconf reduce.output.key.value.fields.spec=0-2:5-" specifies how to select the
+key and value for the reduce output. In this case, the reduce output key consists of fields
+0, 1 and 2 (corresponding to the original fields 6, 5 and 1), and the reduce output value
+consists of all fields starting from field 5 (corresponding to all the original fields).
+</p>
+</section>
+</section>
+
+<section>
+<title>Frequently Asked Questions</title>
+
+<section>
+<title>How do I use Hadoop Streaming to run an arbitrary set of (semi-)independent tasks?</title>
+<p>
+Often you do not need the full power of Map Reduce, but only need to run multiple instances
+of the same program, either on different parts of the data or on the same data with different
+parameters. You can use Hadoop Streaming to do this.</p>
+
+</section>
+
+<section>
+<title>How do I process files, one per map?</title>
+<p>
+As an example, consider the problem of zipping (compressing) a set of files across the cluster. You can achieve this in several ways:</p><ol>
+<li>Use Hadoop Streaming and a custom mapper script:<ul>
+  <li> Generate a file containing the full HDFS paths of the files to be compressed. Each map task gets one path name as input.</li>
+  <li> Create a mapper script which, given a filename, fetches the file to local disk, gzips it and puts it back in the desired output directory (a sketch of such a script follows this list).</li>
+</ul></li>
+<li>Use the existing Hadoop framework:<ul>
+   <li>Add these commands to your main function:
+<source>
+       FileOutputFormat.setCompressOutput(conf, true);
+       FileOutputFormat.setOutputCompressorClass(conf, org.apache.hadoop.io.compress.GzipCodec.class);
+       // Use a non-splittable input format so that each file goes to a single map task
+       // (NonSplitableTextInputFormat is a TextInputFormat subclass whose isSplitable() returns false).
+       conf.setInputFormat(NonSplitableTextInputFormat.class);
+       conf.setNumReduceTasks(0);
+</source></li>
+   <li>Write your map function:
+<source>
+
+       public void map(WritableComparable key, Writable value, 
+                               OutputCollector output, 
+                               Reporter reporter) throws IOException {
+            output.collect((Text)value, null);
+       }
+</source></li>
+  <li>Note that the output filename will not be the same as the original filename</li>
+</ul></li>
+</ol>
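+<p>
+A sketch of the mapper script from option (1), assuming each input line carries one HDFS
+path; the temporary and output directories used here are only illustrative:
+</p>
+<source>
+#!/bin/bash
+# Each input line is the HDFS path of one file to compress.
+while read hdfs_path; do
+  name=$(basename "$hdfs_path")
+  # Copy the file to local disk, gzip it, and push the result back to HDFS.
+  hadoop dfs -get "$hdfs_path" "/tmp/$name"
+  gzip "/tmp/$name"
+  hadoop dfs -put "/tmp/$name.gz" "/user/me/zipped/$name.gz"
+done
+</source>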
+</section>
+
+<section>
+<title>How many reducers should I use?</title>
+<p>
+See the Hadoop Wiki for details: <a href="mapred_tutorial.html#Reducer">Reducer</a>
+</p>
+</section>
+
+<section>
+<title>
+If I set up an alias in my shell script, will that work after -mapper?
+For example, say I do: alias c1='cut -f1'. Will -mapper "c1" work?
+</title>
+<p>
+Using an alias will not work, but variable substitution is allowed, as shown in this example:
+</p>
+<source>
+$ hadoop dfs -cat samples/student_marks
+alice   50
+bruce   70
+charlie 80
+dan     75
+
+$ c2='cut -f2'; $HADOOP_PREFIX/bin/hadoop jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input /user/me/samples/student_marks \
+    -mapper \"$c2\" -reducer 'cat' \
+    -output /user/me/samples/student_out \
+    -jobconf mapred.job.name='Experiment'
+
+$ hadoop dfs -ls samples/student_out
+Found 1 items/user/me/samples/student_out/part-00000    &lt;r 3&gt;   16
+
+$ hadoop dfs -cat samples/student_out/part-00000
+50
+70
+75
+80
+</source>
+</section>
+
+<section>
+<title>
+Can I use UNIX pipes? For example, will -mapper "cut -f1 | sed s/foo/bar/g" work?
+</title>
+<p>
+Currently this does not work and gives a "java.io.IOException: Broken pipe" error. This is probably a bug that needs to be investigated further.
+</p>
+</section>
+
+<section>
+<title>When I run a streaming job distributing a <strong>large executable (for example, 3.6G)</strong> through the -file option,
+I get a "No space left on device" error. What do I do?
+</title>
+<p>
+The jar packaging happens in the directory pointed to by the configuration variable stream.tmpdir. The default value of stream.tmpdir is /tmp; set it to a directory with more space:
+</p>
+<source>
+-jobconf stream.tmpdir=/export/bigspace/...
+</source>
+</section>
+
+<section>
+<title>How do I specify multiple input directories?</title>
+<p>
+You can specify multiple input directories with multiple -input options:
+</p><source>
+ hadoop jar hadoop-streaming.jar -input '/user/foo/dir1' -input '/user/foo/dir2' 
+</source>
+</section>
+
+<section>
+<title>How do I generate output files in gzip format?</title>
+<p>
+Instead of plain text output, you can generate gzip-compressed output by setting the options
+'-jobconf mapred.output.compress=true -jobconf mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec'
+on your streaming job, as illustrated below.
+</p>
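+<p>
+A sketch of a complete command line; the directories and the mapper/reducer shown here are
+placeholders:
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop  jar $HADOOP_PREFIX/hadoop-streaming.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -mapper /bin/cat \
+    -reducer /bin/wc \
+    -jobconf mapred.output.compress=true \
+    -jobconf mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec
+</source>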
+</section>
+
+<section>
+<title>How do I provide my own input/output format with streaming?</title>
+<p>
+At least as late as version 0.14, Hadoop does not support multiple jar files. So, when
+specifying your own custom classes you have to pack them along with the streaming jar and
+use that custom jar instead of the default hadoop streaming jar.
+</p>
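+<p>
+For example, a sketch of how such a custom jar might be used; the jar path and class names
+here are hypothetical:
+</p>
+<source>
+$HADOOP_PREFIX/bin/hadoop jar /path/to/my-streaming-plus-formats.jar \
+    -input myInputDirs \
+    -output myOutputDir \
+    -inputformat my.pkg.MyInputFormat \
+    -outputformat my.pkg.MyOutputFormat \
+    -mapper /bin/cat \
+    -reducer /bin/wc
+</source>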
+</section>
+
+<section>
+<title>How do I parse XML documents using streaming?</title>
+<p>
+You can use the record reader StreamXmlRecordReader to process XML documents.
+</p>
+<source>
+hadoop jar hadoop-streaming.jar -inputreader "StreamXmlRecord,begin=BEGIN_STRING,end=END_STRING" ..... (rest of the command)
+</source>
+<p>
+Anything found between BEGIN_STRING and END_STRING is treated as one record by the map tasks.
+</p>
+</section>
+
+<section>
+<title>How do I update counters in streaming applications?</title>
+<p>
+A streaming process can use stderr to emit counter information. A line of the form
+<code>reporter:counter:&lt;group&gt;,&lt;counter&gt;,&lt;amount&gt;</code>
+should be sent to stderr to update the counter. An example follows.
+</p>
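+<p>
+For instance, a mapper written as a shell script might report one record processed like
+this (the group and counter names here are only illustrative):
+</p>
+<source>
+echo "reporter:counter:MyGroup,RecordsSeen,1" >&amp;2
+</source>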
+</section>
+<section>
+<title>How do I update status in streaming applications?</title>
+<p>
+A streaming process can use stderr to emit status information. A line of the form
+<code>reporter:status:&lt;message&gt;</code> should be sent to stderr to set the status.
+An example follows.
+</p>
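+<p>
+For instance, a mapper script might set its status like this (the message text is only
+illustrative):
+</p>
+<source>
+echo "reporter:status:Processed 10000 records" >&amp;2
+</source>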
+</section>
+
+</section>
+</body>
+</document>

+ 37 - 0
common/src/docs/cn/src/documentation/content/xdocs/tabs.xml

@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE tabs PUBLIC "-//APACHE//DTD Cocoon Documentation Tab V1.0//EN" 
+          "http://forrest.apache.org/dtd/tab-cocoon-v10.dtd">
+
+<tabs software="Hadoop"
+      title="Hadoop"
+      copyright="The Apache Software Foundation"
+      xmlns:xlink="http://www.w3.org/1999/xlink">
+
+  <!-- The rules are:
+    @dir will always have /index.html added.
+    @href is not modified unless it is root-relative and obviously specifies a
+    directory (ends in '/'), in which case /index.html will be added
+  -->
+
+  <tab label="项目" href="http://hadoop.apache.org/core/" />
+  <tab label="维基" href="http://wiki.apache.org/hadoop" />
+  <tab label="Hadoop 0.18文档" dir="" />  
+  
+</tabs>

BIN
common/src/docs/cn/src/documentation/resources/images/architecture.gif


BIN
common/src/docs/cn/src/documentation/resources/images/core-logo.gif


BIN
common/src/docs/cn/src/documentation/resources/images/favicon.ico


BIN
common/src/docs/cn/src/documentation/resources/images/hadoop-logo-big.jpg


BIN
common/src/docs/cn/src/documentation/resources/images/hadoop-logo.jpg


BIN
common/src/docs/cn/src/documentation/resources/images/hdfsarchitecture.gif


BIN
common/src/docs/cn/src/documentation/resources/images/hdfsarchitecture.odg


BIN
common/src/docs/cn/src/documentation/resources/images/hdfsarchitecture.png


BIN
common/src/docs/cn/src/documentation/resources/images/hdfsdatanodes.gif


BIN
common/src/docs/cn/src/documentation/resources/images/hdfsdatanodes.odg


BIN
common/src/docs/cn/src/documentation/resources/images/hdfsdatanodes.png


+ 346 - 0
common/src/docs/cn/src/documentation/skinconf.xml

@@ -0,0 +1,346 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!--
+Skin configuration file. This file contains details of your project,
+which will be used to configure the chosen Forrest skin.
+-->
+
+<!DOCTYPE skinconfig PUBLIC "-//APACHE//DTD Skin Configuration V0.6-3//EN" "http://forrest.apache.org/dtd/skinconfig-v06-3.dtd">
+<skinconfig>
+  <!-- To enable lucene search add provider="lucene" (default is google).
+    Add box-location="alt" to move the search box to an alternate location
+    (if the skin supports it) and box-location="all" to show it in all
+    available locations on the page.  Remove the <search> element to show
+    no search box. @domain will enable sitesearch for the specific domain with google.
+    In other words google will search the @domain for the query string.
+
+  -->
+  <search name="Lucene" domain="hadoop.apache.org" provider="google"/>
+
+  <!-- Disable the print link? If enabled, invalid HTML 4.0.1 -->
+  <disable-print-link>true</disable-print-link>  
+  <!-- Disable the PDF link? -->
+  <disable-pdf-link>false</disable-pdf-link>
+  <!-- Disable the POD link? -->
+  <disable-pod-link>true</disable-pod-link>
+  <!-- Disable the Text link? FIXME: NOT YET IMPLEMENTED. -->
+  <disable-txt-link>true</disable-txt-link>
+  <!-- Disable the xml source link? -->
+  <!-- The xml source link makes it possible to access the xml rendition
+    of the source from the html page, and to have it generated statically.
+    This can be used to enable other sites and services to reuse the
+    xml format for their uses. Keep this disabled if you don't want other
+    sites to easily reuse your pages.-->
+  <disable-xml-link>true</disable-xml-link>
+
+  <!-- Disable navigation icons on all external links? -->
+  <disable-external-link-image>true</disable-external-link-image>
+
+  <!-- Disable w3c compliance links? 
+    Use e.g. align="center" to move the compliance links logos to 
+    an alternate location default is left.
+    (if the skin supports it) -->
+  <disable-compliance-links>true</disable-compliance-links>
+
+  <!-- Render mailto: links unrecognisable by spam harvesters? -->
+  <obfuscate-mail-links>false</obfuscate-mail-links>
+
+  <!-- Disable the javascript facility to change the font size -->
+  <disable-font-script>true</disable-font-script>
+
+  <!-- project logo -->
+  <project-name>Hadoop</project-name>
+  <project-description>Scalable Computing Platform</project-description>
+  <project-url>http://hadoop.apache.org/core/</project-url>
+  <project-logo>images/core-logo.gif</project-logo>
+
+  <!-- group logo -->
+  <group-name>Hadoop</group-name>
+  <group-description>Apache Hadoop</group-description>
+  <group-url>http://hadoop.apache.org/</group-url>
+  <group-logo>images/hadoop-logo.jpg</group-logo>
+
+  <!-- optional host logo (e.g. sourceforge logo)
+       default skin: renders it at the bottom-left corner -->
+  <host-url></host-url>
+  <host-logo></host-logo>
+
+  <!-- relative url of a favicon file, normally favicon.ico -->
+  <favicon-url>images/favicon.ico</favicon-url>
+
+  <!-- The following are used to construct a copyright statement -->
+  <year>2009</year>
+  <vendor>The Apache Software Foundation.</vendor>
+  <copyright-link>http://www.apache.org/licenses/</copyright-link>
+
+  <!-- Some skins use this to form a 'breadcrumb trail' of links.
+    Use location="alt" to move the trail to an alternate location
+    (if the skin supports it).
+	  Omit the location attribute to display the trail in the default location.
+	  Use location="none" to not display the trail (if the skin supports it).
+    For some skins just set the attributes to blank.
+  -->
+  <trail>
+    <link1 name="Apache" href="http://www.apache.org/"/>
+    <link2 name="Hadoop" href="http://hadoop.apache.org/"/>
+    <link3 name="Common" href="http://hadoop.apache.org/common/"/>
+  </trail>
+
+  <!-- Configure the TOC, i.e. the Table of Contents.
+  @max-depth
+   how many "section" levels need to be included in the
+   generated Table of Contents (TOC). 
+  @min-sections
+   Minimum required to create a TOC.
+  @location ("page","menu","page,menu", "none")
+   Where to show the TOC.
+  -->
+  <toc max-depth="2" min-sections="1" location="page"/>
+
+  <!-- Heading types can be clean|underlined|boxed  -->
+  <headings type="clean"/>
+  
+  <!-- The optional feedback element will be used to construct a
+    feedback link in the footer with the page pathname appended:
+    <a href="@href">{@to}</a>
+  <feedback to="webmaster@foo.com"
+    href="mailto:webmaster@foo.com?subject=Feedback&#160;" >
+    Send feedback about the website to:
+  </feedback>
+    -->
+  <!--
+    extra-css - here you can define custom css-elements that are 
+    a. overriding the fallback elements or 
+    b. adding the css definition from new elements that you may have 
+       used in your documentation.
+    -->
+  <extra-css>
+    <!--Example of b. 
+        To define the css definition of a new element that you may have used
+        in the class attribute of a <p> node. 
+        e.g. <p class="quote"/>
+    -->
+    p.quote {
+      margin-left: 2em;
+      padding: .5em;
+      background-color: #f0f0f0;
+      font-family: monospace;
+    }
+  </extra-css>
+
+  <colors>
+  <!-- These values are used for the generated CSS files. -->
+
+  <!-- Krysalis -->
+<!--
+    <color name="header"    value="#FFFFFF"/>
+
+    <color name="tab-selected" value="#a5b6c6" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="tab-unselected" value="#F7F7F7"  link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-selected" value="#a5b6c6"  link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-unselected" value="#a5b6c6"  link="#000000" vlink="#000000" hlink="#000000"/>
+
+    <color name="heading" value="#a5b6c6"/>
+    <color name="subheading" value="#CFDCED"/>
+        
+    <color name="navstrip" value="#CFDCED" font="#000000" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="toolbox" value="#a5b6c6"/>
+    <color name="border" value="#a5b6c6"/>
+        
+    <color name="menu" value="#F7F7F7" link="#000000" vlink="#000000" hlink="#000000"/>    
+    <color name="dialog" value="#F7F7F7"/>
+            
+    <color name="body"    value="#ffffff" link="#0F3660" vlink="#009999" hlink="#000066"/>
+    
+    <color name="table" value="#a5b6c6"/>    
+    <color name="table-cell" value="#ffffff"/>    
+    <color name="highlight" value="#ffff00"/>
+    <color name="fixme" value="#cc6600"/>
+    <color name="note" value="#006699"/>
+    <color name="warning" value="#990000"/>
+    <color name="code" value="#a5b6c6"/>
+        
+    <color name="footer" value="#a5b6c6"/>
+-->
+  
+  <!-- Forrest -->
+<!--
+    <color name="header"    value="#294563"/>
+
+    <color name="tab-selected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+    <color name="tab-unselected" value="#b5c7e7" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+    <color name="subtab-selected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+    <color name="subtab-unselected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+
+    <color name="heading" value="#294563"/>
+    <color name="subheading" value="#4a6d8c"/>
+        
+    <color name="navstrip" value="#cedfef" font="#0F3660" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
+    <color name="toolbox" value="#4a6d8c"/>
+    <color name="border" value="#294563"/>
+    
+    <color name="menu" value="#4a6d8c" font="#cedfef" link="#ffffff" vlink="#ffffff" hlink="#ffcf00"/>    
+    <color name="dialog" value="#4a6d8c"/>
+            
+    <color name="body" value="#ffffff"  link="#0F3660" vlink="#009999" hlink="#000066"/>
+    
+    <color name="table" value="#7099C5"/>    
+    <color name="table-cell" value="#f0f0ff"/>    
+    <color name="highlight" value="#ffff00"/>
+    <color name="fixme" value="#cc6600"/>
+    <color name="note" value="#006699"/>
+    <color name="warning" value="#990000"/>
+    <color name="code" value="#CFDCED"/>
+        
+    <color name="footer" value="#cedfef"/>
+-->
+
+  <!-- Collabnet --> 
+<!--
+    <color name="header"    value="#003366"/>
+
+    <color name="tab-selected" value="#dddddd" link="#555555" vlink="#555555" hlink="#555555"/>
+    <color name="tab-unselected" value="#999999" link="#ffffff" vlink="#ffffff" hlink="#ffffff"/>
+    <color name="subtab-selected" value="#cccccc" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-unselected" value="#cccccc" link="#555555" vlink="#555555" hlink="#555555"/>
+
+    <color name="heading" value="#003366"/>
+    <color name="subheading" value="#888888"/>
+    
+    <color name="navstrip" value="#dddddd" font="#555555"/>
+    <color name="toolbox" value="#dddddd" font="#555555"/>
+    <color name="border" value="#999999"/>
+    
+    <color name="menu" value="#ffffff"/>    
+    <color name="dialog" value="#eeeeee"/>
+            
+    <color name="body"      value="#ffffff"/>
+    
+    <color name="table" value="#ccc"/>    
+    <color name="table-cell" value="#ffffff"/>   
+    <color name="highlight" value="#ffff00"/>
+    <color name="fixme" value="#cc6600"/>
+    <color name="note" value="#006699"/>
+    <color name="warning" value="#990000"/>
+    <color name="code" value="#003366"/>
+        
+    <color name="footer" value="#ffffff"/>
+-->
+ <!-- Lenya using pelt-->
+<!--
+    <color name="header" value="#ffffff"/>
+
+    <color name="tab-selected" value="#4C6C8F" link="#ffffff" vlink="#ffffff" hlink="#ffffff"/>
+    <color name="tab-unselected" value="#E5E4D9" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-selected" value="#000000" link="#000000" vlink="#000000" hlink="#000000"/>
+    <color name="subtab-unselected" value="#E5E4D9" link="#000000" vlink="#000000" hlink="#000000"/>
+
+    <color name="heading" value="#E5E4D9"/>
+    <color name="subheading" value="#000000"/>
+    <color name="published" value="#4C6C8F" font="#FFFFFF"/>
+    <color name="feedback" value="#4C6C8F" font="#FFFFFF" align="center"/>
+    <color name="navstrip" value="#E5E4D9" font="#000000"/>
+
+    <color name="toolbox" value="#CFDCED" font="#000000"/>
+
+    <color name="border" value="#999999"/>
+    <color name="menu" value="#4C6C8F" font="#ffffff" link="#ffffff" vlink="#ffffff" hlink="#ffffff" current="#FFCC33" />    
+    <color name="menuheading" value="#cfdced" font="#000000" />
+    <color name="searchbox" value="#E5E4D9" font="#000000"/>
+    
+    <color name="dialog" value="#CFDCED"/>
+    <color name="body" value="#ffffff" />            
+    
+    <color name="table" value="#ccc"/>    
+    <color name="table-cell" value="#ffffff"/>   
+    <color name="highlight" value="#ffff00"/>
+    <color name="fixme" value="#cc6600"/>
+    <color name="note" value="#006699"/>
+    <color name="warning" value="#990000"/>
+    <color name="code" value="#003366"/>
+        
+    <color name="footer" value="#E5E4D9"/>
+-->
+  </colors>
+ 
+  <!-- Settings specific to PDF output. -->
+  <pdf>
+    <!-- 
+       Supported page sizes are a0, a1, a2, a3, a4, a5, executive,
+       folio, legal, ledger, letter, quarto, tabloid (default letter).
+       Supported page orientations are portrait, landscape (default
+       portrait).
+       Supported text alignments are left, right, justify (default left).
+    -->
+    <page size="letter" orientation="portrait" text-align="left"/>
+
+    <!--
+       Margins can be specified for top, bottom, inner, and outer
+       edges. If double-sided="false", the inner edge is always left
+       and the outer is always right. If double-sided="true", the
+       inner edge will be left on odd pages, right on even pages,
+       the outer edge vice versa.
+       Specified below are the default settings.
+    -->
+    <margins double-sided="false">
+      <top>1in</top>
+      <bottom>1in</bottom>
+      <inner>1.25in</inner>
+      <outer>1in</outer>
+    </margins>
+
+    <!--
+      Print the URL text next to all links going outside the file
+    -->
+    <show-external-urls>false</show-external-urls>
+
+    <!--
+      Disable the copyright footer on each page of the PDF.
+      A footer is composed for each page. By default, a "credit" with role=pdf
+      will be used, as explained below. Otherwise a copyright statement
+      will be generated. This latter can be disabled.
+    -->
+    <disable-copyright-footer>false</disable-copyright-footer>
+  </pdf>
+
+  <!-- Credits are typically rendered as a set of small clickable
+    images in the page footer.
+    Use box-location="alt" to move the credit to an alternate location
+    (if the skin supports it). 
+  -->
+  <credits>
+    <credit box-location="alt">
+      <name>Built with Apache Forrest</name>
+      <url>http://forrest.apache.org/</url>
+      <image>images/built-with-forrest-button.png</image>
+      <width>88</width>
+      <height>31</height>
+    </credit>
+    <!-- A credit with @role="pdf" will be used to compose a footer
+     for each page in the PDF, using either "name" or "url" or both.
+    -->
+    <!--
+    <credit role="pdf">
+      <name>Built with Apache Forrest</name>
+      <url>http://forrest.apache.org/</url>
+    </credit>
+    -->
+  </credits>
+
+</skinconfig>

+ 78 - 0
common/src/docs/cn/src/documentation/skins/common/css/forrest.css.xslt

@@ -0,0 +1,78 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<!-- This is not used by Forrest but makes it possible to debug the 
+       stylesheet in standalone editors -->
+  <xsl:output method = "text"  omit-xml-declaration="yes"  />
+<!--
+  If the skin doesn't override this, at least aural styles 
+  and extra-css are present 
+-->
+  <xsl:template match="skinconfig">
+    <xsl:call-template name="aural"/>
+    <xsl:call-template name="a-external"/>
+    <xsl:apply-templates/>
+    <xsl:call-template name="add-extra-css"/>
+  </xsl:template>
+  <xsl:template match="colors">
+    <xsl:apply-templates/>
+  </xsl:template>
+  <xsl:template name="aural">
+
+/* ==================== aural ============================ */
+
+@media aural {
+  h1, h2, h3, h4, h5, h6 { voice-family: paul, male; stress: 20; richness: 90 }
+  h1 { pitch: x-low; pitch-range: 90 }
+  h2 { pitch: x-low; pitch-range: 80 }
+  h3 { pitch: low; pitch-range: 70 }
+  h4 { pitch: medium; pitch-range: 60 }
+  h5 { pitch: medium; pitch-range: 50 }
+  h6 { pitch: medium; pitch-range: 40 }
+  li, dt, dd { pitch: medium; richness: 60 }
+  dt { stress: 80 }
+  pre, code, tt { pitch: medium; pitch-range: 0; stress: 0; richness: 80 }
+  em { pitch: medium; pitch-range: 60; stress: 60; richness: 50 }
+  strong { pitch: medium; pitch-range: 60; stress: 90; richness: 90 }
+  dfn { pitch: high; pitch-range: 60; stress: 60 }
+  s, strike { richness: 0 }
+  i { pitch: medium; pitch-range: 60; stress: 60; richness: 50 }
+  b { pitch: medium; pitch-range: 60; stress: 90; richness: 90 }
+  u { richness: 0 }
+  
+  :link { voice-family: harry, male }
+  :visited { voice-family: betty, female }
+  :active { voice-family: betty, female; pitch-range: 80; pitch: x-high }
+}
+  </xsl:template>
+  <xsl:template name="a-external">
+a.external  {
+  padding: 0 20px 0px 0px;
+	display:inline;
+  background-repeat: no-repeat;
+	background-position: center right;
+	background-image: url(images/external-link.gif);
+}
+  </xsl:template>
+  <xsl:template name="add-extra-css">
+<xsl:text>/* extra-css */</xsl:text>
+    <xsl:value-of select="extra-css"/>
+  </xsl:template>
+  <xsl:template match="*"></xsl:template>
+  <xsl:template match="text()"></xsl:template>
+</xsl:stylesheet>

+ 1 - 0
common/src/docs/cn/src/documentation/skins/common/images/README.txt

@@ -0,0 +1 @@
+The images in this directory are used if the current skin lacks them.

BIN
common/src/docs/cn/src/documentation/skins/common/images/add.jpg


BIN
common/src/docs/cn/src/documentation/skins/common/images/built-with-forrest-button.png


+ 92 - 0
common/src/docs/cn/src/documentation/skins/common/images/corner-imports.svg.xslt

@@ -0,0 +1,92 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:param name="orientation-tb"/>
+  <xsl:param name="orientation-lr"/>
+  <xsl:param name="size"/>
+  <xsl:param name="bg-color-name"/>
+  <xsl:param name="stroke-color-name"/>
+  <xsl:param name="fg-color-name"/>
+<!-- if not all colors are present, don't even try to render the corners -->
+  <xsl:variable name="isize">
+    <xsl:choose>
+      <xsl:when test="$bg-color-name and $stroke-color-name and $fg-color-name">
+        <xsl:value-of select="$size"/>
+      </xsl:when>
+      <xsl:otherwise>0</xsl:otherwise>
+    </xsl:choose>
+  </xsl:variable>
+  <xsl:variable name="smallersize" select="number($isize)-1"/>
+  <xsl:variable name="biggersize" select="number($isize)+1"/>
+  <xsl:variable name="bg">
+    <xsl:if test="skinconfig/colors/color[@name=$bg-color-name]">fill:<xsl:value-of select="skinconfig/colors/color[@name=$bg-color-name]/@value"/>;</xsl:if>
+  </xsl:variable>
+  <xsl:variable name="fill">
+    <xsl:if test="skinconfig/colors/color[@name=$stroke-color-name]">fill:<xsl:value-of select="skinconfig/colors/color[@name=$stroke-color-name]/@value"/>;</xsl:if>
+  </xsl:variable>
+  <xsl:variable name="stroke">
+    <xsl:if test="skinconfig/colors/color[@name=$fg-color-name]">stroke:<xsl:value-of select="skinconfig/colors/color[@name=$fg-color-name]/@value"/>;</xsl:if>
+  </xsl:variable>
+  <xsl:template match="skinconfig">
+    <svg width="{$isize}" height="{$isize}">
+<!-- background-->
+      <rect x="-1" y="-1" width="{$biggersize}" height="{$biggersize}" style="{$bg}stroke-width:0"/>
+<!-- 0,0 0,-4 4,0 4,-4-->
+      <xsl:variable name="flip-tb-scale">
+        <xsl:choose>
+          <xsl:when test="$orientation-tb='t'">1</xsl:when>
+          <xsl:otherwise>-1</xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+      <xsl:variable name="flip-lr-scale">
+        <xsl:choose>
+          <xsl:when test="$orientation-lr='l'">1</xsl:when>
+          <xsl:otherwise>-1</xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+      <xsl:variable name="flip-tb-translate">
+        <xsl:choose>
+          <xsl:when test="$orientation-tb='t'">0</xsl:when>
+          <xsl:otherwise>-<xsl:value-of select="$isize" />
+          </xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+      <xsl:variable name="flip-lr-translate">
+        <xsl:choose>
+          <xsl:when test="$orientation-lr='l'">0</xsl:when>
+          <xsl:otherwise>-<xsl:value-of select="$isize" />
+          </xsl:otherwise>
+        </xsl:choose>
+      </xsl:variable>
+<!-- flip transform -->
+      <g transform="scale({$flip-lr-scale},{$flip-tb-scale}) translate({$flip-lr-translate}, {$flip-tb-translate})">
+        <xsl:call-template name="figure" />
+      </g>
+    </svg>
+  </xsl:template>
+  <xsl:template name="figure">
+<!-- Just change shape here -->
+    <g transform="translate(0.5 0.5)">
+      <ellipse cx="{$smallersize}" cy="{$smallersize}" rx="{$smallersize}" ry="{$smallersize}"
+				 style="{$fill}{$stroke}stroke-width:1"/>
+    </g>
+<!-- end -->
+  </xsl:template>
+  <xsl:template match="*"></xsl:template>
+  <xsl:template match="text()"></xsl:template>
+</xsl:stylesheet>

+ 28 - 0
common/src/docs/cn/src/documentation/skins/common/images/dc.svg.xslt

@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:import href="corner-imports.svg.xslt" />
+<!-- Diagonal 45 degrees corner -->
+  <xsl:template name="figure">
+    <xsl:variable name="biggersize" select="number($size)+number($size)"/>
+    <g transform="translate(0 0.5)">
+      <polygon points="0,{$size} {$size},0 {$biggersize},0 {$biggersize},{$biggersize} 0,{$biggersize}"
+                    style="{$fill}{$stroke}stroke-width:1"/>
+    </g>
+  </xsl:template>
+</xsl:stylesheet>

BIN
common/src/docs/cn/src/documentation/skins/common/images/external-link.gif


BIN
common/src/docs/cn/src/documentation/skins/common/images/fix.jpg


BIN
common/src/docs/cn/src/documentation/skins/common/images/forrest-credit-logo.png


BIN
common/src/docs/cn/src/documentation/skins/common/images/hack.jpg


BIN
common/src/docs/cn/src/documentation/skins/common/images/pdfdoc.gif


BIN
common/src/docs/cn/src/documentation/skins/common/images/poddoc.png


+ 55 - 0
common/src/docs/cn/src/documentation/skins/common/images/poddoc.svg.xslt

@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<svg width="20pt" height="20pt"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:xlink="http://www.w3.org/1999/xlink">
+  <defs
+     id="defs550">
+    <linearGradient id="gray2white">
+      <stop style="stop-color:#7f7f7f;stop-opacity:1;" offset="0.000000"/>
+      <stop style="stop-color:#ffffff;stop-opacity:1;" offset="1.000000"/>
+    </linearGradient>
+    <linearGradient id="pageshade" xlink:href="#gray2white"
+       x1="0.95" y1="0.95"
+       x2="0.40" y2="0.20"
+       gradientUnits="objectBoundingBox" spreadMethod="pad" />
+    <path d="M 0 0 L 200 0" style="stroke:#000000;stroke-width:1pt;" id="hr"/>
+  </defs>
+  <g transform="scale(0.08)">
+    <g transform="translate(40, 0)">
+      <rect width="230" height="300" x="0" y="0"
+            style="fill:url(#pageshade);fill-rule:evenodd;
+            stroke:#000000;stroke-width:1.25;"/>
+      <g transform="translate(15, 60)">
+        <use xlink:href="#hr" x="0" y="0"/>
+        <use xlink:href="#hr" x="0" y="60"/>
+        <use xlink:href="#hr" x="0" y="120"/>
+        <use xlink:href="#hr" x="0" y="180"/>
+      </g>
+    </g>
+    <g transform="translate(0,70),scale(1.1,1.6)">
+      <rect width="200" height="100" x="0" y="0"
+         style="fill:#ff0000;fill-rule:evenodd;
+                stroke:#000000;stroke-width:2.33903;"/>
+      <text x="20" y="75"
+            style="stroke:#ffffff;stroke-width:1.0;
+                   font-size:72;font-weight:normal;fill:#ffffff;
+                   font-family:Arial;text-anchor:start;">POD</text>
+    </g>
+  </g>
+</svg>

Some files were not shown because too many files changed in this diff